Difference between revisions of "CSV importer"

From OpenKM Documentation
Jump to: navigation, search
m
 
(11 intermediate revisions by the same user not shown)
Line 1: Line 1:
These is an which can be execute from administration scripting ( can be used as crontab script also ) to import metadata values to OpenKM.  
+
Import metadata from a CSV file. The script can be executed from administration scripting (or used as a crontab script) to import metadata values into OpenKM.
  
{{Warning|Article under construction}}
+
Description:
 +
* There is a metadata file with two columns: one for the document path and the other for the metadata value.
 +
* The script searches for each document by its name. Ideally there is only one document with a given name in the system; otherwise the script reports an error. The document path from the first CSV column could be used directly, but the search engine is used to find the document in order to give a more complete example.
 +
* For each document found, the script adds the metadata property group with the CSV value.
 +
 
 +
{{Note|The script can be easily changed to use more than two columns.}}
 +
 
 +
 
 +
'''CSV File'''
 +
The CSV file has two columns: the first contains the document path and the second contains the metadata value. You can download it from here: [[File:Metadata.csv.zip]].
 +
 
 +
[[File:User_guide_535.png|center]]
 +
 
 +
 
 +
'''Property Group definition'''
 +
<source lang="xml">
 +
<?xml version="1.0" encoding="UTF-8"?>
 +
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
 +
"http://www.openkm.com/dtd/property-groups-2.1.dtd">
 +
<property-groups>
 +
    <property-group label="Metadata" name="okg:metadata">
 +
        <input label="Document ID" name="okp:metadata.value" type="text"/>
 +
    </property-group>
 +
</property-groups>
 +
</source>
  
== Document exist verification ==
 
This script verifies that the documents exist in the repository:
 
  
 
<source lang="java">
 
<source lang="java">
Line 22: Line 44:
 
import com.openkm.api.OKMSearch;
 
import com.openkm.api.OKMSearch;
 
import com.openkm.util.FileLogger;
 
import com.openkm.util.FileLogger;
 +
import com.openkm.api.OKMPropertyGroup;
 +
import com.openkm.util.PathUtils;
  
 +
String grpName = "okg:metadata";
 
String FILE_LOG_NAME = "CSVLOG";
 
String FILE_LOG_NAME = "CSVLOG";
 
String META_PATH = "/home/openkm/csv/";
 
String META_PATH = "/home/openkm/csv/";
 
String META_FILE_NAME = "metadata.csv";
 
String META_FILE_NAME = "metadata.csv";
 
int uniqueFileName = 0;
 
int uniqueFileName = 0;
int yearColumn = 2;
+
int valueColumn = 1;
  
 
// Format defintion
 
// Format defintion
Line 50: Line 75:
 
for (Iterator it = data.listIterator(); it.hasNext();) {
 
for (Iterator it = data.listIterator(); it.hasNext();) {
 
         String[] row = (String[]) it.next();
 
         String[] row = (String[]) it.next();
         String docName = row[uniqueFileName];
+
         String docPath = row[uniqueFileName];
     print(count + ">>>> " + docName);
+
     print(count + ">>>> " + docPath);
  
         if (docName != null && !docName.equals("")) {
+
         if (docPath != null && !docPath.equals("")) {
 
             QueryParams queryParams = new QueryParams();
 
             QueryParams queryParams = new QueryParams();
 
             queryParams.setDomain(QueryParams.DOCUMENT);
 
             queryParams.setDomain(QueryParams.DOCUMENT);
             queryParams.setName(docName + ".*");
+
             queryParams.setName(PathUtils.getName(docPath));
 
             Collection results = OKMSearch.getInstance().find(null, queryParams);
 
             Collection results = OKMSearch.getInstance().find(null, queryParams);
  
Line 64: Line 89:
 
                     print("found");
 
                     print("found");
 
                     countFound++;
 
                     countFound++;
 +
                    // Add Group
 +
    OKMPropertyGroup.getInstance().addGroup(null, docPath, grpName);
 +
    // Add metadata
 +
    Map map = new HashMap();
 +
    map.put("okp:metadata.value", row[valueColumn]);
 +
    OKMPropertyGroup.getInstance().setPropertiesSimple(null, docPath, grpName, map);
 
                 } else {
 
                 } else {
 
                     print("error is not document");
 
                     print("error is not document");
Line 93: Line 124:
 
print("Error name empty:" + notFound + "</br>");
 
print("Error name empty:" + notFound + "</br>");
 
</source>
 
</source>
 +
 +
 +
== Example ==
 +
'''Register property group:'''
 +
 +
[[File:User_guide_536.png|center]]
 +
 +
 +
'''Files at /okm:root:'''
 +
 +
[[File:User_guide_537.png|center]]
 +
 +
 +
'''Execute script:'''
 +
 +
[[File:User_guide_538.png|center|900px]]
 +
 +
 +
'''Script results:'''
 +
 +
[[File:User_guide_539.png|center]]
 +
 +
 +
'''Imported metadata:'''
 +
 +
[[File:User_guide_540.png|center]]
  
 
[[Category: Utilities]]
 
[[Category: Utilities]]

Latest revision as of 19:01, 31 December 2013

Import metadata from a CSV file. The script can be executed from administration scripting (or used as a crontab script) to import metadata values into OpenKM.

Description:

  • There is a metadata file with two columns: one for the document path and the other for the metadata value.
  • The script searches for each document by its name. Ideally there is only one document with a given name in the system; otherwise the script reports an error. The document path from the first CSV column could be used directly, but the search engine is used to find the document in order to give a more complete example.
  • For each document found, the script adds the metadata property group with the CSV value.

Nota clasica.png The script can be easily changed to use more than two columns.


CSV File

The CSV file has two columns: the first contains the document path and the second contains the metadata value. You can download it from here: File:Metadata.csv.zip.

User guide 535.png


Property Group definition

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
"http://www.openkm.com/dtd/property-groups-2.1.dtd">
<property-groups>
    <!-- Property group "okg:metadata": the group the import script attaches to each matched document. -->
    <property-group label="Metadata" name="okg:metadata">
        <!-- Single text property; the import script fills it with the value from the second CSV column. -->
        <input label="Document ID" name="okp:metadata.value" type="text"/>
    </property-group>
</property-groups>


import java.io.FileReader;
import java.io.Reader;
import java.util.Iterator;
import java.util.List;
import java.util.Collection;

import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVReader;
import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;
import com.googlecode.jcsv.reader.internal.DefaultCSVEntryParser;

import com.openkm.dao.bean.QueryParams;
import com.openkm.bean.QueryResult;
import com.openkm.api.OKMSearch;
import com.openkm.util.FileLogger;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.util.PathUtils;

String grpName = "okg:metadata";
String FILE_LOG_NAME = "CSVLOG";
String META_PATH = "/home/openkm/csv/";
String META_FILE_NAME = "metadata.csv";
int uniqueFileName = 0;
int valueColumn = 1;

// Format defintion
char delimiter = ',';
char quoteCharacter = '"';
char commentIndicator = '#';
boolean skipHeader = true;
boolean ignoreEmptyLines = true;
CSVStrategy strategy = new CSVStrategy(delimiter, quoteCharacter, commentIndicator, skipHeader, ignoreEmptyLines);
// File reader
Reader reader = new FileReader(META_PATH + META_FILE_NAME);
// CSV reader		
CSVReader csvParser = new CSVReaderBuilder(reader).strategy(strategy).entryParser(new DefaultCSVEntryParser()).build();
List data = csvParser.readAll();
int count = 1;
int countFound = 0;
int countNotDocument = 0;
int moreThanOneDocumentFound = 0;
int notFound = 0;
int noName = 0;

for (Iterator it = data.listIterator(); it.hasNext();) {
        String[] row = (String[]) it.next();
        String docPath = row[uniqueFileName];
    	print(count + ">>>> " + docPath);

        if (docPath != null && !docPath.equals("")) {
            QueryParams queryParams = new QueryParams();
            queryParams.setDomain(QueryParams.DOCUMENT);
            queryParams.setName(PathUtils.getName(docPath));
            Collection results = OKMSearch.getInstance().find(null, queryParams);

            if (results.size() == 1) {
                QueryResult queryResult = (QueryResult) results.iterator().next();
                if (queryResult.getDocument() != null) {
                    print("found");
                    countFound++;
                    // Add Group
		    OKMPropertyGroup.getInstance().addGroup(null, docPath, grpName);
		    // Add metadata
		    Map map = new HashMap();
		    map.put("okp:metadata.value", row[valueColumn]);
		    OKMPropertyGroup.getInstance().setPropertiesSimple(null, docPath, grpName, map);
                } else {
                    print("error is not document");
                    countNotDocument++;
                }
            } else if (results.size() > 1) {
                print("error more than one document found can not decide");
                moreThanOneDocumentFound++;
            } else {
                print("not found");
                notFound++;
            }
        } else {
            print("error document has no name");
            noName++;
        }

        print("</br>");
        
        //FileLogger.info(FILE_LOG_NAME, "Document name ''{0}'' to ''{1}''", row[0], row[posDocRevNo]);
        count++;
}

print("Total:" + count + "</br>");
print("Found:" + countFound + "</br>");
print("Error not document:" + countNotDocument + "</br>");
print("Error more then one document found:" + moreThanOneDocumentFound + "</br>");
print("Error not found:" + notFound + "</br>");
print("Error name empty:" + notFound + "</br>");


Example

Register property group:

User guide 536.png


Files at /okm:root:

User guide 537.png


Execute script:

User guide 538.png


Script results:

User guide 539.png


Imported metadata:

User guide 540.png