Crontab xml importer

From OpenKM Documentation
Jump to: navigation, search

Import files with metadata values from XML files. This is a crontab task base in Java class packaged in a JAR.

Description:

  • Files to be imported are stored at /home/openkm/import/Output OpenKM server. (Variable SYSTEM_FOLDER+Output).
  • XML files with metadata values are stored at /home/openkm/import/logfile OpenKM server. (Variable SYSTEM_FOLDER+logfile).
  • Files are imported at /okm:root/import. (Variable OPENKM_FOLDER).

Execution description: The first action when crontab task run, is import all file into /home/openkm/import/Output and delete it after has being imported. Then parse xml files located at /home/openkm/import/logfile and inserts corresponding metadata to documents. Finally xml parsed files are deleted also

Metadata ( property groups )

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE property-groups PUBLIC "-//OpenKM//DTD Property Groups 2.1//EN"
                                 "http://www.openkm.com/dtd/property-groups-2.1.dtd">
<property-groups>
        <property-group label="Data" name="okg:data">
            <input label="Folio" name="okp:data.folio" type="text"/>
	    <input label="Rut" name="okp:data.rut" type="text"/>
            <input label="Placa patente" name="okp:data.placa" type="text"/>
	    <input label="Nombre" name="okp:data.nombres" type="text"/>
            <input label="Apellido parterno" name="okp:data.apellido_padre" type="text"/>
            <input label="Apellido materno" name="okp:data.apellido_madre" type="text"/>
            <select label="Año contable" name="okp:data.year" type="simple">
	    	<option value="2012" label="2012" />
		<option value="2013" label="2013" />
	    </select>
            <input label="Tipo documento" name="okp:data.documento" type="text"/>
        </property-group>
</property-groups>

XML structure

<?xml version="1.0" encoding="UTF-16"?>
<INDEX_LOG>
	<Batch BatchID="2013-04-16">
		<File>
			<Filename>10201200365212 - 01.pdf</Filename>
			<Fields>
				<Field>
					<Name>Folio</Name>
					<Value>10201200365245</Value>
				</Field>
				<Field>
					<Name>Rut</Name>
					<Value>9419475-JK</Value>
				</Field>
				<Field>
					<Name>Placa Patente</Name>
					<Value>XG412190</Value>
				</Field>
				<Field>
					<Name>Nombres</Name>
					<Value>JOSEP</Value>
				</Field>
				<Field>
					<Name>Apellido Paterno</Name>
					<Value>LLORT</Value>
				</Field>
				<Field>
					<Name>Apellido Materno</Name>
					<Value>TELLA</Value>
				</Field>
				<Field>
					<Name>Año</Name>
					<Value>2012</Value>
				</Field>
				<Field>
					<Name>Tipo de Documento</Name>
					<Value>Comprobante de Permiso</Value>
				</Field>
			</Fields>
		</File>
		etc....
 </Batch>
</INDEX_LOG>

Java class

package com.openkm.cron;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.openkm.api.OKMDocument;
import com.openkm.api.OKMPropertyGroup;
import com.openkm.api.OKMSearch;
import com.openkm.automation.AutomationException;
import com.openkm.bean.Document;
import com.openkm.bean.PropertyGroup;
import com.openkm.bean.QueryResult;
import com.openkm.core.AccessDeniedException;
import com.openkm.core.DatabaseException;
import com.openkm.core.FileSizeExceededException;
import com.openkm.core.ItemExistsException;
import com.openkm.core.LockException;
import com.openkm.core.NoSuchGroupException;
import com.openkm.core.NoSuchPropertyException;
import com.openkm.core.ParseException;
import com.openkm.core.PathNotFoundException;
import com.openkm.core.RepositoryException;
import com.openkm.core.UnsupportedMimeTypeException;
import com.openkm.core.UserQuotaExceededException;
import com.openkm.core.VirusDetectedException;
import com.openkm.dao.bean.QueryParams;
import com.openkm.extension.core.ExtensionException;
import com.openkm.module.db.stuff.DbSessionManager;

/**
 * XMLImporter
 * 
 */
public class XMLImporter {
	private static final String SYSTEM_FOLDER = "/home/openkm/import";
	private static final String OPENKM_FOLDER = "/okm:root/import";
	private static final String ATTRIBUTE_FILENAME = "Filename";
	private static final String ATTRIBUTE_FIELDS = "Fields";
	private static final String ATTRIBUTE_FIELD = "Field";
	private static final String ATTRIBUTE_NAME = "Name";
	private static final String ATTRIBUTE_VALUE = "Value";
	private static final String FIELD_NAME_FOLIO = "folio";
	private static final String FIELD_NAME_RUT = "rut";
	private static final String FIELD_NAME_PLACA_PATENTE = "placa patente";
	private static final String FIELD_NAME_NOMBRES = "nombres";
	private static final String FIELD_NAME_APELLIDO_PATERNO = "apellido paterno";
	private static final String FIELD_NAME_APELLIDO_MATERNO = "apellido materno";
	private static final String FIELD_NAME_ANO = "año";
	private static final String FIELD_NAME_TIPO_DOCUMENTO = "tipo de documento";
	
	public static void main(String[] args) {
		System.out.println(cronTask(args));
	}
	
	public static String cronTask(String[] systemToken) {
		try {
			importFiles();
			importMetadata();
		} catch (UnsupportedMimeTypeException e) {
			e.printStackTrace();
		} catch (FileSizeExceededException e) {
			e.printStackTrace();
		} catch (UserQuotaExceededException e) {
			e.printStackTrace();
		} catch (VirusDetectedException e) {
			e.printStackTrace();
		} catch (ItemExistsException e) {
			e.printStackTrace();
		} catch (PathNotFoundException e) {
			e.printStackTrace();
		} catch (AccessDeniedException e) {
			e.printStackTrace();
		} catch (RepositoryException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (DatabaseException e) {
			e.printStackTrace();
		} catch (ExtensionException e) {
			e.printStackTrace();
		} catch (AutomationException e) {
			e.printStackTrace();
		} catch (ParserConfigurationException e) {
			e.printStackTrace();
		} catch (SAXException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} catch (NoSuchGroupException e) {
			e.printStackTrace();
		} catch (LockException e) {
			e.printStackTrace();
		} catch (NoSuchPropertyException e) {
			e.printStackTrace();
		} 
		return "";
	}
	
	/**
	 * importMetadata
	 */
	public static void importMetadata() throws ParserConfigurationException, SAXException, IOException, ParseException,
			RepositoryException, DatabaseException, PathNotFoundException, NoSuchGroupException, LockException, AccessDeniedException,
			ExtensionException, NoSuchPropertyException, UnsupportedMimeTypeException, FileSizeExceededException,
			UserQuotaExceededException, VirusDetectedException, ItemExistsException, AutomationException {
		String fileName = "";
		String folio = "";
		String rut = "";
		String placaPatente = "";
		String nombres = "";
		String apellidoPaterno = "";
		String apellidoMaterno = "";
		String ano = "";
		String tipoDocumento = "";
		
		File folder = new File(SYSTEM_FOLDER + "/logfile");
		File[] listOfFiles = folder.listFiles();
		
		for (int i = 0; i < listOfFiles.length; i++) {
			File xmlFile = listOfFiles[i];
			if (xmlFile.isFile() && xmlFile.getName().toLowerCase().endsWith("indexlog.xml")) {
				DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
				dbf.setNamespaceAware(true);
				dbf.setAttribute("http://xml.org/sax/features/namespaces", Boolean.TRUE);
				
				DocumentBuilder db = dbf.newDocumentBuilder();
				org.w3c.dom.Document xmlDoc = db.parse(xmlFile);
				xmlDoc.getDocumentElement().normalize();
				NodeList fileNodesList = xmlDoc.getElementsByTagName("File");
				
				for (int x = 0; x < fileNodesList.getLength(); x++) {
					fileName = "";
					folio = "";
					rut = "";
					placaPatente = "";
					nombres = "";
					apellidoPaterno = "";
					apellidoMaterno = "";
					ano = "";
					tipoDocumento = "";
					Node childFildNode = fileNodesList.item(x);
					if (childFildNode.getNodeType() == Node.ELEMENT_NODE) {
						Element fileElement = (Element) childFildNode; // attibute
																		// node
						fileName = fileElement.getElementsByTagName(ATTRIBUTE_FILENAME).item(0).getTextContent();
						Node fieldsNode = fileElement.getElementsByTagName(ATTRIBUTE_FIELDS).item(0); // Only
																										// one
																										// fields
						if (fieldsNode.getNodeType() == Node.ELEMENT_NODE) {
							Element fieldsElement = (Element) fieldsNode; // fields
																			// node
							NodeList fieldList = fieldsElement.getElementsByTagName(ATTRIBUTE_FIELD);
							for (int y = 0; y < fieldList.getLength(); y++) {
								Node fieldNode = fieldList.item(y);
								if (fieldNode.getNodeType() == Node.ELEMENT_NODE) {
									Element fieldElement = (Element) fieldNode; // attibute
																				// node
									String name = fieldElement.getElementsByTagName(ATTRIBUTE_NAME).item(0).getTextContent().trim();
									String value = fieldElement.getElementsByTagName(ATTRIBUTE_VALUE).item(0).getTextContent().trim();
									if (name.toLowerCase().equals(FIELD_NAME_FOLIO)) {
										folio = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_RUT)) {
										rut = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_PLACA_PATENTE)) {
										placaPatente = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_NOMBRES)) {
										nombres = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_APELLIDO_PATERNO)) {
										apellidoPaterno = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_APELLIDO_MATERNO)) {
										apellidoMaterno = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_ANO)) {
										ano = value;
									} else if (name.toLowerCase().equals(FIELD_NAME_TIPO_DOCUMENTO)) {
										tipoDocumento = value;
									}
								}
							}
						}
						
						if (fileName != null && !fileName.equals("")) {
							QueryParams queryParams = new QueryParams();
							queryParams.setDomain(QueryParams.DOCUMENT);
							queryParams.setName(fileName);
							Collection<QueryResult> results = OKMSearch.getInstance().find(null, queryParams);
							if (results.size() == 1) {
								for (QueryResult queryResult : results) {
									if (queryResult.getDocument() != null) {
										System.out.println(fileName + " -ok");
										boolean found = false;
										for (PropertyGroup group : OKMPropertyGroup.getInstance().getGroups(null,
												queryResult.getDocument().getPath())) {
											if (group.getName().equals("okg:data")) {
												found = true;
											}
										}
										if (!found) {
											OKMPropertyGroup.getInstance().addGroup(null, queryResult.getDocument().getPath(), "okg:data");
										}
										
										Map<String, String> propertiesMap = new HashMap<String, String>();
										propertiesMap.put("okp:data.folio", folio);
										propertiesMap.put("okp:data.rut", rut);
										propertiesMap.put("okp:data.placa", placaPatente);
										propertiesMap.put("okp:data.nombres", nombres);
										propertiesMap.put("okp:data.apellido_padre", apellidoPaterno);
										propertiesMap.put("okp:data.apellido_madre", apellidoMaterno);
										propertiesMap.put("okp:data.year", ano);
										propertiesMap.put("okp:data.documento", tipoDocumento);
										OKMPropertyGroup.getInstance().setPropertiesSimple(null, queryResult.getDocument().getPath(),
												"okg:data", propertiesMap);
									}
								}
							} else if (results.size() > 1) {
								System.out.println(fileName + " - error");
							} else {
								System.out.println(fileName + " - not found");
							}
						}
					}
				}
				xmlFile.delete();
			}
		}
	}
	
	/**
	 * importFiles
	 */
	public static void importFiles() throws UnsupportedMimeTypeException, FileSizeExceededException, UserQuotaExceededException,
			VirusDetectedException, ItemExistsException, PathNotFoundException, AccessDeniedException, RepositoryException, IOException,
			DatabaseException, ExtensionException, AutomationException {
		String systemToken = DbSessionManager.getInstance().getSystemToken();
		// Loading files
		File folder = new File(SYSTEM_FOLDER + "/Output");
		File[] listOfFiles = folder.listFiles();
		for (int i = 0; i < listOfFiles.length; i++) {
			File file = listOfFiles[i];
			if (file.isFile() && file.getName().toLowerCase().endsWith(".pdf")) {
				Document doc = new Document();
				doc.setPath(OPENKM_FOLDER + "/" + file.getName());
				FileInputStream fis = new FileInputStream(file);
				doc = OKMDocument.getInstance().create(systemToken, doc, fis);
				file.delete();
			}
		}
	}
}

Example

Create .jar file:

Right click the XMLImporter.java file and select export option.


Okm user guide 411.png


Select .jar file and click next button.


Okm user guide 412.png Okm user guide 413.png
Okm user guide 414.png Okm user guide 415.png


Register XMLImporter.jar in crontab tasks:


Okm user guide 416.png


Register metadata:


Okm user guide 417.png


Files in openkm server at /home/openkm/import/logfile folder


Okm user guide 418.png

Files in openkm server at /home/openkm/import/Output folder


Okm user guide 419.png


Imported documents view from OpenKM desktop:


Okm user guide 420.png