Difference between revisions of "Simple autotagging"

From OpenKM Documentation
Jump to: navigation, search
Line 1: Line 1:
 
The script assign keywords to documents name based in database metadata values present into document content.  
 
The script assign keywords to documents name based in database metadata values present into document content.  
'''
+
 
Database metadata:'''
+
'''Description:'''
 +
* There's a database metadata table '''doc_type''' with document type values.
 +
* There's a '''script''' which looking for key values into documents.
 +
* There's an '''automation task''' - based in scripting - executed after uploaded document which tags documents.
 +
 
 +
'''Database metadata:'''
 
<source lang="sql">
 
<source lang="sql">
 
-- DOCS TYPE
 
-- DOCS TYPE

Revision as of 13:11, 24 May 2013

The script assigns keywords to documents based on the database metadata values found in the document content.

Description:

  • There's a database metadata table doc_type with document type values.
  • There's a script which looks for key values in documents.
  • There's an automation task - based on scripting - executed after a document is uploaded, which tags the document.

Database metadata:

-- DOCS TYPE
-- Column definitions of the 'doc_type' metadata table:
-- real column col00 is exposed as dt_id, col01 as dt_description.
-- Existing definitions are removed first so the script can be re-run.
DELETE FROM OKM_DB_METADATA_TYPE WHERE DMT_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col00', 'text', 'dt_id');
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col01', 'text', 'dt_description');
 
-- VALUES
-- Remove previously loaded 'doc_type' rows as well; without this, every
-- re-run of the script would insert the values below a second time.
DELETE FROM OKM_DB_METADATA_VALUE WHERE DMV_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','1','Article');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','25','Audio');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','6','Broker Note');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','8','Case Study');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','5','Company Information');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','4','Conference Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','3','Course Material');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','7','Dissertation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','12','Form');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','11','Image');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','26','Infographics');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','16','Interview');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','17','Presentation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','19','Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','14','Video');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','0','_Other');

Code:

import com.openkm.dao.bean.NodeDocumentVersion;
import com.openkm.dao.bean.NodeDocument;
import com.openkm.extractor.TextExtractorWork;
import com.openkm.api.OKMRepository;
import com.openkm.dao.NodeDocumentVersionDAO;
import com.openkm.dao.NodeDocumentDAO;
import org.hibernate.Session;
import com.openkm.dao.HibernateUtil;
import org.hibernate.Query;
import com.openkm.dao.bean.DatabaseMetadataValue;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.api.OKMProperty;
import com.openkm.dao.HibernateUtil;
import com.openkm.core.DatabaseException;
import org.hibernate.HibernateException;

// Auto-tagging script: extracts the text of the document identified by `uuid`
// and adds, as a keyword, every 'doc_type' metadata description (col01) that
// appears in that text. Matching is case-insensitive and the keyword is stored
// in lower case.
//
// NOTE(review): `uuid` is not defined in this script; it is presumably injected
// by the automation context that runs it - confirm. For a manual test, uncomment
// the sample assignment below.
String systemToken = DbSessionManager.getInstance().getSystemToken();
//String uuid = "7f8b48ee-5efd-48dc-82ec-70053f8ab709";

// Resolve the repository path and the current version of the document
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
String docVerUuid = currentVersion.getUuid();

// Prepare and run the text extractor for this document version
TextExtractorWork tew = new TextExtractorWork();
tew.setDocUuid(uuid);
tew.setDocPath(docPath);
tew.setDocVerUuid(docVerUuid);
//tew.setTenant(1);
NodeDocumentDAO.getInstance().textExtractorHelper(tew);

// Reload the node to read the text stored by the extractor
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
String extractedText = docNode.getText();

// Nothing to match against when no text is available for the document
if (extractedText != null) {
	String text = extractedText.toLowerCase();
	// Restrict the lookup to the 'doc_type' table; an unfiltered query would also
	// return the rows of every other metadata table and tag documents with them.
	String qs = "from DatabaseMetadataValue dmv where dmv.table=:table";
	Session session = HibernateUtil.getSessionFactory().openSession();
	try {
		Query q = session.createQuery(qs);
		q.setString("table", "doc_type");
		List ret = q.list();
		for (DatabaseMetadataValue dmv : ret) {
			String description = dmv.getCol01();
			// Skip rows without a description: null would throw, and an empty
			// string is contained in any text and would add an empty keyword.
			if (description != null && description.length() > 0) {
				String keyword = description.toLowerCase();
				if (text.contains(keyword)) {
					OKMProperty.getInstance().addKeyword(systemToken, docPath, keyword);
				}
			}
		}
	} catch (HibernateException e) {
		throw new DatabaseException(e.getMessage(), e);
	} finally {
		HibernateUtil.close(session);
	}
}