Difference between revisions of "Simple autotagging"

From OpenKM Documentation
Jump to: navigation, search
(Created page with 'The script assign keywords to documents name based in database metadata values present into document content. <source lang="sql"> -- DOCS TYPE DELETE FROM OKM_DB_METADATA_TYPE …')
 
 
(10 intermediate revisions by 2 users not shown)
Line 1: Line 1:
 +
{{TOCright}} __TOC__
 +
 
The script assigns keywords to a document based on the database metadata values found in the document's content.  
 
The script assigns keywords to a document based on the database metadata values found in the document's content.  
  
 +
'''Description:'''
 +
* There's a database metadata table '''doc_type''' with document type values.
 +
* There's a '''script''' which looks for key values in documents.
 +
* There's an '''automation task''' - based on scripting - executed after a document is uploaded, which tags the document.
 +
 +
'''Database metadata:'''
 
<source lang="sql">
 
<source lang="sql">
 
-- DOCS TYPE
 
-- DOCS TYPE
Line 26: Line 34:
 
</source>
 
</source>
  
 +
'''Code:'''
 
<source lang="java">
 
<source lang="java">
 
import com.openkm.dao.bean.NodeDocumentVersion;
 
import com.openkm.dao.bean.NodeDocumentVersion;
Line 44: Line 53:
  
 
String systemToken = DbSessionManager.getInstance().getSystemToken();
 
String systemToken = DbSessionManager.getInstance().getSystemToken();
//String uuid = "7f8b48ee-5efd-48dc-82ec-70053f8ab709";
+
 
// Getting path
+
// Get path
 
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);
 
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);
// Getting doc version uuid
+
 
 +
// Get doc version uuid
 
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
 
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
 
String docVerUuuid = currentVersion.getUuid();
 
String docVerUuuid = currentVersion.getUuid();
 +
 
// Document extractor
 
// Document extractor
 
TextExtractorWork tew = new TextExtractorWork();
 
TextExtractorWork tew = new TextExtractorWork();
Line 55: Line 66:
 
tew.setDocPath(docPath);
 
tew.setDocPath(docPath);
 
tew.setDocVerUuid(docVerUuuid);
 
tew.setDocVerUuid(docVerUuuid);
//tew.setTenant(1);
+
 
 
// Execute extractor
 
// Execute extractor
 
NodeDocumentDAO.getInstance().textExtractorHelper(tew);
 
NodeDocumentDAO.getInstance().textExtractorHelper(tew);
// Getting extracted text
+
 
 +
// Get extracted text
 
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
 
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
// Text to lowercase
 
 
String text = docNode.getText().toLowerCase();
 
String text = docNode.getText().toLowerCase();
 +
 
// Looking for metadata description values
 
// Looking for metadata description values
 
String qs = "from DatabaseMetadataValue";
 
String qs = "from DatabaseMetadataValue";
 
Session session = HibernateUtil.getSessionFactory().openSession();
 
Session session = HibernateUtil.getSessionFactory().openSession();
 +
 
try {
 
try {
Query q = session.createQuery(qs);
+
  Query q = session.createQuery(qs);
List ret = q.list();
+
  List ret = q.list();
for (DatabaseMetadataValue dmv : ret ) {
+
 
  if (text.contains(dmv.getCol01().toLowerCase())) {
+
  for (DatabaseMetadataValue dmv : ret ) {
OKMProperty.getInstance().addKeyword(systemToken, docPath, dmv.getCol01().toLowerCase());
+
    if (text.contains(dmv.getCol01().toLowerCase())) {
  }
+
      OKMProperty.getInstance().addKeyword(systemToken, docPath, dmv.getCol01().toLowerCase());
}
+
    }
 +
  }
 
} catch (HibernateException e) {
 
} catch (HibernateException e) {
throw new DatabaseException(e.getMessage(), e);
+
  throw new DatabaseException(e.getMessage(), e);
 
} finally {
 
} finally {
HibernateUtil.close(session);
+
  HibernateUtil.close(session);
 
}
 
}
 
</source>
 
</source>
 +
 +
== Example ==
 +
'''Register database metadata values:'''
 +
 +
[[File:Okm_user_guide_451.png|center]]
 +
 +
 +
'''Register automation task:'''
 +
 +
[[File:Okm_user_guide_452.png|center]]
 +
 +
 +
[[File:Okm_user_guide_453.png|center]]
 +
 +
 +
[[File:Okm_user_guide_455.png|center|800px]]
 +
 +
 +
'''Autotagging uploaded file:'''
 +
 +
[[File:Okm_user_guide_455.png|center|800px]]
 +
 +
 +
[[File:Okm_user_guide_456.png|center|800px]]
 +
 +
 +
[[Category: Utilities]]

Latest revision as of 20:17, 14 September 2013

Contents

The script assigns keywords to a document based on the database metadata values found in the document's content.

Description:

  • There's a database metadata table doc_type with document type values.
  • There's a script which looks for key values in documents.
  • There's an automation task - based on scripting - executed after a document is uploaded, which tags the document.

Database metadata:

-- DOCS TYPE
-- Declares the virtual table 'doc_type' with two text columns:
--   col00 -> dt_id, col01 -> dt_description.
-- The DELETE makes this section safe to run more than once.
DELETE FROM OKM_DB_METADATA_TYPE WHERE DMT_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col00', 'text', 'dt_id');
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_COLUMN, DMT_TYPE, DMT_VIRTUAL_COLUMN) VALUES ('doc_type', 'col01', 'text', 'dt_description');
 
-- VALUES
-- Remove any previously loaded 'doc_type' rows first; without this, running
-- the script again would insert every value a second time.
DELETE FROM OKM_DB_METADATA_VALUE WHERE DMV_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','1','Article');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','25','Audio');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','6','Broker Note');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','8','Case Study');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','5','Company Information');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','4','Conference Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','3','Course Material');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','7','Dissertation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','12','Form');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','11','Image');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','26','Infographics');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','16','Interview');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','17','Presentation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','19','Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','14','Video');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','0','_Other');

Code:

import com.openkm.dao.bean.NodeDocumentVersion;
import com.openkm.dao.bean.NodeDocument;
import com.openkm.extractor.TextExtractorWork;
import com.openkm.api.OKMRepository;
import com.openkm.dao.NodeDocumentVersionDAO;
import com.openkm.dao.NodeDocumentDAO;
import org.hibernate.Session;
import com.openkm.dao.HibernateUtil;
import org.hibernate.Query;
import com.openkm.dao.bean.DatabaseMetadataValue;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.api.OKMProperty;
import com.openkm.dao.HibernateUtil;
import com.openkm.core.DatabaseException;
import org.hibernate.HibernateException;

// Autotagging script.
// Runs text extraction for the document identified by `uuid`, then adds as a
// keyword (lowercased) every 'doc_type' metadata description that occurs in the
// extracted text. `uuid` is not defined in this script: it must already be in
// scope when the script is evaluated.
String systemToken = DbSessionManager.getInstance().getSystemToken();

// Get path
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);

// Get doc version uuid
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
String docVerUuid = currentVersion.getUuid();

// Document extractor
TextExtractorWork tew = new TextExtractorWork();
tew.setDocUuid(uuid);
tew.setDocPath(docPath);
tew.setDocVerUuid(docVerUuid);

// Execute extractor
NodeDocumentDAO.getInstance().textExtractorHelper(tew);

// Get extracted text. A document without extracted text is treated as empty,
// so it simply receives no keywords instead of failing on a null reference.
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
String extractedText = docNode.getText();
String text = (extractedText == null) ? "" : extractedText.toLowerCase();

// Looking for metadata description values.
// Restricted to the 'doc_type' table so that values belonging to other
// metadata tables are never used as tags.
// NOTE(review): assumes the DMV_TABLE column is mapped to a property named
// `table` on DatabaseMetadataValue - confirm against the bean.
String qs = "from DatabaseMetadataValue dmv where dmv.table=:table";
Session session = HibernateUtil.getSessionFactory().openSession();

try {
  Query q = session.createQuery(qs);
  q.setString("table", "doc_type");
  // Raw List kept on purpose: the original script iterates it with a typed
  // loop variable, and generic syntax may not be accepted by the interpreter.
  List ret = q.list();

  for (DatabaseMetadataValue dmv : ret) {
    String description = dmv.getCol01();

    // Skip missing or blank descriptions: a null would throw, and an empty
    // string is contained in every text and would add a meaningless keyword.
    if (description == null || description.trim().length() == 0) {
      continue;
    }

    // Lowercase once and reuse for both the match and the stored keyword.
    String keyword = description.toLowerCase();

    if (text.contains(keyword)) {
      OKMProperty.getInstance().addKeyword(systemToken, docPath, keyword);
    }
  }
} catch (HibernateException e) {
  // Rethrow as DatabaseException, keeping the original exception as the cause.
  throw new DatabaseException(e.getMessage(), e);
} finally {
  // Always release the session opened above.
  HibernateUtil.close(session);
}

Example

Register database metadata values:

Okm user guide 451.png


Register automation task:

Okm user guide 452.png


Okm user guide 453.png


Okm user guide 455.png


Autotagging uploaded file:

Okm user guide 455.png


Okm user guide 456.png