Difference between revisions of "Simple autotagging"
From OpenKM Documentation
(4 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
+ | {{TOCright}} __TOC__ | ||
+ | |||
The script assign keywords to documents name based in database metadata values present into document content. | The script assign keywords to documents name based in database metadata values present into document content. | ||
Line 51: | Line 53: | ||
String systemToken = DbSessionManager.getInstance().getSystemToken(); | String systemToken = DbSessionManager.getInstance().getSystemToken(); | ||
− | + | ||
− | // | + | // Get path |
String docPath = OKMRepository.getInstance().getNodePath(null, uuid); | String docPath = OKMRepository.getInstance().getNodePath(null, uuid); | ||
− | // | + | |
+ | // Get doc version uuid | ||
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid); | NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid); | ||
String docVerUuuid = currentVersion.getUuid(); | String docVerUuuid = currentVersion.getUuid(); | ||
+ | |||
// Document extractor | // Document extractor | ||
TextExtractorWork tew = new TextExtractorWork(); | TextExtractorWork tew = new TextExtractorWork(); | ||
Line 62: | Line 66: | ||
tew.setDocPath(docPath); | tew.setDocPath(docPath); | ||
tew.setDocVerUuid(docVerUuuid); | tew.setDocVerUuid(docVerUuuid); | ||
− | + | ||
// Execute extractor | // Execute extractor | ||
NodeDocumentDAO.getInstance().textExtractorHelper(tew); | NodeDocumentDAO.getInstance().textExtractorHelper(tew); | ||
− | // | + | |
+ | // Get extracted text | ||
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid); | NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid); | ||
− | |||
String text = docNode.getText().toLowerCase(); | String text = docNode.getText().toLowerCase(); | ||
+ | |||
// Looking for metadata description values | // Looking for metadata description values | ||
String qs = "from DatabaseMetadataValue"; | String qs = "from DatabaseMetadataValue"; | ||
Session session = HibernateUtil.getSessionFactory().openSession(); | Session session = HibernateUtil.getSessionFactory().openSession(); | ||
+ | |||
try { | try { | ||
− | + | Query q = session.createQuery(qs); | |
− | + | List ret = q.list(); | |
− | + | ||
− | + | for (DatabaseMetadataValue dmv : ret ) { | |
− | + | if (text.contains(dmv.getCol01().toLowerCase())) { | |
− | + | OKMProperty.getInstance().addKeyword(systemToken, docPath, dmv.getCol01().toLowerCase()); | |
− | + | } | |
+ | } | ||
} catch (HibernateException e) { | } catch (HibernateException e) { | ||
− | + | throw new DatabaseException(e.getMessage(), e); | |
} finally { | } finally { | ||
− | + | HibernateUtil.close(session); | |
} | } | ||
</source> | </source> | ||
Line 92: | Line 99: | ||
[[File:Okm_user_guide_451.png|center]] | [[File:Okm_user_guide_451.png|center]] | ||
− | '''Register | + | |
+ | '''Register automation task:''' | ||
[[File:Okm_user_guide_452.png|center]] | [[File:Okm_user_guide_452.png|center]] | ||
+ | |||
[[File:Okm_user_guide_453.png|center]] | [[File:Okm_user_guide_453.png|center]] | ||
− | [[File:Okm_user_guide_455.png|center]] | + | |
+ | [[File:Okm_user_guide_455.png|center|800px]] | ||
+ | |||
'''Autogging uploaded file:''' | '''Autogging uploaded file:''' | ||
− | [[File:Okm_user_guide_455.png|center]] | + | [[File:Okm_user_guide_455.png|center|800px]] |
+ | |||
+ | |||
+ | [[File:Okm_user_guide_456.png|center|800px]] | ||
+ | |||
− | [[ | + | [[Category: Utilities]] |
Latest revision as of 20:17, 14 September 2013
Contents |
The script assigns keywords to documents based on database metadata values found in the document content.
Description:
- There's a database metadata table doc_type with document type values.
- There's a script which looks for key values in documents.
- There's an automation task - based on scripting - executed after a document is uploaded, which tags the document.
Database metadata:
-- DOCS TYPE
-- Defines the 'doc_type' metadata table: existing column definitions for it
-- are removed first, then two text columns are registered
-- (real column col00 -> virtual column dt_id, col01 -> dt_description).
-- NOTE(review): only OKM_DB_METADATA_TYPE is cleared here; rows already in
-- OKM_DB_METADATA_VALUE for 'doc_type' are not deleted, so running the VALUES
-- inserts below a second time presumably duplicates them -- confirm.
DELETE FROM OKM_DB_METADATA_TYPE WHERE DMT_TABLE='doc_type';
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_CoLUMN, DMT_TYPE, DMT_VIRTUAL_CoLUMN) VALUES ('doc_type', 'col00', 'text', 'dt_id');
INSERT INTO OKM_DB_METADATA_TYPE (DMT_TABLE, DMT_REAL_CoLUMN, DMT_TYPE, DMT_VIRTUAL_CoLUMN) VALUES ('doc_type', 'col01', 'text', 'dt_description');
-- VALUES
-- One row per document type: DMV_COL00 holds the id (dt_id) and DMV_COL01 the
-- description (dt_description). The tagging script in the Code section
-- compares the DMV_COL01 value, lower-cased, against the document text.
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','1','Article');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','25','Audio');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','6','Broker Note');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','8','Case Study');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','5','Company Information');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','4','Conference Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','3','Course Material');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','7','Dissertation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','12','Form');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','11','Image');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','26','Infographics');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','16','Interview');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','17','Presentation');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','19','Report');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','14','Video');
INSERT INTO OKM_DB_METADATA_VALUE (DMV_TABLE, DMV_COL00, DMV_COL01) VALUES ('doc_type','0','_Other');
Code:
import com.openkm.dao.bean.NodeDocumentVersion;
import com.openkm.dao.bean.NodeDocument;
import com.openkm.extractor.TextExtractorWork;
import com.openkm.api.OKMRepository;
import com.openkm.dao.NodeDocumentVersionDAO;
import com.openkm.dao.NodeDocumentDAO;
import org.hibernate.Session;
import com.openkm.dao.HibernateUtil;
import org.hibernate.Query;
import com.openkm.dao.bean.DatabaseMetadataValue;
import com.openkm.module.db.stuff.DbSessionManager;
import com.openkm.api.OKMProperty;
import com.openkm.dao.HibernateUtil;
import com.openkm.core.DatabaseException;
import org.hibernate.HibernateException;
// Simple autotagging script.
//
// Extracts the text of the document identified by 'uuid' and adds, as a
// keyword, every 'doc_type' description (metadata column col01) that occurs in
// that text. Matching is case-insensitive and keywords are stored lower-cased.
//
// 'uuid' is not declared here: it must be supplied by whatever executes the
// script (presumably the automation task -- confirm against its configuration).
//
// Throws DatabaseException when the metadata query fails.
String systemToken = DbSessionManager.getInstance().getSystemToken();

// Get path
String docPath = OKMRepository.getInstance().getNodePath(null, uuid);

// Get doc version uuid
NodeDocumentVersion currentVersion = NodeDocumentVersionDAO.getInstance().findCurrentVersion(uuid);
String docVerUuid = currentVersion.getUuid();

// Document extractor
TextExtractorWork tew = new TextExtractorWork();
tew.setDocUuid(uuid);
tew.setDocPath(docPath);
tew.setDocVerUuid(docVerUuid);

// Execute extractor
NodeDocumentDAO.getInstance().textExtractorHelper(tew);

// Get extracted text
NodeDocument docNode = NodeDocumentDAO.getInstance().findByPk(uuid);
String extracted = docNode.getText();

// Nothing to match against when no text is available; this also avoids a
// NullPointerException on a null text value.
if (extracted != null && extracted.length() > 0) {
    String text = extracted.toLowerCase();

    // Looking for metadata description values. Only the 'doc_type' table is
    // read, so values of unrelated metadata tables never become keywords.
    String qs = "from DatabaseMetadataValue dmv where dmv.table=:table";
    Session session = HibernateUtil.getSessionFactory().openSession();

    try {
        Query q = session.createQuery(qs);
        q.setString("table", "doc_type");
        // Raw List on purpose: the script may be run by an interpreter that
        // does not accept generic type syntax.
        List ret = q.list();

        for (Object row : ret) {
            DatabaseMetadataValue dmv = (DatabaseMetadataValue) row;
            String description = dmv.getCol01();

            // Skip missing or blank descriptions: contains("") is always true,
            // which would otherwise tag every document with an empty keyword.
            if (description != null && description.trim().length() > 0) {
                String keyword = description.toLowerCase();

                if (text.contains(keyword)) {
                    OKMProperty.getInstance().addKeyword(systemToken, docPath, keyword);
                }
            }
        }
    } catch (HibernateException e) {
        throw new DatabaseException(e.getMessage(), e);
    } finally {
        HibernateUtil.close(session);
    }
}
Example
Register database metadata values:
Register automation task:
Autotagging uploaded file: