package com.openkm.kea.metadata;

import com.openkm.bean.kea.MetadataDTO;
import com.openkm.util.WebUtils;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Set;
import org.ontoware.rdf2go.RDF2Go;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Node;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.model.node.impl.URIImpl;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.ExtractorFactory;
import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry;
import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl;
import org.semanticdesktop.aperture.util.IOUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/openkm/kea/metadata/MetadataExtractor.class */
/**
 * Extracts descriptive metadata from a file into a {@link MetadataDTO}.
 *
 * <p>The file's MIME type is detected from its leading bytes, an Aperture extractor
 * registered for that type fills an {@link RDFContainer}, and selected properties of
 * that container are copied into the DTO. Optionally, suggested subjects are derived
 * from the extracted plain text through a {@link SubjectExtractor}.
 *
 * <p>An instance keeps per-call state in its fields ({@code tempFile}, {@code rdf}) and
 * reuses one DTO across calls.
 */
public class MetadataExtractor {
    private static final Logger log = LoggerFactory.getLogger(MetadataExtractor.class);

    /** Buffer size, in bytes, of the stream handed to the Aperture extractor. */
    private static final int EXTRACTION_BUFFER_SIZE = 8192;

    /** Result object populated by {@link #extract(File)}. */
    private final MetadataDTO mdDTO;

    /** File currently (or most recently) being processed. */
    private File tempFile;

    /** RDF container for the current extraction; {@code null} outside of {@link #extract(File)}. */
    private RDFContainer rdf;

    /** Subject suggester; {@code null} when subject extraction is disabled. */
    private final SubjectExtractor subjectExtractor;

    /** Whether suggested subjects are extracted in addition to the plain metadata. */
    private final boolean se;

    /**
     * Creates an extractor.
     *
     * @param z {@code true} to also extract suggested subjects (a default
     *          {@link SubjectExtractor} is created), {@code false} to extract metadata only
     * @throws MetadataExtractionException if the subject extractor cannot be created
     */
    public MetadataExtractor(boolean z) throws MetadataExtractionException {
        this.mdDTO = new MetadataDTO();
        this.se = z;
        this.subjectExtractor = z ? new SubjectExtractor() : null;
    }

    /**
     * Creates an extractor with subject extraction enabled.
     *
     * @param i value passed unchanged to {@code SubjectExtractor(int)}; its meaning is
     *          defined by that class
     * @throws MetadataExtractionException if the subject extractor cannot be created
     */
    public MetadataExtractor(int i) throws MetadataExtractionException {
        this.mdDTO = new MetadataDTO();
        this.se = true;
        this.subjectExtractor = new SubjectExtractor(i);
    }

    /**
     * @return the file passed to the last {@link #extract(File)} call, or {@code null}
     *         if none has been made
     */
    public File getTempFile() {
        return this.tempFile;
    }

    /**
     * @return the file name recorded in the metadata DTO
     */
    public String getOriginalFileName() {
        return this.mdDTO.getFileName();
    }

    /**
     * @return the metadata DTO this extractor writes into
     */
    public MetadataDTO getMdDTO() {
        return this.mdDTO;
    }

    /**
     * Extracts metadata (and, if enabled, suggested subjects) from the given file.
     *
     * @param file the file to analyse
     * @return the populated metadata DTO (the same instance as {@link #getMdDTO()})
     * @throws MetadataExtractionException if the MIME type cannot be determined, no
     *         extractor is registered for it, the file cannot be read, or the
     *         extraction itself fails
     */
    public MetadataDTO extract(File file) throws MetadataExtractionException {
        this.tempFile = file;
        try {
            loadRDF();
            extractMetadataFromRDF();
            if (this.se) {
                extractSuggestedSubjects();
            }
            return this.mdDTO;
        } catch (MetadataExtractionException e) {
            log.error("Metadata Extraction error: ");
            log.error(e.getMessage(), e);
            throw e;
        } finally {
            // Release the container on every exit path, not only on success.
            if (this.rdf != null) {
                this.rdf.dispose();
                this.rdf = null;
            }
        }
    }

    /**
     * Detects the MIME type of the current file, records it in the DTO and runs the
     * first extractor registered for that type, leaving the result in {@code rdf}.
     *
     * @throws MetadataExtractionException on any failure; I/O and Aperture errors are
     *         attached as the cause instead of being logged and discarded, so that the
     *         caller never continues with a missing or half-filled container
     */
    private void loadRDF() throws MetadataExtractionException {
        try {
            String mimeType = detectMimeType();
            this.mdDTO.setMimeType(mimeType);

            Set<?> factories = new DefaultExtractorRegistry().getExtractorFactories(mimeType);
            if (factories == null || factories.isEmpty()) {
                throw new MetadataExtractionException("Unable to find extractor factory for: " + mimeType);
            }
            ExtractorFactory factory = (ExtractorFactory) factories.iterator().next();

            URIImpl fileUri = new URIImpl(this.tempFile.toURI().toString());
            Model model = RDF2Go.getModelFactory().createModel();
            model.open();
            this.rdf = new RDFContainerImpl(model, fileUri);

            BufferedInputStream content =
                    new BufferedInputStream(new FileInputStream(this.tempFile), EXTRACTION_BUFFER_SIZE);
            try {
                factory.get().extract(fileUri, content, (Charset) null, mimeType, this.rdf);
            } finally {
                closeQuietly(content);
            }
        } catch (ExtractorException e) {
            throw failure("Aperture extraction error: " + e.getMessage(), e);
        } catch (FileNotFoundException e) {
            throw failure("Unable to locate the workspace file for: " + this.mdDTO.getFileName(), e);
        } catch (IOException e) {
            throw failure("Unable to read workspace file for: " + this.mdDTO.getFileName(), e);
        }
    }

    /**
     * Reads the leading bytes of the current file and identifies its MIME type.
     *
     * @return the detected MIME type, never {@code null}
     * @throws IOException if the file cannot be opened or read
     * @throws MetadataExtractionException if the identifier cannot determine a type
     */
    private String detectMimeType() throws IOException, MetadataExtractionException {
        MagicMimeTypeIdentifier identifier = new MagicMimeTypeIdentifier();
        byte[] header;
        BufferedInputStream in = new BufferedInputStream(new FileInputStream(this.tempFile));
        try {
            header = IOUtil.readBytes(in, identifier.getMinArrayLength());
        } finally {
            closeQuietly(in);
        }
        String mimeType = identifier.identify(header, this.tempFile.getPath(), (URI) null);
        if (mimeType == null) {
            throw new MetadataExtractionException("Unable to extract MimeType for: " + this.mdDTO.getFileName());
        }
        return mimeType;
    }

    /**
     * Copies the properties of interest from the RDF container into the DTO.
     *
     * <p>The creator is the {@code NCO.fullname} of the first creator node that has a
     * non-empty one; if none has, it is the value read from the last creator node
     * (possibly {@code null}), or the empty string when there are no creator nodes.
     */
    private void extractMetadataFromRDF() {
        String creator = WebUtils.EMPTY_STRING;
        Iterator<?> creators = this.rdf.getAll(NCO.creator).iterator();
        while (creators.hasNext()) {
            Node creatorNode = (Node) creators.next();
            creator = new RDFContainerImpl(this.rdf.getModel(), creatorNode.asURI()).getString(NCO.fullname);
            if (creator != null && !creator.equals(WebUtils.EMPTY_STRING)) {
                break;
            }
        }
        this.mdDTO.setTitle(this.rdf.getString(NIE.title));
        this.mdDTO.setCreator(creator);
        this.mdDTO.addSubject(this.rdf.getString(NIE.subject));
        this.mdDTO.setGenerator(this.rdf.getString(NIE.generator));
        this.mdDTO.setContentCreated(this.rdf.getDate(NIE.contentCreated));
        this.mdDTO.setContentLastModified(this.rdf.getDate(NIE.contentLastModified));
        // Guard against a missing page count: unboxing a null Integer here would throw
        // a NullPointerException. When absent, the DTO's page count is left unchanged.
        Integer pageCount = this.rdf.getInteger(NFO.pageCount);
        if (pageCount != null) {
            this.mdDTO.setPageCount(pageCount.intValue());
        }
        this.mdDTO.setKeyword(this.rdf.getString(NIE.keyword));
    }

    /**
     * Feeds the extracted plain text to the subject extractor and adds every suggested
     * subject to the DTO.
     *
     * @throws MetadataExtractionException if the subject extractor fails
     */
    private void extractSuggestedSubjects() throws MetadataExtractionException {
        String plainText = this.rdf.getString(NIE.plainTextContent);
        Iterator<String> subjects = this.subjectExtractor.extractSuggestedSubjects(plainText).iterator();
        while (subjects.hasNext()) {
            this.mdDTO.addSubject(subjects.next());
        }
    }

    /**
     * Builds a {@link MetadataExtractionException} carrying the given cause.
     *
     * <p>Only the {@code (String)} constructor of the exception is known from this
     * file, so the cause is attached through {@link Throwable#initCause(Throwable)}.
     */
    private static MetadataExtractionException failure(String message, Throwable cause) {
        MetadataExtractionException failure = new MetadataExtractionException(message);
        failure.initCause(cause);
        return failure;
    }

    /**
     * Closes a read-only stream, logging instead of throwing so that a close failure
     * cannot mask an exception already propagating from the guarded block.
     */
    private static void closeQuietly(BufferedInputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            log.warn("Unable to close input stream: " + ignored.getMessage(), ignored);
        }
    }
}
