package com.ngdata.hbaseindexer.parse.tika;

import com.google.common.collect.ImmutableMap;
import com.ngdata.hbaseindexer.Configurable;
import com.ngdata.hbaseindexer.parse.ByteArrayExtractor;
import com.ngdata.hbaseindexer.parse.SolrDocumentExtractor;
import com.ngdata.hbaseindexer.parse.SolrInputDocumentBuilder;
import java.io.ByteArrayInputStream;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.hbase.client.Result;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.handler.extraction.SolrContentHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;

/* loaded from: input_file:com/ngdata/hbaseindexer/parse/tika/TikaSolrDocumentExtractor.class */
/**
 * {@link SolrDocumentExtractor} that runs byte arrays taken from an HBase {@link Result}
 * through a Tika {@link AutoDetectParser} and merges the resulting fields into a
 * {@link SolrInputDocument}.
 *
 * <p>For every byte array returned by the configured {@link ByteArrayExtractor}, a fresh
 * {@link SolrContentHandler} is created, the bytes are parsed, and the document produced by
 * the handler is added to the target document through a {@link SolrInputDocumentBuilder},
 * together with the configured field name prefix.
 *
 * <p>The parameters passed to the content handler can be replaced with
 * {@link #configure(Map)}; when no (or an empty) map has been supplied,
 * {@link #DEFAULT_CELL_PARAMS} is used.
 */
public class TikaSolrDocumentExtractor implements SolrDocumentExtractor, Configurable {

    /**
     * Content-handler parameters used when {@link #configure(Map)} has not supplied a
     * non-empty map. Immutable and shared by all instances.
     *
     * <p>NOTE(review): the keys look like Solr Cell (ExtractingRequestHandler) parameters;
     * their exact effect is defined by {@link SolrContentHandler} - confirm against the
     * Solr version in use.
     */
    static final Map<String, String> DEFAULT_CELL_PARAMS = ImmutableMap.of(
            "lowernames", "true",
            "fmap.content_encoding", "ignored_field",
            "fmap.content_type", "ignored_field");

    private final IndexSchema indexSchema;
    private final ByteArrayExtractor extractor;
    private final String fieldNamePrefix;
    private final String mimeType;
    private final AutoDetectParser parser;

    /** Parameters supplied through {@link #configure(Map)}; {@code null} until configured. */
    private Map<String, String> params;

    /**
     * Creates an extractor.
     *
     * @param indexSchema       schema handed to every {@link SolrContentHandler} created by this instance
     * @param byteArrayExtractor source of the byte arrays to parse for a given {@link Result}
     * @param fieldNamePrefix   prefix passed to {@link SolrInputDocumentBuilder} along with each
     *                          parsed document; {@code null} is treated as the empty string
     * @param mimeType          value stored in the parse metadata under
     *                          {@link LiteralMimeDetector#MIME_TYPE} for every parse
     */
    public TikaSolrDocumentExtractor(IndexSchema indexSchema, ByteArrayExtractor byteArrayExtractor,
            String fieldNamePrefix, String mimeType) {
        this.indexSchema = indexSchema;
        this.extractor = byteArrayExtractor;
        this.fieldNamePrefix = fieldNamePrefix == null ? "" : fieldNamePrefix;
        this.mimeType = mimeType;

        // Wrap the parser's own detector in a LiteralMimeDetector.
        // NOTE(review): presumably this lets the MIME_TYPE metadata entry set in
        // extractInternal take precedence over content sniffing - confirm in LiteralMimeDetector.
        AutoDetectParser autoDetectParser = new AutoDetectParser();
        autoDetectParser.setDetector(new LiteralMimeDetector(autoDetectParser.getDetector()));
        this.parser = autoDetectParser;
    }

    /**
     * Parses every byte array the {@link ByteArrayExtractor} yields for {@code result} and adds
     * the parsed fields to {@code solrInputDocument}.
     *
     * @param result            HBase row data to extract byte arrays from
     * @param solrInputDocument document that receives the parsed fields
     * @throws RuntimeException if parsing any of the byte arrays fails
     */
    @Override
    public void extractDocument(Result result, SolrInputDocument solrInputDocument) {
        SolrInputDocumentBuilder builder = new SolrInputDocumentBuilder(solrInputDocument);
        for (byte[] value : extractor.extract(result)) {
            builder.add(extractInternal(value), fieldNamePrefix);
        }
    }

    /**
     * Parses a single byte array into a new {@link SolrInputDocument}.
     *
     * @param input raw bytes to parse
     * @return the document built by the {@link SolrContentHandler} after parsing
     * @throws RuntimeException wrapping any exception raised while parsing or building the document
     */
    private SolrInputDocument extractInternal(byte[] input) {
        Metadata metadata = new Metadata();
        metadata.add(LiteralMimeDetector.MIME_TYPE, mimeType);
        SolrContentHandler handler =
                new SolrContentHandler(metadata, new MapSolrParams(effectiveParams()), indexSchema);
        try {
            parser.parse(new ByteArrayInputStream(input), handler, metadata, new ParseContext());
            return handler.newDocument();
        } catch (Exception e) {
            // Keep the original exception as the cause; the message adds the one piece of
            // configuration that most affects how the bytes were interpreted.
            throw new RuntimeException("Tika extraction failed (mime type: " + mimeType + ")", e);
        }
    }

    /**
     * Returns the configured parameters, or {@link #DEFAULT_CELL_PARAMS} when none (or an empty
     * map) were configured. The field is read once so the check and the returned value agree.
     */
    private Map<String, String> effectiveParams() {
        Map<String, String> configured = params;
        return (configured == null || configured.isEmpty()) ? DEFAULT_CELL_PARAMS : configured;
    }

    /**
     * Replaces the parameters passed to the content handler on subsequent extractions.
     * The map is stored by reference, not copied.
     *
     * @param map handler parameters; {@code null} or empty restores {@link #DEFAULT_CELL_PARAMS}
     */
    public void configure(Map<String, String> map) {
        this.params = map;
    }
}
