package org.apache.poi.hwpf.converter;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/* loaded from: input_file:WEB-INF/lib/poi-scratchpad-3.9.jar:org/apache/poi/hwpf/converter/WordToTextConverter.class */
/**
 * Converts a Word document into a DOM tree that is meant to be serialized with
 * the XSLT {@code text} output method, yielding a plain-text rendering.
 *
 * <p>Images and drawn objects are ignored. Footnotes and endnotes are replaced
 * in the running text by a numbered marker ({@code [n]}) and their content is
 * collected in a separate block that is appended to the document body once a
 * document part has been processed.</p>
 */
public class WordToTextConverter extends AbstractWordConverter {
    private static final POILogger logger = POILogFactory.getLogger(WordToTextConverter.class);

    /** Zero-width space (U+200B), emitted around inserted markers. */
    private static final String ZERO_WIDTH_SPACE = "\u200b";

    /** Source of the sequential numbers used for footnote/endnote markers; starts at 1. */
    private final AtomicInteger noteCounters = new AtomicInteger(1);

    /** Block collecting the text of all notes; created lazily by {@link #processNote}. */
    private Element notes = null;

    /** Whether title/author/comments/keywords are copied into the output document. */
    private boolean outputSummaryInformation = false;

    private final TextDocumentFacade textDocumentFacade;

    /**
     * Loads the Word document stored in the given directory node and returns
     * its text.
     *
     * @param directoryNode directory node to load the document from
     * @return the text produced by the conversion
     * @throws Exception if loading, converting or serializing fails
     */
    public static String getText(DirectoryNode directoryNode) throws Exception {
        return getText(AbstractWordUtils.loadDoc(directoryNode));
    }

    /**
     * Loads the Word document from the given file and returns its text.
     *
     * @param file file to load the document from
     * @return the text produced by the conversion
     * @throws Exception if loading, converting or serializing fails
     */
    public static String getText(File file) throws Exception {
        return getText(AbstractWordUtils.loadDoc(file));
    }

    /**
     * Converts an already loaded document with a fresh converter and returns
     * its text.
     *
     * @param hWPFDocumentCore document to convert
     * @return the text produced by the conversion
     * @throws Exception if converting or serializing fails
     */
    public static String getText(HWPFDocumentCore hWPFDocumentCore) throws Exception {
        WordToTextConverter converter = new WordToTextConverter();
        converter.processDocument(hWPFDocumentCore);
        return converter.getText();
    }

    /**
     * Command-line entry point: converts {@code strArr[0]} and writes the
     * UTF-8 encoded text to {@code strArr[1]}.
     *
     * @param strArr {@code <inputFile.doc> <saveTo.txt>}
     */
    public static void main(String[] strArr) {
        if (strArr.length < 2) {
            System.err.println("Usage: WordToTextConverter <inputFile.doc> <saveTo.txt>");
            return;
        }
        System.out.println("Converting " + strArr[0]);
        System.out.println("Saving output to " + strArr[1]);
        try {
            Document converted = process(new File(strArr[0]));
            // A byte stream (not a Writer) is handed to the transformer so that the
            // requested UTF-8 output encoding is what actually ends up in the file.
            OutputStream out = new FileOutputStream(strArr[1]);
            try {
                serializeAsText(converted, new StreamResult(out));
            } finally {
                // Release the file handle even when the transformation fails.
                out.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads and converts the given file, returning the resulting DOM document.
     *
     * @param file file to load the document from
     * @return the DOM document built by the converter
     * @throws Exception if loading or converting fails
     */
    static Document process(File file) throws Exception {
        HWPFDocumentCore wordDocument = AbstractWordUtils.loadDoc(file);
        WordToTextConverter converter = new WordToTextConverter();
        converter.processDocument(wordDocument);
        return converter.getDocument();
    }

    /**
     * Serializes the document to the given result using the {@code text}
     * output method, UTF-8 encoding and no indentation.
     */
    private static void serializeAsText(Document document, StreamResult target) throws TransformerException {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "no");
        serializer.setOutputProperty(OutputKeys.METHOD, "text");
        serializer.transform(new DOMSource(document), target);
    }

    /**
     * Creates a converter that writes into a newly created, empty DOM document.
     *
     * @throws ParserConfigurationException if no document builder can be created
     */
    public WordToTextConverter() throws ParserConfigurationException {
        this(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    }

    /**
     * Creates a converter that writes into the given DOM document.
     *
     * @param document document wrapped in a new {@link TextDocumentFacade}
     */
    public WordToTextConverter(Document document) {
        this(new TextDocumentFacade(document));
    }

    /**
     * Creates a converter that writes through the given facade.
     *
     * @param textDocumentFacade facade used to create and attach all output nodes
     */
    public WordToTextConverter(TextDocumentFacade textDocumentFacade) {
        this.textDocumentFacade = textDocumentFacade;
    }

    /** Appends the collected notes block, if any note was seen, to the document body. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void afterProcess() {
        if (this.notes != null) {
            this.textDocumentFacade.getBody().appendChild(this.notes);
        }
    }

    /** Returns the DOM document held by the underlying facade. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    public Document getDocument() {
        return this.textDocumentFacade.getDocument();
    }

    /**
     * Serializes the current output document with the {@code text} output
     * method and returns the result.
     *
     * @return the serialized text
     * @throws Exception if the transformation fails
     */
    public String getText() throws Exception {
        StringWriter buffer = new StringWriter();
        serializeAsText(getDocument(), new StreamResult(buffer));
        return buffer.toString();
    }

    /** Returns whether summary information is copied into the output document. */
    public boolean isOutputSummaryInformation() {
        return this.outputSummaryInformation;
    }

    /** Appends the given characters to the element as a text node; run formatting is not used. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void outputCharacters(Element element, CharacterRun characterRun, String str) {
        element.appendChild(this.textDocumentFacade.createText(str));
    }

    /** Bookmarks produce no output of their own; only the characters of the range are processed. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processBookmarks(HWPFDocumentCore hWPFDocumentCore, Element element, Range range, int i, List<Bookmark> list) {
        processCharacters(hWPFDocumentCore, i, range, element);
    }

    /**
     * Copies the non-empty title, author, comments and keywords into the
     * output document, but only when {@link #isOutputSummaryInformation()} is set.
     */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processDocumentInformation(SummaryInformation summaryInformation) {
        if (!isOutputSummaryInformation()) {
            return;
        }
        if (AbstractWordUtils.isNotEmpty(summaryInformation.getTitle())) {
            this.textDocumentFacade.setTitle(summaryInformation.getTitle());
        }
        if (AbstractWordUtils.isNotEmpty(summaryInformation.getAuthor())) {
            this.textDocumentFacade.addAuthor(summaryInformation.getAuthor());
        }
        if (AbstractWordUtils.isNotEmpty(summaryInformation.getComments())) {
            this.textDocumentFacade.addDescription(summaryInformation.getComments());
        }
        if (AbstractWordUtils.isNotEmpty(summaryInformation.getKeywords())) {
            this.textDocumentFacade.addKeywords(summaryInformation.getKeywords());
        }
    }

    /** Delegates to the superclass and then runs {@link #afterProcess()} to attach the notes. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    public void processDocumentPart(HWPFDocumentCore hWPFDocumentCore, Range range) {
        super.processDocumentPart(hWPFDocumentCore, range);
        afterProcess();
    }

    /** Drawn objects are intentionally ignored in text output. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processDrawnObject(HWPFDocument hWPFDocument, CharacterRun characterRun, OfficeDrawing officeDrawing, String str, Element element) {
    }

    /** Endnotes are handled exactly like footnotes, see {@link #processNote}. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processEndnoteAutonumbered(HWPFDocument hWPFDocument, int i, Element element, Range range) {
        processNote(hWPFDocument, element, range);
    }

    /** Footnotes are handled by {@link #processNote}. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processFootnoteAutonumbered(HWPFDocument hWPFDocument, int i, Element element, Range range) {
        processNote(hWPFDocument, element, range);
    }

    /**
     * Outputs the link text followed by the target in parentheses. Zero-width
     * spaces are placed inside the parentheses and around every {@code /} of
     * the target.
     */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processHyperlink(HWPFDocumentCore hWPFDocumentCore, Element element, Range range, int i, String str) {
        processCharacters(hWPFDocumentCore, i, range, element);
        String spacedTarget = str.replace("/", ZERO_WIDTH_SPACE + "/" + ZERO_WIDTH_SPACE);
        element.appendChild(this.textDocumentFacade.createText(
                " (" + ZERO_WIDTH_SPACE + spacedTarget + ZERO_WIDTH_SPACE + ")"));
    }

    /** Images are intentionally ignored in text output. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processImage(Element element, boolean z, Picture picture) {
    }

    /** Images are intentionally ignored in text output. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processImage(Element element, boolean z, Picture picture, String str) {
    }

    /** Images are intentionally ignored in text output. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processImageWithoutPicturesManager(Element element, boolean z, Picture picture) {
    }

    /** Emits a newline for a line break. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processLineBreak(Element element, CharacterRun characterRun) {
        element.appendChild(this.textDocumentFacade.createText("\n"));
    }

    /**
     * Inserts a numbered marker ({@code [n]} wrapped in zero-width spaces) at
     * the current position and adds the note text, prefixed with {@code ^n}
     * and a tab, to the notes block.
     *
     * @param hWPFDocument document being converted
     * @param element element receiving the in-text marker
     * @param range range holding the note's content
     */
    protected void processNote(HWPFDocument hWPFDocument, Element element, Range range) {
        int noteNumber = this.noteCounters.getAndIncrement();
        element.appendChild(this.textDocumentFacade.createText(
                ZERO_WIDTH_SPACE + "[" + noteNumber + "]" + ZERO_WIDTH_SPACE));

        // The notes container is only created once the first note is encountered.
        if (this.notes == null) {
            this.notes = this.textDocumentFacade.createBlock();
        }

        Element noteBlock = this.textDocumentFacade.createBlock();
        this.notes.appendChild(noteBlock);
        noteBlock.appendChild(this.textDocumentFacade.createText("^" + noteNumber + "\t "));
        processCharacters(hWPFDocument, Integer.MIN_VALUE, range, noteBlock);
        noteBlock.appendChild(this.textDocumentFacade.createText("\n"));
    }

    /**
     * Appends the text of an embedded OLE2 object, wrapped in zero-width spaces.
     *
     * <p>Embedded Word documents (directories containing a {@code WordDocument}
     * entry) are converted by this class directly. For anything else the
     * extractor factory is looked up reflectively by name, so there is no
     * compile-time reference to it from this class.</p>
     *
     * @param hWPFDocument document being converted
     * @param element element receiving the extracted text
     * @param entry the embedded object's entry
     * @return {@code true} if text was appended, {@code false} if the entry is
     *         not a directory or no text could be extracted
     * @throws Exception if converting an embedded Word document fails, or if the
     *         reflective lookup/creation of the extractor fails with an exception
     *         (only {@link Error}s are handled for that step)
     */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected boolean processOle2(HWPFDocument hWPFDocument, Element element, Entry entry) throws Exception {
        if (!(entry instanceof DirectoryNode)) {
            return false;
        }
        DirectoryNode directoryNode = (DirectoryNode) entry;

        // Embedded Word documents are handled without the extractor factory.
        if (directoryNode.hasEntry("WordDocument")) {
            String embeddedText = getText(directoryNode);
            element.appendChild(this.textDocumentFacade.createText(
                    ZERO_WIDTH_SPACE + embeddedText + ZERO_WIDTH_SPACE));
            return true;
        }

        try {
            Object extractor = Class.forName("org.apache.poi.extractor.ExtractorFactory")
                    .getMethod("createExtractor", DirectoryNode.class)
                    .invoke(null, directoryNode);
            try {
                String extractedText = (String) extractor.getClass()
                        .getMethod("getText", new Class[0])
                        .invoke(extractor, new Object[0]);
                element.appendChild(this.textDocumentFacade.createText(
                        ZERO_WIDTH_SPACE + extractedText + ZERO_WIDTH_SPACE));
                return true;
            } catch (Exception e) {
                logger.log(POILogger.ERROR, "Unable to extract text from OLE entry '", entry.getName(), "': ", e, e);
                return false;
            }
        } catch (Error e2) {
            logger.log(POILogger.WARN, "There is an OLE object entry '", entry.getName(), "', but there is no text extractor for this object type ", "or text extractor factory is not available: ", "" + e2);
            return false;
        }
    }

    /** Emits a block containing a single newline for a page break. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processPageBreak(HWPFDocumentCore hWPFDocumentCore, Element element) {
        Element breakBlock = this.textDocumentFacade.createBlock();
        breakBlock.appendChild(this.textDocumentFacade.createText("\n"));
        element.appendChild(breakBlock);
    }

    /** Page references produce no extra output; only the characters of the range are processed. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processPageref(HWPFDocumentCore hWPFDocumentCore, Element element, Range range, int i, String str) {
        processCharacters(hWPFDocumentCore, i, range, element);
    }

    /**
     * Emits a paragraph element holding the given leading text {@code str},
     * the paragraph's characters and a trailing newline.
     */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processParagraph(HWPFDocumentCore hWPFDocumentCore, Element element, int i, Paragraph paragraph, String str) {
        Element paragraphElement = this.textDocumentFacade.createParagraph();
        paragraphElement.appendChild(this.textDocumentFacade.createText(str));
        processCharacters(hWPFDocumentCore, i, paragraph, paragraphElement);
        paragraphElement.appendChild(this.textDocumentFacade.createText("\n"));
        element.appendChild(paragraphElement);
    }

    /** Emits one block per section, terminated by a newline, directly under the body. */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processSection(HWPFDocumentCore hWPFDocumentCore, Section section, int i) {
        Element sectionBlock = this.textDocumentFacade.createBlock();
        processParagraphes(hWPFDocumentCore, sectionBlock, section, Integer.MIN_VALUE);
        sectionBlock.appendChild(this.textDocumentFacade.createText("\n"));
        this.textDocumentFacade.getBody().appendChild(sectionBlock);
    }

    /**
     * Emits each table row as one line: cells are separated by a tab and each
     * row is terminated by a newline.
     */
    @Override // org.apache.poi.hwpf.converter.AbstractWordConverter
    protected void processTable(HWPFDocumentCore hWPFDocumentCore, Element element, Table table) {
        int rowCount = table.numRows();
        for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
            TableRow row = table.getRow(rowIndex);
            Element rowElement = this.textDocumentFacade.createTableRow();

            int cellCount = row.numCells();
            for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {
                TableCell cell = row.getCell(cellIndex);
                Element cellElement = this.textDocumentFacade.createTableCell();
                // The tab acts as a separator, so the first cell gets none.
                if (cellIndex != 0) {
                    cellElement.appendChild(this.textDocumentFacade.createText("\t"));
                }
                processCharacters(hWPFDocumentCore, table.getTableLevel(), cell, cellElement);
                rowElement.appendChild(cellElement);
            }

            rowElement.appendChild(this.textDocumentFacade.createText("\n"));
            element.appendChild(rowElement);
        }
    }

    /**
     * Sets whether summary information is copied into the output document.
     *
     * @param z {@code true} to output title, author, comments and keywords
     */
    public void setOutputSummaryInformation(boolean z) {
        this.outputSummaryInformation = z;
    }
}
