package ws.palladian.retrieval.parser;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.retrieval.HttpResult;

/* loaded from: input_file:ws/palladian/retrieval/parser/BaseDocumentParser.class */
/**
 * Skeleton implementation of {@link DocumentParser} which maps the stream, {@link HttpResult} and {@link File}
 * based {@code parse} variants onto {@code parse(InputSource)}. That method is not implemented here and is left to
 * concrete subclasses.
 */
public abstract class BaseDocumentParser implements DocumentParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(BaseDocumentParser.class);

    /**
     * Parses the given stream by wrapping it in an {@link InputSource}.
     *
     * @param inputStream The stream to parse. This method does not close it.
     * @return The parsed document.
     * @throws ParserException In case parsing fails.
     */
    @Override
    public Document parse(InputStream inputStream) throws ParserException {
        return parse(new InputSource(inputStream));
    }

    /**
     * Parses the content of the given {@link HttpResult}. The charset reported by the result is handed to the parser
     * only if the running JVM supports it; otherwise no encoding is set on the {@link InputSource}. The document URI
     * is set to the last entry of {@link HttpResult#getLocations()}.
     *
     * @param httpResult The HTTP result whose content should be parsed.
     * @return The parsed document.
     * @throws ParserException In case the result carries no content, or parsing fails.
     */
    @Override
    public Document parse(HttpResult httpResult) throws ParserException {
        byte[] content = httpResult.getContent();
        // Treat missing content like empty content, so that callers get a ParserException instead of an NPE.
        if (content == null || content.length == 0) {
            throw new ParserException("HttpResult has no content");
        }
        InputSource inputSource = new InputSource(new ByteArrayInputStream(content));
        String charset = httpResult.getCharset();
        boolean supportedCharset = isSupportedCharset(charset);
        if (supportedCharset) {
            inputSource.setEncoding(charset);
        }
        LOGGER.debug("Encoding of HttpResult: {}, is supported: {}", charset, supportedCharset);
        Document document = parse(inputSource);
        document.setDocumentURI((String) CollectionHelper.getLast(httpResult.getLocations()));
        return document;
    }

    /**
     * Checks whether the given charset name can be used in this JVM.
     *
     * @param str The charset name, may be <code>null</code>.
     * @return <code>true</code> only if the name is non-null, syntactically legal and supported; <code>false</code>
     *         otherwise.
     */
    private static boolean isSupportedCharset(String str) {
        if (str == null) {
            return false;
        }
        try {
            // The return value matters: legal, but unavailable charset names yield false without any exception.
            return Charset.isSupported(str);
        } catch (IllegalArgumentException e) {
            // Thrown (as IllegalCharsetNameException) for syntactically illegal charset names.
            return false;
        }
    }

    /**
     * Parses the given file and sets the document URI to the file's URI. The file stream opened by this method is
     * closed before returning, also in case parsing fails.
     *
     * @param file The file to parse.
     * @return The parsed document.
     * @throws ParserException In case the file cannot be found, parsing fails, or the file stream cannot be closed.
     */
    @Override
    public Document parse(File file) throws ParserException {
        try (InputStream inputStream = new FileInputStream(file)) {
            Document document = parse(inputStream);
            document.setDocumentURI(file.toURI().toString());
            return document;
        } catch (FileNotFoundException e) {
            throw new ParserException("File " + file + " not found", e);
        } catch (IOException e) {
            // Only reachable when closing the stream fails; opening failures are handled above.
            throw new ParserException("Could not close file " + file, e);
        }
    }
}
