package pl.edu.icm.coansys.kwdextraction;

import java.io.InputStream;
import java.text.BreakIterator;
import java.util.Arrays;
import java.util.List;
import pl.edu.icm.cermine.PdfRawTextExtractor;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.coansys.importers.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/kwdextraction/RakeExtractor.class */
/**
 * Holds the keywords produced for a piece of text, a PDF stream, or the media attached to a
 * {@link DocumentProtos.DocumentWrapper}.
 *
 * <p>Keywords are computed once, at construction time, and exposed through {@link #getKeywords()}.
 *
 * <p>NOTE(review): the class name suggests the RAKE keyword-extraction algorithm, but the
 * extraction step implemented here is a placeholder: it ignores the text it is given and always
 * yields the same two entries. Confirm whether a real implementation is expected.
 */
public class RakeExtractor {
    // Fully-qualified on purpose so that the file's import section does not have to change.
    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(RakeExtractor.class.getName());

    /** Media type whose content is run through {@link PdfRawTextExtractor}. */
    private static final String PDF_MEDIA_TYPE = "media.type.pdf";

    /** Media type whose content is decoded as UTF-8 text and used as is. */
    private static final String TEXT_MEDIA_TYPE = "media.type.txt";

    private final List<String> keywords;

    /**
     * Creates an extractor for already available plain text.
     *
     * @param str the text to extract keywords from
     */
    public RakeExtractor(String str) {
        this.keywords = extractKeywords(str);
    }

    /**
     * Creates an extractor for a PDF supplied as a stream; the stream is handed to
     * {@link PdfRawTextExtractor} and is not closed by this constructor.
     *
     * @param inputStream stream passed to the PDF text extractor
     * @throws AnalysisException if the PDF text extractor fails
     */
    public RakeExtractor(InputStream inputStream) throws AnalysisException {
        this.keywords = extractKeywords((String) new PdfRawTextExtractor().extractText(inputStream));
    }

    /**
     * Creates an extractor for the media of a document wrapper. The text of every PDF and plain
     * text media entry is concatenated, in media-list order, before keyword extraction; entries
     * of any other media type are skipped.
     *
     * <p>Processing is best-effort: a PDF entry whose text cannot be extracted is logged and
     * skipped, and the remaining entries are still used.
     *
     * @param documentWrapper the document whose media entries are read
     */
    public RakeExtractor(DocumentProtos.DocumentWrapper documentWrapper) {
        StringBuilder text = new StringBuilder();
        for (DocumentProtos.Media media : documentWrapper.getMediaContainer().getMediaList()) {
            String mediaType = media.getMediaType();
            if (PDF_MEDIA_TYPE.equals(mediaType)) {
                try {
                    text.append((String) new PdfRawTextExtractor().extractText(media.getContent().newInput()));
                } catch (AnalysisException e) {
                    // One unreadable PDF must not discard the other media entries, but the
                    // failure should be visible instead of silently swallowed.
                    LOGGER.log(java.util.logging.Level.WARNING,
                            "Could not extract text from PDF media; skipping this entry", e);
                }
            } else if (TEXT_MEDIA_TYPE.equals(mediaType)) {
                text.append(media.getContent().toStringUtf8());
            }
        }
        this.keywords = extractKeywords(text.toString());
    }

    /**
     * Produces the keyword list for the given text.
     *
     * <p>Placeholder: the argument is currently not inspected and a fixed two-element list is
     * returned.
     *
     * @param str the text to extract keywords from (currently unused)
     * @return a fixed-size list containing the placeholder keywords
     */
    private static List<String> extractKeywords(String str) {
        return Arrays.asList("keyword1", "keyword2");
    }

    /**
     * Returns the keywords computed when this object was constructed.
     *
     * @return the keyword list; it is the internal list, not a copy
     */
    public List<String> getKeywords() {
        return this.keywords;
    }
}
