package ws.palladian.extraction.feature;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.Bag;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.io.LineAction;

/* loaded from: input_file:ws/palladian/extraction/feature/MapTermCorpus.class */
/**
 * A term corpus which keeps its term counts in a {@link Bag} held in memory, together with the number of documents
 * that were added.
 *
 * <p>
 * The corpus can be written to and read from a GZIP compressed text file. The file layout produced by
 * {@link #save(File)} is:
 *
 * <pre>
 * numDocs#&lt;number of documents&gt;
 * &lt;empty line&gt;
 * &lt;term&gt;#&lt;count&gt;
 * &lt;term&gt;#&lt;count&gt;
 * ...
 * </pre>
 */
public final class MapTermCorpus extends AbstractTermCorpus {
    private static final Logger LOGGER = LoggerFactory.getLogger(MapTermCorpus.class);

    /** Separates a term (or the {@code numDocs} key) from its number in the serialized form. */
    private static final String SEPARATOR = "#";

    private int numDocs;
    private final Bag<String> terms;

    /** Creates an empty corpus with no terms and zero documents. */
    public MapTermCorpus() {
        this(new Bag<String>(), 0);
    }

    /**
     * Creates a corpus from existing counts. The given bag is used directly (it is not copied).
     *
     * @param terms The bag holding the terms with their counts.
     * @param numDocs The number of documents the counts were collected from.
     */
    public MapTermCorpus(Bag<String> terms, int numDocs) {
        this.numDocs = numDocs;
        this.terms = terms;
    }

    /**
     * Adds the terms of one document: every term in the set is added to the bag and the document counter is
     * incremented by one.
     *
     * @param documentTerms The terms occurring in the document.
     */
    public void addTermsFromDocument(Set<String> documentTerms) {
        terms.addAll(documentTerms);
        numDocs++;
    }

    /**
     * @param term The term to look up.
     * @return The count stored in the bag for the given term.
     */
    @Override
    public int getCount(String term) {
        return terms.count(term);
    }

    /** @return The number of documents in this corpus. */
    @Override
    public int getNumDocs() {
        return numDocs;
    }

    /** @return The size of the underlying bag. */
    @Override
    public int getNumTerms() {
        return terms.size();
    }

    /** @return The number of distinct terms in the underlying bag. */
    @Override
    public int getNumUniqueTerms() {
        return terms.unique().size();
    }

    /**
     * Loads a corpus from a GZIP compressed file as written by {@link #save(File)}.
     *
     * @param file The file to read, not <code>null</code>.
     * @return The loaded corpus.
     * @throws IOException In case the file cannot be opened or is not in GZIP format.
     */
    public static MapTermCorpus load(File file) throws IOException {
        Validate.notNull(file, "filePath must not be null");
        FileInputStream fileStream = null;
        GZIPInputStream gzipStream = null;
        try {
            // The file stream is opened separately, so that it still gets closed when the GZIP wrapper's
            // constructor fails (e.g. because the file is not GZIP compressed).
            fileStream = new FileInputStream(file);
            gzipStream = new GZIPInputStream(fileStream);
            return load((InputStream) gzipStream);
        } finally {
            FileHelper.close(new Closeable[] {gzipStream, fileStream});
        }
    }

    /**
     * Loads a corpus from an (already decompressed) stream in the text format described in the class
     * documentation.
     *
     * <p>
     * Lines whose line number is at most 1 are treated as header; among them, a line starting with
     * <code>numDocs#</code> provides the number of documents. All following lines are parsed as
     * <code>term#count</code>; lines which cannot be parsed this way are skipped.
     *
     * @param inputStream The stream to read from, not <code>null</code>.
     * @return The loaded corpus.
     */
    public static MapTermCorpus load(InputStream inputStream) {
        Validate.notNull(inputStream, "inputStream must not be null");
        // single-element array, so that the anonymous class below can write the value
        final int[] numDocs = new int[1];
        final Bag<String> terms = new Bag<String>();
        StopWatch stopWatch = new StopWatch();
        FileHelper.performActionOnEveryLine(inputStream, new LineAction() {
            public void performAction(String line, int lineNumber) {
                if (lineNumber > 1) {
                    parseTermLine(line, terms);
                } else if (line.startsWith("numDocs#")) {
                    numDocs[0] = Integer.parseInt(line.split(SEPARATOR)[1]);
                }
            }
        });
        LOGGER.debug("Loaded {} terms in {}", terms.unique().size(), stopWatch);
        return new MapTermCorpus(terms, numDocs[0]);
    }

    /**
     * Parses one <code>term#count</code> line and adds the result to the given bag.
     *
     * <p>
     * Lines which consist of exactly two separator-delimited fields are handled as before. As
     * {@link #save(File)} writes terms unescaped, a term may itself contain the separator (e.g.
     * <code>#hashtag#5</code>); in this case the line is split at the <em>last</em> separator, so that such terms
     * are no longer lost when loading a saved corpus. Lines which still cannot be interpreted are skipped.
     */
    private static void parseTermLine(String line, Bag<String> target) {
        String[] fields = line.split(SEPARATOR);
        if (fields.length == 2) {
            target.add(fields[0], Integer.parseInt(fields[1]));
            return;
        }
        int lastSeparator = line.lastIndexOf(SEPARATOR);
        if (lastSeparator < 0) {
            return; // no separator at all, e.g. an empty line
        }
        String term = line.substring(0, lastSeparator);
        int count;
        try {
            count = Integer.parseInt(line.substring(lastSeparator + 1));
        } catch (NumberFormatException ignored) {
            // Lines with more than two fields used to be skipped entirely; keep skipping those which do not end
            // with a valid count instead of failing the whole load.
            return;
        }
        target.add(term, count);
    }

    /**
     * Writes this corpus to a GZIP compressed text file in the format described in the class documentation.
     *
     * @param file The file to write, not <code>null</code>.
     * @throws IOException In case the file cannot be opened or writing fails.
     */
    public void save(File file) throws IOException {
        Validate.notNull(file, "file must not be null");
        FileOutputStream fileStream = null;
        GZIPOutputStream gzipStream = null;
        PrintWriter writer = null;
        try {
            // Opened separately, so that the file stream gets closed in case the GZIP wrapper cannot be created.
            fileStream = new FileOutputStream(file);
            gzipStream = new GZIPOutputStream(fileStream);
            // NOTE(review): this PrintWriter encodes with the platform default charset; confirm that it matches
            // the charset FileHelper uses for reading.
            writer = new PrintWriter(gzipStream);
            writer.println("numDocs#" + getNumDocs());
            writer.println();
            for (String term : terms.uniqueItems()) {
                writer.println(term + SEPARATOR + terms.count(term));
            }
            // PrintWriter never throws IOException, it only records that something went wrong. Close it here
            // (which also finishes the GZIP stream) and turn a recorded failure into an exception, so that a
            // truncated file does not go unnoticed.
            writer.close();
            if (writer.checkError()) {
                throw new IOException("Error while writing term corpus to " + file);
            }
        } finally {
            FileHelper.close(new Closeable[] {writer, gzipStream, fileStream});
        }
    }

    /** Removes all terms and resets the number of documents to zero. */
    public void clear() {
        numDocs = 0;
        terms.clear();
    }

    /**
     * Creates a new corpus which only contains the terms whose count is at least the given value. The number of
     * documents is taken over unchanged.
     *
     * @param minCount The minimum count a term needs to be kept.
     * @return A new, filtered corpus; this corpus is not modified.
     */
    public MapTermCorpus getFilteredCorpus(int minCount) {
        Bag<String> filtered = new Bag<String>();
        for (String term : terms.uniqueItems()) {
            int count = terms.count(term);
            if (count >= minCount) {
                filtered.add(term, count);
            }
        }
        return new MapTermCorpus(filtered, numDocs);
    }

    /**
     * Creates a new corpus which contains at most the given number of distinct terms, taken from the beginning of
     * the bag sorted in descending order. The number of documents is taken over unchanged.
     *
     * @param maxSize The maximum number of distinct terms to keep, greater zero.
     * @return A new, reduced corpus; this corpus is not modified.
     * @throws IllegalArgumentException In case maxSize is not greater zero.
     */
    public MapTermCorpus getReducedCorpus(int maxSize) {
        Validate.isTrue(maxSize > 0, "maxSize must be greater zero.");
        Bag<String> reduced = new Bag<String>();
        int kept = 0;
        for (String term : terms.createSorted(CollectionHelper.Order.DESCENDING).uniqueItems()) {
            if (kept >= maxSize) {
                break;
            }
            reduced.add(term, terms.count(term));
            kept++;
        }
        return new MapTermCorpus(reduced, numDocs);
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder("TermCorpus");
        builder.append(" numDocs=").append(getNumDocs());
        builder.append(" numUniqueTerms=").append(getNumUniqueTerms());
        builder.append(" numTerms=").append(getNumTerms());
        return builder.toString();
    }

    /** @return An iterator over the distinct terms in this corpus. */
    @Override
    public Iterator<String> iterator() {
        return terms.uniqueItems().iterator();
    }
}
