package pl.edu.icm.coansys.kwdextraction.stat;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import pl.edu.icm.coansys.kwdextraction.utils.IntStringPair;
import pl.edu.icm.coansys.kwdextraction.utils.IntWritablePair;

/* loaded from: input_file:pl/edu/icm/coansys/kwdextraction/stat/StatMap.class */
public class StatMap extends MapReduceBase implements Mapper<IntStringPair, IntStringPair, Text, IntWritablePair> {
    public static int MAX_LENGTH;
    public static String PATH_TO_STOPWORDS;
    protected Set<String> stopwords = getStopwords();

    private Set<String> getStopwords() {
        ArrayList arrayList = new ArrayList();
        try {
            BufferedReader bufferedReader = new BufferedReader(new FileReader(PATH_TO_STOPWORDS));
            while (true) {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    break;
                }
                arrayList.add(readLine);
            }
            bufferedReader.close();
        } catch (IOException e) {
        }
        return new HashSet(arrayList);
    }

    protected String cleanWord(String str) {
        return str.trim().toLowerCase();
    }

    protected boolean isNum(String str) {
        try {
            Double.parseDouble(str);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    private boolean toAdd(String str) {
        boolean z = true;
        for (String str2 : str.split(" ")) {
            if (this.stopwords.contains(str2) || str2.matches("\\W+") || isNum(str2)) {
                z = false;
            }
            if (!str2.matches(str2.replaceAll("[^a-zA-Z0-9-\\s]", ""))) {
                z = false;
            }
        }
        return z;
    }

    public void map(IntStringPair intStringPair, IntStringPair intStringPair2, OutputCollector<Text, IntWritablePair> outputCollector, Reporter reporter) throws IOException {
        String second = intStringPair2.getSecond();
        int intValue = intStringPair2.getFirst().intValue();
        Locale locale = new Locale("en");
        IntWritable intWritable = new IntWritable(intStringPair.getFirst().intValue());
        BreakIterator wordInstance = BreakIterator.getWordInstance(locale);
        wordInstance.setText(second);
        int first = wordInstance.first();
        ArrayList arrayList = new ArrayList();
        while (true) {
            int next = wordInstance.next();
            if (next == -1) {
                break;
            }
            String cleanWord = cleanWord(second.substring(first, next));
            first = next;
            if (!cleanWord.isEmpty() && !cleanWord.matches("\\W") && !cleanWord.matches("_")) {
                arrayList.add(cleanWord);
            }
        }
        for (int i = MAX_LENGTH; i > 0; i--) {
            String str = "";
            if (arrayList.size() >= i) {
                for (int i2 = 0; i2 < i; i2++) {
                    str = str + ((String) arrayList.get(i2)) + " ";
                }
                int i3 = 0 + 1;
                if (toAdd(str)) {
                    outputCollector.collect(new Text(str), new IntWritablePair(new IntWritable(intValue + i3), intWritable));
                }
                for (int i4 = i; i4 < arrayList.size(); i4++) {
                    i3++;
                    str = str.substring(str.indexOf(" ") + 1).concat(((String) arrayList.get(i4)).concat(" "));
                    if (toAdd(str)) {
                        outputCollector.collect(new Text(str), new IntWritablePair(new IntWritable(intValue + i3), intWritable));
                    }
                }
            }
        }
    }

    public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
        map((IntStringPair) obj, (IntStringPair) obj2, (OutputCollector<Text, IntWritablePair>) outputCollector, reporter);
    }
}
