package edu.umass.cs.mallet.grmm.learning.templates;

import edu.umass.cs.mallet.base.types.Alphabet;
import edu.umass.cs.mallet.base.types.AugmentableFeatureVector;
import edu.umass.cs.mallet.base.types.FeatureVector;
import edu.umass.cs.mallet.base.types.FeatureVectorSequence;
import edu.umass.cs.mallet.base.types.LabelsSequence;
import edu.umass.cs.mallet.grmm.learning.ACRF;
import edu.umass.cs.mallet.grmm.types.Variable;
import edu.umass.cs.mallet.grmm.util.THashMultiMap;
import gnu.trove.THashMap;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;

/* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/grmm/learning/templates/SimilarTokensTemplate.class */
public class SimilarTokensTemplate extends ACRF.SequenceTemplate {
    // Debug flag; not referenced anywhere else in this file (the decompiled
    // code prints its progress messages unconditionally).
    private static final boolean debug = true;
    // Label factor index handed to UnrolledGraph.getVarForLabel for both endpoints of a pair.
    private int factor;
    // When true (and wordFeaturesOnly is false), combineFv prefixes the two
    // endpoint feature vectors with "S:" and "E:" respectively.
    private boolean distinguishEndpts;
    // When true, combineFv emits a single feature named after the bin string.
    // Set to false by the constructor; no setter is visible in this file.
    private boolean wordFeaturesOnly;
    // Consulted by addInstantiatedCliques to skip some token pairs; see setExcludeAdjacent.
    private boolean excludeAdjacent;
    // Strategy that maps each token's feature vector to a bin key (or null to ignore the token).
    private FeatureVectorBinner binner;
    // Transient map created in the constructor and re-created in readObject;
    // no code in this file reads or writes its entries.
    private transient THashMap instanceCache;
    private static final long serialVersionUID = 1;
    // Version tag; writeObject writes the literal 1, which matches this value.
    private static final int CURRENT_SERIAL_VERSION = 1;
    // Decompiler rendering of the compiler-generated assertion switch;
    // assigned in the static initializer from Class.desiredAssertionStatus().
    static final boolean $assertionsDisabled;
    // Cache slot for this class's Class object, filled by the static initializer via class$(String).
    static Class class$edu$umass$cs$mallet$grmm$learning$templates$SimilarTokensTemplate;

    /* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/grmm/learning/templates/SimilarTokensTemplate$CapWordsBinner.class */
    /**
     * A {@link WordFeatureBinner} whose include pattern is {@code [A-Z][A-Za-z]*}:
     * only token text that starts with an upper-case ASCII letter and otherwise
     * consists solely of ASCII letters yields a bin.
     *
     * <p>NOTE(review): this class inherits {@code Serializable} from its parent but
     * declares no {@code serialVersionUID} of its own, so structural changes here
     * (e.g. adding a static initializer) may alter the computed UID.
     */
    public static class CapWordsBinner extends WordFeatureBinner {
        /** Creates a binner restricted to capitalised, letters-only words. */
        public CapWordsBinner() {
            super(Pattern.compile("[A-Z][A-Za-z]*"));
        }
    }

    /* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/grmm/learning/templates/SimilarTokensTemplate$FeatureVectorBinner.class */
    /**
     * Strategy for assigning a token's feature vector to a bin. Within this file,
     * tokens whose vectors map to the same bin string are grouped together and
     * paired up by the template.
     */
    public interface FeatureVectorBinner {
        /**
         * Computes the bin key for one token.
         *
         * @param featureVector the token's feature vector
         * @return the bin key, or {@code null} if the token should not be placed
         *         in any bin ({@code constructFvByWord} skips null results)
         */
        String computeBin(FeatureVector featureVector);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/grmm/learning/templates/SimilarTokensTemplate$TokenInfo.class */
    /**
     * Simple record describing one binned token: the bin key it was filed under,
     * its feature vector, and its index within the feature vector sequence.
     */
    public static class TokenInfo {
        // Bin key computed for this token (constructFvByWord passes the binner's result here).
        String featureName;
        // The token's own feature vector.
        FeatureVector fv;
        // Index of the token within the FeatureVectorSequence it came from.
        int pos;

        /**
         * @param str           bin key for the token
         * @param featureVector the token's feature vector
         * @param i             index of the token in its sequence
         */
        public TokenInfo(String str, FeatureVector featureVector, int i) {
            this.featureName = str;
            this.fv = featureVector;
            this.pos = i;
        }
    }

    /* loaded from: input_file:WEB-INF/lib/mallet-0.1.3.jar:edu/umass/cs/mallet/grmm/learning/templates/SimilarTokensTemplate$WordFeatureBinner.class */
    /**
     * Bins a feature vector by the token text recovered from its feature names.
     *
     * <p>The text is taken from the first feature (in location order) whose name
     * has the form {@code WORD=<text>} or {@code W=<text>} and does not look like
     * an offset or conjunction feature (a name containing {@code @<number>} or
     * {@code _&_}). An optional include pattern can further restrict which token
     * texts produce a bin.
     */
    public static class WordFeatureBinner implements FeatureVectorBinner, Serializable {
        // These remain non-static instance fields so that the default serialized
        // form of existing streams is unchanged.
        private Pattern findWordPtn1 = Pattern.compile("WORD=(.*)");
        private Pattern findWordPtn2 = Pattern.compile("W=(.*)");
        private Pattern findWordExcludePtn = Pattern.compile(".*(?:@-?\\d+|_&_).*");
        private Pattern wordIncludePattern;
        private static final long serialVersionUID = 1;
        private static final int CURRENT_SERIAL_VERSION = 2;

        /** Creates a binner that accepts every recovered token text. */
        public WordFeatureBinner() {
            this(null);
        }

        /**
         * Creates a binner that only accepts token texts fully matching the given pattern.
         *
         * @param pattern include pattern, or {@code null} to accept every token text
         */
        public WordFeatureBinner(Pattern pattern) {
            this.wordIncludePattern = pattern;
        }

        /**
         * Returns the token text of the vector, or {@code null} when no text can be
         * recovered or the text is rejected by the include pattern.
         *
         * @param featureVector the token's feature vector
         * @return the bin key (the token text), or {@code null} for "no bin"
         */
        @Override // edu.umass.cs.mallet.grmm.learning.templates.SimilarTokensTemplate.FeatureVectorBinner
        public String computeBin(FeatureVector featureVector) {
            String tokenText = intuitTokenText(featureVector);
            if (tokenText == null) {
                return null;
            }
            boolean accepted = this.wordIncludePattern == null
                    || this.wordIncludePattern.matcher(tokenText).matches();
            return accepted ? tokenText : null;
        }

        /**
         * Scans the vector's features in location order and returns the captured text
         * of the first usable word feature, or {@code null} if there is none.
         */
        private String intuitTokenText(FeatureVector featureVector) {
            Alphabet alphabet = featureVector.getAlphabet();
            int numLocations = featureVector.numLocations();
            for (int loc = 0; loc < numLocations; loc++) {
                String featureName = String.valueOf(alphabet.lookupObject(featureVector.indexAtLocation(loc)));

                // The secondary pattern is only consulted when the primary one does
                // not match at all; a primary match that is later excluded does not
                // fall through to the secondary pattern.
                Matcher hit = null;
                Matcher primary = this.findWordPtn1.matcher(featureName);
                if (primary.matches()) {
                    hit = primary;
                } else if (this.findWordPtn2 != null) {
                    Matcher secondary = this.findWordPtn2.matcher(featureName);
                    if (secondary.matches()) {
                        hit = secondary;
                    }
                }

                if (hit != null && !this.findWordExcludePtn.matcher(featureName).matches()) {
                    return hit.group(1);
                }
            }
            return null;
        }

        /** Writes the default field data followed by the serial version tag. */
        private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
            objectOutputStream.defaultWriteObject();
            objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
        }

        /**
         * Reads the default field data and the version tag.
         *
         * @throws RuntimeException if the stream was written with serial version 1,
         *         which this class refuses to load
         */
        private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
            objectInputStream.defaultReadObject();
            int version = objectInputStream.readInt();
            if (version == 1) {
                throw new RuntimeException(
                        "Cannot deserialize WordFeatureBinner: unsupported serial version " + version);
            }
        }
    }

    /**
     * Creates a template for the given label factor that does not distinguish
     * the two endpoints and bins tokens with a {@link CapWordsBinner}.
     *
     * @param i label factor index
     */
    public SimilarTokensTemplate(int i) {
        this(i, false);
    }

    /**
     * Creates a template for the given label factor that bins tokens with a
     * {@link CapWordsBinner}.
     *
     * @param i label factor index
     * @param z whether the combined features mark which endpoint they came from
     */
    public SimilarTokensTemplate(int i, boolean z) {
        this(i, z, new CapWordsBinner());
    }

    /**
     * Creates a fully specified template.
     *
     * @param i                   label factor index
     * @param z                   whether the combined features mark which endpoint they came from
     * @param featureVectorBinner strategy used to group tokens into bins
     */
    public SimilarTokensTemplate(int i, boolean z, FeatureVectorBinner featureVectorBinner) {
        // Caller-supplied configuration.
        this.factor = i;
        this.distinguishEndpts = z;
        this.binner = featureVectorBinner;
        // Fixed defaults.
        this.wordFeaturesOnly = false;
        this.excludeAdjacent = true;
        this.instanceCache = new THashMap();
    }

    /**
     * Adds one pairwise clique to the unrolled graph for every pair of tokens in
     * the sequence that fall into the same bin.
     *
     * <p>When {@code excludeAdjacent} is set, pairs whose sequence positions differ
     * by at most one are skipped. Adjacency is judged on the tokens' positions in
     * the sequence, not on their indices within the per-bin list, so a word that
     * occurs exactly twice at distant positions still receives an edge.
     *
     * <p>Progress and the total number of edges created are printed to
     * {@code System.err}.
     *
     * @param unrolledGraph         graph that receives the new cliques
     * @param featureVectorSequence per-token feature vectors of the instance
     * @param labelsSequence        not used by this template
     */
    @Override // edu.umass.cs.mallet.grmm.learning.ACRF.SequenceTemplate
    public void addInstantiatedCliques(ACRF.UnrolledGraph unrolledGraph, FeatureVectorSequence featureVectorSequence, LabelsSequence labelsSequence) {
        THashMultiMap fvByWord = constructFvByWord(featureVectorSequence);
        int numSkipEdges = 0;
        // Iterate as Object and cast: this compiles whether or not the multimap's key set is generic.
        for (Object key : fvByWord.keySet()) {
            String word = (String) key;
            List occurrences = (List) fvByWord.get(word);
            int count = occurrences.size();
            boolean reportProgress = count > 1;
            if (reportProgress) {
                System.err.print("Processing list of size " + count + " (" + word + ")");
            }
            for (int a = 0; a < count; a++) {
                TokenInfo first = (TokenInfo) occurrences.get(a);
                for (int b = a + 1; b < count; b++) {
                    TokenInfo second = (TokenInfo) occurrences.get(b);
                    if (this.excludeAdjacent && Math.abs(second.pos - first.pos) <= 1) {
                        continue;
                    }
                    Variable firstVar = unrolledGraph.getVarForLabel(first.pos, this.factor);
                    Variable secondVar = unrolledGraph.getVarForLabel(second.pos, this.factor);
                    // Report the sequence position that was actually looked up.
                    if (!$assertionsDisabled && firstVar == null) {
                        throw new AssertionError("Couldn't get label factor " + this.factor + " time " + first.pos);
                    }
                    if (!$assertionsDisabled && secondVar == null) {
                        throw new AssertionError("Couldn't get label factor " + this.factor + " time " + second.pos);
                    }
                    Variable[] endpoints = {firstVar, secondVar};
                    unrolledGraph.addClique(new ACRF.UnrolledVarSet(unrolledGraph, this, endpoints, combineFv(word, first.fv, second.fv)));
                    numSkipEdges++;
                }
            }
            if (reportProgress) {
                System.err.println("...done.");
            }
        }
        System.err.println("SimilarTokensTemplate: Total skip edges = " + numSkipEdges);
    }

    /**
     * Groups the tokens of a sequence by the bin key the configured binner assigns
     * to them. Tokens for which the binner returns {@code null} are left out.
     *
     * @param featureVectorSequence per-token feature vectors
     * @return multimap from bin key to the {@link TokenInfo} entries filed under it
     */
    private THashMultiMap constructFvByWord(FeatureVectorSequence featureVectorSequence) {
        final int length = featureVectorSequence.size();
        THashMultiMap byBin = new THashMultiMap(length);
        for (int pos = 0; pos < length; pos++) {
            FeatureVector tokenFv = featureVectorSequence.getFeatureVector(pos);
            String bin = this.binner.computeBin(tokenFv);
            if (bin == null) {
                continue; // token does not belong to any bin
            }
            byBin.put(bin, new TokenInfo(bin, tokenFv, pos));
        }
        return byBin;
    }

    /**
     * Builds the feature vector attached to the clique joining two tokens.
     *
     * <ul>
     *   <li>{@code wordFeaturesOnly}: a single feature named after the bin key, value 1.0;</li>
     *   <li>otherwise, with {@code distinguishEndpts}: both vectors added with the
     *       prefixes {@code "S:"} and {@code "E:"};</li>
     *   <li>otherwise: both vectors added as they are.</li>
     * </ul>
     *
     * @param str            bin key shared by the two tokens
     * @param featureVector  feature vector of the first token; its alphabet is used for the result
     * @param featureVector2 feature vector of the second token
     * @return the combined feature vector
     */
    private FeatureVector combineFv(String str, FeatureVector featureVector, FeatureVector featureVector2) {
        Alphabet dict = featureVector.getAlphabet();
        AugmentableFeatureVector combined = new AugmentableFeatureVector(dict, true);

        if (this.wordFeaturesOnly) {
            combined.add(dict.lookupIndex(str), 1.0d);
            return combined;
        }

        if (!this.distinguishEndpts) {
            combined.add(featureVector);
            combined.add(featureVector2);
            return combined;
        }

        combined.add(featureVector, "S:");
        combined.add(featureVector2, "E:");
        return combined;
    }

    /**
     * Replaces the strategy used to assign tokens to bins.
     *
     * @param featureVectorBinner the new binner; stored as-is, with no null check
     */
    public void setBinner(FeatureVectorBinner featureVectorBinner) {
        this.binner = featureVectorBinner;
    }

    /**
     * @return the current value of the exclude-adjacent flag (set to {@code true}
     *         by the constructor)
     */
    public boolean isExcludeAdjacent() {
        return this.excludeAdjacent;
    }

    /**
     * Sets the flag that {@code addInstantiatedCliques} consults to decide whether
     * adjacent token pairs are skipped when creating cliques.
     *
     * @param z new value of the flag
     */
    public void setExcludeAdjacent(boolean z) {
        this.excludeAdjacent = z;
    }

    /**
     * Serialization hook: writes the non-transient fields, then an int version tag.
     * The order must mirror {@code readObject}.
     */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        objectOutputStream.defaultWriteObject();
        // Version tag; equals CURRENT_SERIAL_VERSION (the constant was inlined).
        objectOutputStream.writeInt(1);
    }

    /**
     * Deserialization hook: reads the non-transient fields and the version tag,
     * then re-creates the transient instance cache.
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        // The version tag is consumed to keep the stream aligned but is not inspected.
        objectInputStream.readInt();
        // instanceCache is transient, so it must be rebuilt here.
        this.instanceCache = new THashMap();
    }

    /**
     * Looks up a class by name, converting a failed lookup into an unchecked
     * {@link NoClassDefFoundError} whose cause is the original
     * {@link ClassNotFoundException}.
     *
     * @param str fully qualified class name
     * @return the loaded class
     * @throws NoClassDefFoundError if the class cannot be found
     */
    static Class class$(String str) {
        try {
            return Class.forName(str);
        } catch (ClassNotFoundException e) {
            // initCause returns Throwable, which cannot be thrown from a method
            // without a throws clause; attach the cause first, then throw the
            // error through its own type.
            NoClassDefFoundError error = new NoClassDefFoundError();
            error.initCause(e);
            throw error;
        }
    }

    // Resolves (and caches) this class's Class object, then records whether
    // assertions are disabled for it.
    static {
        Class templateClass = class$edu$umass$cs$mallet$grmm$learning$templates$SimilarTokensTemplate;
        if (templateClass == null) {
            templateClass = class$("edu.umass.cs.mallet.grmm.learning.templates.SimilarTokensTemplate");
            class$edu$umass$cs$mallet$grmm$learning$templates$SimilarTokensTemplate = templateClass;
        }
        $assertionsDisabled = !templateClass.desiredAssertionStatus();
    }
}
