package pl.edu.icm.cermine.bibref;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import pl.edu.icm.cermine.bibref.extraction.features.PrevEndsWithDotFeature;
import pl.edu.icm.cermine.bibref.extraction.features.RelativeLengthFeature;
import pl.edu.icm.cermine.bibref.extraction.features.RelativeStartTresholdFeature;
import pl.edu.icm.cermine.bibref.extraction.features.SpaceBetweenLinesFeature;
import pl.edu.icm.cermine.bibref.extraction.features.StartsWithNumberFeature;
import pl.edu.icm.cermine.bibref.extraction.model.BxDocumentBibReferences;
import pl.edu.icm.cermine.bibref.extraction.tools.BibRefExtractionUtils;
import pl.edu.icm.cermine.bibref.extraction.tools.BibRefLinesClusteringEvaluator;
import pl.edu.icm.cermine.exception.AnalysisException;
import pl.edu.icm.cermine.structure.model.BxDocument;
import pl.edu.icm.cermine.structure.model.BxLine;
import pl.edu.icm.cermine.tools.classification.clustering.CompleteLinkageClusterizer;
import pl.edu.icm.cermine.tools.classification.clustering.FeatureVectorClusterizer;
import pl.edu.icm.cermine.tools.classification.features.FeatureVector;
import pl.edu.icm.cermine.tools.classification.features.FeatureVectorBuilder;
import pl.edu.icm.cermine.tools.classification.metrics.FeatureVectorEuclideanMetric;

/* loaded from: input_file:WEB-INF/lib/cermine-impl-1.1.jar:pl/edu/icm/cermine/bibref/ClusteringBibReferenceExtractor.class */
/**
 * Extracts bibliographic reference strings from a document by clustering the
 * lines of its references section.
 *
 * <p>Every reference line is turned into a feature vector and the vectors are
 * clustered. Lines that land in the same cluster as the very first line are
 * treated as the first line of a reference; every other line is appended to
 * the reference currently being built.
 */
public class ClusteringBibReferenceExtractor implements BibReferenceExtractor {
    /** Default value of the target fraction of lines considered "first lines". */
    public static final double DEFAULT_BEST_REF_RATIO = 0.4d;
    /** Default value of the upper bound on the number of lines per reference. */
    public static final int DEFAULT_MAX_REF_LINES = 10;
    /** Minimum gap between two consecutive sorted distances that yields a candidate threshold. */
    private static final double MIN_CANDIDATE_WINDOW = 0.05d;
    private static final FeatureVectorBuilder<BxLine, BxDocumentBibReferences> VECTOR_BUILDER = new FeatureVectorBuilder<>();

    static {
        VECTOR_BUILDER.setFeatureCalculators(Arrays.asList(new PrevEndsWithDotFeature(), new RelativeLengthFeature(), new RelativeStartTresholdFeature(), new SpaceBetweenLinesFeature(), new StartsWithNumberFeature()));
    }

    private double bestRefRatio = DEFAULT_BEST_REF_RATIO;
    private int maxRefLines = DEFAULT_MAX_REF_LINES;

    /**
     * Extracts the references of the given document as plain strings.
     *
     * @param bxDocument the document to extract references from
     * @return one string per detected reference, lines of a reference joined
     *         with a single space; an empty array when the document yields no
     *         reference lines
     * @throws AnalysisException declared by the implemented interface
     */
    @Override
    public String[] extractBibReferences(BxDocument bxDocument) throws AnalysisException {
        BxDocumentBibReferences refLines = BibRefExtractionUtils.extractBibRefLines(bxDocument);
        List<String> lineTexts = new ArrayList<>();
        List<FeatureVector> vectors = new ArrayList<>();
        for (BxLine line : refLines.getLines()) {
            vectors.add(VECTOR_BUILDER.getFeatureVector(line, refLines));
            lineTexts.add(line.toText());
        }
        if (lineTexts.isEmpty()) {
            return new String[0];
        }

        double maxDistance = findBestDistance(vectors);
        FeatureVectorClusterizer clusterizer = new FeatureVectorClusterizer();
        clusterizer.setClusterizer(new CompleteLinkageClusterizer(new BibRefLinesClusteringEvaluator()));
        int[] clusters = clusterizer.clusterize(vectors.toArray(new FeatureVector[vectors.size()]), VECTOR_BUILDER, new FeatureVectorEuclideanMetric(), maxDistance, false);

        List<String> references = new ArrayList<>();
        // The first line always opens a reference, so its cluster marks "first lines".
        int firstLineCluster = clusters[0];
        String current = "";
        for (int i = 0; i < clusters.length; i++) {
            String text = lineTexts.get(i);
            if (clusters[i] == firstLineCluster) {
                // A new reference starts here: flush the one built so far.
                if (!current.isEmpty()) {
                    references.add(current);
                }
                current = text;
            } else {
                current = current + " " + text;
            }
        }
        if (!current.isEmpty()) {
            references.add(current);
        }
        return references.toArray(new String[references.size()]);
    }

    /**
     * Picks the distance threshold passed to the clusterizer.
     *
     * <p>Distances of all vectors to the first vector are sorted; the midpoint
     * of every gap wider than {@link #MIN_CANDIDATE_WINDOW} becomes a candidate
     * threshold. For each candidate, the vectors closer to the first vector
     * than the candidate are counted, together with the longest stretch between
     * two consecutive such vectors (including the stretch from the last one to
     * the end of the list). Among candidates whose longest stretch does not
     * exceed {@code maxRefLines}, the one whose fraction of close vectors is
     * nearest to {@code bestRefRatio} wins.
     *
     * @param list feature vectors of the reference lines, in document order;
     *             must not be empty
     * @return the selected threshold, or {@code 0.0} when no candidate qualifies
     */
    private double findBestDistance(List<FeatureVector> list) {
        FeatureVectorEuclideanMetric metric = new FeatureVectorEuclideanMetric();
        FeatureVector first = list.get(0);
        int size = list.size();

        // Distances are computed once and reused for every candidate threshold.
        double[] distances = new double[size];
        int position = 0;
        for (FeatureVector vector : list) {
            distances[position++] = metric.getDistance(first, vector);
        }
        double[] sorted = distances.clone();
        Arrays.sort(sorted);

        HashSet<Double> candidates = new HashSet<>();
        for (int i = 1; i < size; i++) {
            double gap = sorted[i] - sorted[i - 1];
            if (gap > MIN_CANDIDATE_WINDOW) {
                candidates.add(Double.valueOf(sorted[i - 1] + (gap / 2.0d)));
            }
        }

        double bestDistance = 0.0d;
        double bestRatio = 0.0d;
        for (Double boxedCandidate : candidates) {
            double candidate = boxedCandidate.doubleValue();
            int closeCount = 0;
            int prevCloseIndex = 0;
            int longestStretch = 0;
            // Indexed loop: the position is known directly, no indexOf lookups needed.
            for (int i = 0; i < size; i++) {
                if (distances[i] < candidate) {
                    closeCount++;
                    longestStretch = Math.max(longestStretch, i - prevCloseIndex);
                    prevCloseIndex = i;
                }
            }
            longestStretch = Math.max(longestStretch, size - prevCloseIndex);

            // Must be floating-point division; int / int would truncate the ratio to 0 or 1.
            double ratio = (double) closeCount / size;
            if (longestStretch <= this.maxRefLines
                    && Math.abs(this.bestRefRatio - ratio) < Math.abs(this.bestRefRatio - bestRatio)) {
                bestDistance = candidate;
                bestRatio = ratio;
            }
        }
        return bestDistance;
    }

    /**
     * Sets the target fraction of lines close to the first line that the
     * threshold search tries to approach.
     *
     * @param d the new target ratio
     */
    public void setBestRefRatio(double d) {
        this.bestRefRatio = d;
    }

    /**
     * Sets the largest allowed stretch of lines between two consecutive lines
     * that are close to the first line, as checked by the threshold search.
     *
     * @param i the new maximum number of lines
     */
    public void setMaxRefLines(int i) {
        this.maxRefLines = i;
    }
}
