package weka.attributeSelection;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Vector;
import no.uib.cipr.matrix.Matrices;
import no.uib.cipr.matrix.SymmDenseEVD;
import no.uib.cipr.matrix.UpperSymmDenseMatrix;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Center;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;
import weka.gui.knowledgeflow.KnowledgeFlowApp;

/* loaded from: input_file:weka/attributeSelection/PrincipalComponents.class */
public class PrincipalComponents extends UnsupervisedAttributeEvaluator implements AttributeTransformer, OptionHandler {
    /** Serialization version identifier. */
    private static final long serialVersionUID = -3675307197777734007L;
    /** Working copy of the training data; replaced by the output of each preprocessing filter during a build. */
    private Instances m_trainInstances;
    /** Empty header of the unfiltered training data; incoming instances are checked against it before conversion. */
    private Instances m_trainHeader;
    /** Header of the data in principal component space, as produced by setOutputFormat(). */
    private Instances m_transformedFormat;
    /** Header used when data is mapped back to the original space, as produced by setOutputFormatOriginal(). */
    private Instances m_originalSpaceFormat;
    /** Set to true during a build when the training data has a class index &gt;= 0. */
    private boolean m_hasClass;
    /** Class index of the training data recorded during a build (only assigned when a class is set). */
    private int m_classIndex;
    /** Number of attributes remaining after all preprocessing filters have been applied. */
    private int m_numAttribs;
    /** Number of training instances after preprocessing. */
    private int m_numInstances;
    /** Matrix that is eigen-decomposed; reported as "Covariance" when centering and "Correlation" otherwise. */
    private UpperSymmDenseMatrix m_correlation;
    /** Per-attribute means, computed before the data is centered/standardized. */
    private double[] m_means;
    /** Per-attribute standard deviations, computed before the data is centered/standardized. */
    private double[] m_stdDevs;
    /** Eigenvectors as returned by the decomposition; indexed [attribute][component]. */
    private double[][] m_eigenvectors;
    /** Index ordering of m_eigenvalues returned by Utils.sort; the code walks it from the end to visit the strongest components first. */
    private int[] m_sortedEigens;
    /** Filter that replaces missing values. */
    private ReplaceMissingValues m_replaceMissingFilter;
    /** Filter that converts nominal attributes to binary ones. */
    private NominalToBinary m_nominalToBinFilter;
    /** Filter that removes the class and single-valued attributes; null when nothing has to be removed. */
    private Remove m_attributeFilter;
    /** Centering filter; only created when m_center is true. */
    private Center m_centerFilter;
    /** Standardizing filter; only created when m_center is false. */
    private Standardize m_standardizeFilter;
    /** Transposed matrix of the retained eigenvectors; only built when transforming back to the original space. */
    private double[][] m_eTranspose;
    /** If true the data is centered rather than standardized. */
    private boolean m_center = false;
    /** Eigenvalues of the decomposed matrix; null until the evaluator has been built. */
    private double[] m_eigenvalues = null;
    /** Sum of all eigenvalues (after negative ones have been clamped to zero). */
    private double m_sumOfEigenValues = KStarConstants.FLOOR;
    /** Number of attributes in the transformed header (including the class, if any); -1 until computed. */
    private int m_outputNumAtts = -1;
    /** Proportion of variance that the retained components must account for. */
    private double m_coverVariance = 0.95d;
    /** If true, data is projected into PC space and then mapped back to the original space. */
    private boolean m_transBackToOriginal = false;
    /** Maximum number of original attribute names used in a transformed attribute's name; values &lt;= 0 mean "all". */
    private int m_maxAttrsInName = 5;

    /**
     * Returns a description of this evaluator.
     *
     * @return a human-readable description of what the evaluator does
     */
    public String globalInfo() {
        final String description = "Performs a principal components analysis and transformation of the data. Use in conjunction with a Ranker search. Dimensionality reduction is accomplished by choosing enough eigenvectors to account for some percentage of the variance in the original data---default 0.95 (95%). Attribute noise can be filtered by transforming to the PC space, eliminating some of the worst eigenvectors, and then transforming back to the original space.";
        return description;
    }

    /**
     * Lists the command-line options understood by this evaluator.
     *
     * @return an enumeration of the four available options (-C, -R, -O, -A)
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>(4);

        Option center = new Option("\tCenter (rather than standardize) the\n\tdata and compute PCA using the covariance (rather\n\t than the correlation) matrix.", "C", 0, "-C");
        Option variance = new Option("\tRetain enough PC attributes to account \n\tfor this proportion of variance in the original data.\n\t(default = 0.95)", "R", 1, "-R");
        Option backToOriginal = new Option("\tTransform through the PC space and \n\tback to the original space.", "O", 0, "-O");
        Option maxNames = new Option("\tMaximum number of attributes to include in \n\ttransformed attribute names. (-1 = include all)", "A", 1, "-A");

        options.addElement(center);
        options.addElement(variance);
        options.addElement(backToOriginal);
        options.addElement(maxNames);
        return options.elements();
    }

    /**
     * Parses the given option array and configures this evaluator.
     * Recognised options: -R &lt;proportion&gt;, -A &lt;count&gt;, -O and -C.
     *
     * @param strArr the options to parse
     * @throws Exception if an option value cannot be parsed
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        resetOptions();

        String varianceText = Utils.getOption('R', strArr);
        if (!varianceText.isEmpty()) {
            setVarianceCovered(Double.parseDouble(varianceText));
        }

        String maxNamesText = Utils.getOption('A', strArr);
        if (!maxNamesText.isEmpty()) {
            setMaximumAttributeNames(Integer.parseInt(maxNamesText));
        }

        boolean backToOriginal = Utils.getFlag('O', strArr);
        setTransformBackToOriginal(backToOriginal);

        boolean center = Utils.getFlag('C', strArr);
        setCenterData(center);
    }

    /**
     * Restores every user-settable option to its default value.
     *
     * Called at the start of setOptions so that options which are absent from
     * the supplied array fall back to their defaults instead of silently
     * keeping whatever value a previous configuration left behind.
     */
    private void resetOptions() {
        this.m_coverVariance = 0.95d;
        this.m_sumOfEigenValues = KStarConstants.FLOOR;
        this.m_transBackToOriginal = false;
        // Previously left untouched, so a missing -A kept the old value.
        this.m_center = false;
        this.m_maxAttrsInName = 5;
    }

    /**
     * Returns the tip text for the centerData property.
     *
     * @return the tip text
     */
    public String centerDataTipText() {
        final String tip = "Center (rather than standardize) the data. PCA will be computed from the covariance (rather than correlation) matrix";
        return tip;
    }

    /**
     * Sets whether the data is centered instead of standardized.
     *
     * @param z true to center the data, false to standardize it
     */
    public void setCenterData(boolean z) {
        m_center = z;
    }

    /**
     * Returns whether the data is centered instead of standardized.
     *
     * @return true if the data is centered
     */
    public boolean getCenterData() {
        return m_center;
    }

    /**
     * Returns the tip text for the varianceCovered property.
     *
     * @return the tip text
     */
    public String varianceCoveredTipText() {
        final String tip = "Retain enough PC attributes to account for this proportion of variance.";
        return tip;
    }

    /**
     * Sets the proportion of variance the retained components must account for.
     *
     * @param d the proportion of variance to cover
     */
    public void setVarianceCovered(double d) {
        m_coverVariance = d;
    }

    /**
     * Returns the proportion of variance the retained components must account for.
     *
     * @return the proportion of variance to cover
     */
    public double getVarianceCovered() {
        return m_coverVariance;
    }

    /**
     * Returns the tip text for the maximumAttributeNames property.
     *
     * @return the tip text
     */
    public String maximumAttributeNamesTipText() {
        final String tip = "The maximum number of attributes to include in transformed attribute names.";
        return tip;
    }

    /**
     * Sets the maximum number of original attribute names used when naming
     * a transformed attribute.
     *
     * @param i the maximum number of names to include
     */
    public void setMaximumAttributeNames(int i) {
        m_maxAttrsInName = i;
    }

    /**
     * Returns the maximum number of original attribute names used when naming
     * a transformed attribute.
     *
     * @return the maximum number of names to include
     */
    public int getMaximumAttributeNames() {
        return m_maxAttrsInName;
    }

    /**
     * Returns the tip text for the transformBackToOriginal property.
     *
     * @return the tip text
     */
    public String transformBackToOriginalTipText() {
        final String tip = "Transform through the PC space and back to the original space. If only the best n PCs are retained (by setting varianceCovered < 1) then this option will give a dataset in the original space but with less attribute noise.";
        return tip;
    }

    /**
     * Sets whether data is mapped back to the original space after being
     * projected into principal component space.
     *
     * @param z true to transform back to the original space
     */
    public void setTransformBackToOriginal(boolean z) {
        m_transBackToOriginal = z;
    }

    /**
     * Returns whether data is mapped back to the original space after being
     * projected into principal component space.
     *
     * @return true if data is transformed back to the original space
     */
    public boolean getTransformBackToOriginal() {
        return m_transBackToOriginal;
    }

    /**
     * Returns the current configuration as an option array that setOptions
     * can parse back.
     *
     * The numeric values are rendered with String.valueOf so that the result
     * depends only on this evaluator's own state; they were previously built
     * by prefixing an unrelated GUI constant, which would make the values
     * unparseable if that constant were ever non-empty.
     *
     * @return the current options
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> options = new ArrayList<String>();
        if (getCenterData()) {
            options.add("-C");
        }
        options.add("-R");
        options.add(String.valueOf(getVarianceCovered()));
        options.add("-A");
        options.add(String.valueOf(getMaximumAttributeNames()));
        if (getTransformBackToOriginal()) {
            options.add("-O");
        }
        return options.toArray(new String[0]);
    }

    /**
     * Returns the capabilities of this evaluator: nominal, numeric and date
     * attributes, missing values, and any of nominal, unary, numeric, date,
     * missing or absent class.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();

        // Enabled in this exact order: attribute kinds first, then class kinds.
        Capabilities.Capability[] supported = {
            Capabilities.Capability.NOMINAL_ATTRIBUTES,
            Capabilities.Capability.NUMERIC_ATTRIBUTES,
            Capabilities.Capability.DATE_ATTRIBUTES,
            Capabilities.Capability.MISSING_VALUES,
            Capabilities.Capability.NOMINAL_CLASS,
            Capabilities.Capability.UNARY_CLASS,
            Capabilities.Capability.NUMERIC_CLASS,
            Capabilities.Capability.DATE_CLASS,
            Capabilities.Capability.MISSING_CLASS_VALUES,
            Capabilities.Capability.NO_CLASS,
        };
        for (Capabilities.Capability capability : supported) {
            result.enable(capability);
        }
        return result;
    }

    /**
     * Checks that the data can be handled and then builds the principal
     * components transformation from it.
     *
     * @param instances the training data
     * @throws Exception if the data fails the capabilities test or the build fails
     */
    @Override
    public void buildEvaluator(Instances instances) throws Exception {
        Capabilities required = getCapabilities();
        required.testWithFail(instances);
        buildAttributeConstructor(instances);
    }

    /**
     * Resets all state from a previous build, preprocesses a copy of the
     * training data and fills the matrix that is later eigen-decomposed.
     *
     * Preprocessing steps, in order: replace missing values, convert nominal
     * attributes to binary, remove the class attribute and every attribute
     * with at most one distinct value, then (inside fillCovariance) center or
     * standardize.
     *
     * @param instances the training data; it is copied, not modified
     * @throws Exception if a filter fails or the filtered data cannot be handled
     */
    public void initializeAndComputeMatrix(Instances instances) throws Exception {
        // Discard everything left over from a previous build.
        this.m_eigenvalues = null;
        this.m_outputNumAtts = -1;
        this.m_attributeFilter = null;
        this.m_nominalToBinFilter = null;
        this.m_sumOfEigenValues = KStarConstants.FLOOR;
        // Must be reset too: otherwise a rebuild on class-less data after a
        // build on data with a class would still behave as if a class existed.
        this.m_hasClass = false;

        this.m_trainInstances = new Instances(instances);
        this.m_trainHeader = new Instances(this.m_trainInstances, 0);

        this.m_replaceMissingFilter = new ReplaceMissingValues();
        this.m_replaceMissingFilter.setInputFormat(this.m_trainInstances);
        this.m_trainInstances = Filter.useFilter(this.m_trainInstances, this.m_replaceMissingFilter);

        this.m_nominalToBinFilter = new NominalToBinary();
        this.m_nominalToBinFilter.setInputFormat(this.m_trainInstances);
        this.m_trainInstances = Filter.useFilter(this.m_trainInstances, this.m_nominalToBinFilter);

        // Collect attributes to drop: those carrying no information (at most
        // one distinct value) and the class attribute. The class is skipped in
        // the loop so that its index is never listed twice.
        int classIndex = this.m_trainInstances.classIndex();
        ArrayList<Integer> toRemove = new ArrayList<Integer>();
        for (int att = 0; att < this.m_trainInstances.numAttributes(); att++) {
            if (att != classIndex && this.m_trainInstances.numDistinctValues(att) <= 1) {
                toRemove.add(Integer.valueOf(att));
            }
        }
        if (classIndex >= 0) {
            this.m_hasClass = true;
            this.m_classIndex = classIndex;
            toRemove.add(Integer.valueOf(classIndex));
        }

        if (!toRemove.isEmpty()) {
            int[] indices = new int[toRemove.size()];
            for (int k = 0; k < indices.length; k++) {
                indices[k] = toRemove.get(k).intValue();
            }
            this.m_attributeFilter = new Remove();
            this.m_attributeFilter.setAttributeIndicesArray(indices);
            this.m_attributeFilter.setInvertSelection(false);
            this.m_attributeFilter.setInputFormat(this.m_trainInstances);
            this.m_trainInstances = Filter.useFilter(this.m_trainInstances, this.m_attributeFilter);
        }

        // The filtered data must still be something this evaluator can handle.
        getCapabilities().testWithFail(this.m_trainInstances);

        this.m_numInstances = this.m_trainInstances.numInstances();
        this.m_numAttribs = this.m_trainInstances.numAttributes();
        fillCovariance();
    }

    /**
     * Builds the transformation: computes the matrix, eigen-decomposes it,
     * derives the output format and, if requested, prepares the transposed
     * eigenvector matrix used to map data back to the original space.
     *
     * @param instances the training data
     * @throws Exception if preprocessing or the decomposition fails
     */
    private void buildAttributeConstructor(Instances instances) throws Exception {
        initializeAndComputeMatrix(instances);

        SymmDenseEVD decomposition = SymmDenseEVD.factorize(m_correlation);
        m_eigenvectors = Matrices.getArray(decomposition.getEigenvectors());
        m_eigenvalues = decomposition.getEigenvalues();

        // Clamp eigenvalues below the floor to zero.
        for (int k = 0; k < m_eigenvalues.length; k++) {
            if (m_eigenvalues[k] < KStarConstants.FLOOR) {
                m_eigenvalues[k] = 0.0d;
            }
        }
        m_sortedEigens = Utils.sort(m_eigenvalues);
        m_sumOfEigenValues = Utils.sum(m_eigenvalues);
        m_transformedFormat = setOutputFormat();

        if (!m_transBackToOriginal) {
            return;
        }

        m_originalSpaceFormat = setOutputFormatOriginal();

        // Number of retained components (the class attribute is not a component).
        int retained;
        if (m_transformedFormat.classIndex() < 0) {
            retained = m_transformedFormat.numAttributes();
        } else {
            retained = m_transformedFormat.numAttributes() - 1;
        }

        // Retained eigenvectors are stored in columns 1..retained, strongest
        // first; column 0 is left at zero and skipped when converting back.
        double[][] ordered = new double[m_eigenvectors.length][retained + 1];
        for (int rank = 1; rank <= retained; rank++) {
            int sortedPos = m_numAttribs - rank;
            for (int row = 0; row < m_numAttribs; row++) {
                ordered[row][rank] = m_eigenvectors[row][m_sortedEigens[sortedPos]];
            }
        }

        int rowCount = ordered.length;
        int colCount = ordered[0].length;
        m_eTranspose = new double[colCount][rowCount];
        for (int r = 0; r < rowCount; r++) {
            for (int c = 0; c < colCount; c++) {
                m_eTranspose[c][r] = ordered[r][c];
            }
        }
    }

    /**
     * Returns the header of the data that transformedData produces.
     *
     * @return the original-space format when transforming back to the
     *         original space, otherwise the principal component format
     * @throws Exception if the evaluator has not been built yet
     */
    @Override
    public Instances transformedHeader() throws Exception {
        if (m_eigenvalues == null) {
            throw new Exception("Principal components hasn't been built yet");
        }
        if (m_transBackToOriginal) {
            return m_originalSpaceFormat;
        }
        return m_transformedFormat;
    }

    /**
     * Returns an empty copy (header only) of the preprocessed training data.
     *
     * @return the header of the filtered training data
     */
    public Instances getFilteredInputFormat() {
        Instances header = new Instances(m_trainInstances, 0);
        return header;
    }

    /**
     * Returns the matrix that was eigen-decomposed, converted to a 2D array.
     *
     * @return the correlation (or covariance, when centering) matrix
     */
    public double[][] getCorrelationMatrix() {
        double[][] asArray = Matrices.getArray(m_correlation);
        return asArray;
    }

    /**
     * Returns the eigenvectors in the order produced by the decomposition.
     * The internal array is returned directly, not a copy.
     *
     * @return the unsorted eigenvectors
     */
    public double[][] getUnsortedEigenVectors() {
        return m_eigenvectors;
    }

    /**
     * Returns the eigenvalues, or null if the evaluator has not been built.
     * The internal array is returned directly, not a copy.
     *
     * @return the eigenvalues
     */
    public double[] getEigenValues() {
        return m_eigenvalues;
    }

    /**
     * Converts every instance of the given dataset and returns the results
     * as a new dataset.
     *
     * @param instances the data to transform; its header must match the training header
     * @return the transformed data
     * @throws Exception if the evaluator has not been built or an instance cannot be converted
     */
    @Override
    public Instances transformedData(Instances instances) throws Exception {
        if (m_eigenvalues == null) {
            throw new Exception("Principal components hasn't been built yet");
        }

        Instances format = m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
        Instances result = new Instances(format);

        int total = instances.numInstances();
        for (int index = 0; index < total; index++) {
            Instance converted = convertInstance(instances.instance(index));
            result.add(converted);
        }
        return result;
    }

    /**
     * Evaluates a transformed attribute as one minus the share of the total
     * eigenvalue sum covered by the strongest (i + 1) components.
     *
     * @param i the index of the transformed attribute
     * @return 1.0 when transforming back to the original space, otherwise
     *         the share of the eigenvalue sum not covered up to this component
     * @throws Exception if the evaluator has not been built yet
     */
    @Override
    public double evaluateAttribute(int i) throws Exception {
        if (m_eigenvalues == null) {
            throw new Exception("Principal components hasn't been built yet!");
        }
        if (m_transBackToOriginal) {
            // Attributes cannot be ranked in the original space.
            return 1.0d;
        }

        double covered = 0.0d;
        for (int rank = 0; rank <= i; rank++) {
            int sortedPos = m_numAttribs - 1 - rank;
            covered += m_eigenvalues[m_sortedEigens[sortedPos]];
        }
        return 1.0d - (covered / m_sumOfEigenValues);
    }

    /**
     * Records per-attribute means and standard deviations, centers or
     * standardizes the training data, and fills the upper triangle of the
     * symmetric matrix with the summed cross-products divided by (n - 1).
     *
     * @throws Exception if the centering/standardizing filter fails
     */
    private void fillCovariance() throws Exception {
        final int attCount = m_trainInstances.numAttributes();
        m_means = new double[attCount];
        m_stdDevs = new double[attCount];
        // Statistics are taken before the data is centered/standardized.
        for (int att = 0; att < attCount; att++) {
            m_means[att] = m_trainInstances.meanOrMode(att);
            double variance = Utils.variance(m_trainInstances.attributeToDoubleArray(att));
            m_stdDevs[att] = Math.sqrt(variance);
        }

        if (!m_center) {
            m_standardizeFilter = new Standardize();
            m_standardizeFilter.setInputFormat(m_trainInstances);
            m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter);
        } else {
            m_centerFilter = new Center();
            m_centerFilter.setInputFormat(m_trainInstances);
            m_trainInstances = Filter.useFilter(m_trainInstances, m_centerFilter);
        }

        m_correlation = new UpperSymmDenseMatrix(m_numAttribs);
        for (int row = 0; row < m_numAttribs; row++) {
            // Only the upper triangle (col >= row) needs to be set.
            for (int col = row; col < m_numAttribs; col++) {
                double crossProduct = 0.0d;
                for (Instance inst : m_trainInstances) {
                    crossProduct += inst.value(row) * inst.value(col);
                }
                m_correlation.set(row, col, crossProduct / (m_trainInstances.numInstances() - 1));
            }
        }
    }

    /**
     * Builds a textual summary: the decomposed matrix, a table of retained
     * eigenvalues with their proportion and cumulative proportion, and the
     * retained eigenvectors.
     *
     * @return the summary text
     */
    private String principalComponentsSummary() {
        StringBuilder text = new StringBuilder();
        Instances format = null;
        int shown = 0;
        try {
            format = setOutputFormat();
            if (format.classIndex() < 0) {
                shown = format.numAttributes();
            } else {
                shown = format.numAttributes() - 1;
            }
        } catch (Exception ignored) {
            // Best effort: with no usable output format, zero components are listed.
        }

        String matrixKind = m_center ? "Covariance " : "Correlation ";
        text.append(matrixKind).append("matrix\n")
            .append(matrixToString(Matrices.getArray(m_correlation)))
            .append("\n\n");

        text.append("eigenvalue\tproportion\tcumulative\n");
        double cumulative = 0.0d;
        for (int rank = 0; rank < shown; rank++) {
            double eigenvalue = m_eigenvalues[m_sortedEigens[m_numAttribs - 1 - rank]];
            cumulative += eigenvalue;
            text.append(Utils.doubleToString(eigenvalue, 9, 5)).append("\t")
                .append(Utils.doubleToString(eigenvalue / m_sumOfEigenValues, 9, 5)).append("\t")
                .append(Utils.doubleToString(cumulative / m_sumOfEigenValues, 9, 5)).append("\t")
                .append(format.attribute(rank).name()).append("\n");
        }

        text.append("\nEigenvectors\n");
        for (int number = 1; number <= shown; number++) {
            text.append(" V").append(number).append('\t');
        }
        text.append("\n");

        for (int att = 0; att < m_numAttribs; att++) {
            for (int rank = 0; rank < shown; rank++) {
                double coefficient = m_eigenvectors[att][m_sortedEigens[m_numAttribs - 1 - rank]];
                text.append(Utils.doubleToString(coefficient, 7, 4)).append("\t");
            }
            text.append(m_trainInstances.attribute(att).name()).append('\n');
        }

        if (m_transBackToOriginal) {
            text.append("\nPC space transformed back to original space.\n(Note: can't evaluate attributes in the original space)\n");
        }
        return text.toString();
    }

    /**
     * Returns a description of the built transformation, or a notice that
     * it has not been built yet.
     *
     * @return the textual description
     */
    public String toString() {
        if (m_eigenvalues == null) {
            return "Principal components hasn't been built yet!";
        }
        return "\tPrincipal Components Attribute Transformer\n\n" + principalComponentsSummary();
    }

    /**
     * Formats a matrix as text, one row per line, each value rendered with
     * width 6 and 2 decimals and followed by the default separator.
     *
     * Each row is printed using its own length, so non-square and ragged
     * matrices are handled; previously the row count was also used as the
     * column count, which threw or truncated for anything but square input.
     * Output for square matrices is unchanged. An empty matrix yields an
     * empty string, and an empty row produces no output line.
     *
     * @param dArr the matrix to format
     * @return the formatted matrix
     */
    public static String matrixToString(double[][] dArr) {
        StringBuilder text = new StringBuilder();
        for (double[] row : dArr) {
            for (double value : row) {
                text.append(Utils.doubleToString(value, 6, 2)).append(TestInstances.DEFAULT_SEPARATORS);
            }
            // Terminate the line only when the row actually printed something.
            if (row.length > 0) {
                text.append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Maps an instance from principal component space back to the original
     * attribute space: multiplies by the transposed eigenvector matrix, then
     * undoes standardization (when not centering) and adds the means back.
     *
     * @param instance an instance in principal component space; when a class
     *                 is present its value is read from the last attribute
     * @return the instance in the original space
     * @throws Exception declared for callers; not thrown by the visible code itself
     */
    private Instance convertInstanceToOriginal(Instance instance) throws Exception {
        int size = m_hasClass ? m_numAttribs + 1 : m_numAttribs;
        double[] restored = new double[size];
        if (m_hasClass) {
            restored[m_numAttribs] = instance.value(instance.numAttributes() - 1);
        }

        for (int att = 0; att < m_eTranspose[0].length; att++) {
            // Row 0 of the transposed matrix is unused; row k pairs with component k - 1.
            double value = 0.0d;
            for (int row = 1; row < m_eTranspose.length; row++) {
                value += m_eTranspose[row][att] * instance.value(row - 1);
            }
            if (!m_center) {
                value *= m_stdDevs[att];
            }
            value += m_means[att];
            restored[att] = value;
        }

        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), restored);
        }
        return new DenseInstance(instance.weight(), restored);
    }

    /**
     * Transforms a single instance: runs it through the same preprocessing
     * filters used at build time, projects it onto the retained principal
     * components and, if requested, maps it back to the original space.
     *
     * @param instance the instance to convert; its dataset header must equal the training header
     * @return the converted instance (sparse if the input was sparse, dense otherwise)
     * @throws Exception if the evaluator has not been built or the headers do not match
     */
    @Override
    public Instance convertInstance(Instance instance) throws Exception {
        if (m_eigenvalues == null) {
            throw new Exception("convertInstance: Principal components not built yet");
        }

        double[] projected = new double[m_outputNumAtts];
        Instance working = (Instance) instance.copy();

        if (!instance.dataset().equalHeaders(m_trainHeader)) {
            throw new Exception("Can't convert instance: header's don't match: PrincipalComponents\n" + instance.dataset().equalHeadersMsg(m_trainHeader));
        }

        // Same preprocessing chain as at build time.
        m_replaceMissingFilter.input(working);
        m_replaceMissingFilter.batchFinished();
        working = m_replaceMissingFilter.output();

        m_nominalToBinFilter.input(working);
        m_nominalToBinFilter.batchFinished();
        working = m_nominalToBinFilter.output();

        if (m_attributeFilter != null) {
            m_attributeFilter.input(working);
            m_attributeFilter.batchFinished();
            working = m_attributeFilter.output();
        }

        if (!m_center) {
            m_standardizeFilter.input(working);
            m_standardizeFilter.batchFinished();
            working = m_standardizeFilter.output();
        } else {
            m_centerFilter.input(working);
            m_centerFilter.batchFinished();
            working = m_centerFilter.output();
        }

        // The class value is carried over unchanged as the last output value.
        if (m_hasClass) {
            projected[m_outputNumAtts - 1] = instance.value(instance.classIndex());
        }

        // Project onto components, strongest first, until enough variance is covered.
        double cumulative = 0.0d;
        for (int pos = m_numAttribs - 1; pos >= 0; pos--) {
            int component = m_sortedEigens[pos];
            double dot = 0.0d;
            for (int att = 0; att < m_numAttribs; att++) {
                dot += m_eigenvectors[att][component] * working.value(att);
            }
            projected[(m_numAttribs - pos) - 1] = dot;
            cumulative += m_eigenvalues[component];
            if (cumulative / m_sumOfEigenValues >= m_coverVariance) {
                break;
            }
        }

        Instance result;
        if (instance instanceof SparseInstance) {
            result = new SparseInstance(instance.weight(), projected);
        } else {
            result = new DenseInstance(instance.weight(), projected);
        }
        if (m_transBackToOriginal) {
            return convertInstanceToOriginal(result);
        }
        return result;
    }

    /**
     * Builds the header used when data is transformed back to the original
     * space: one numeric attribute per filtered training attribute (same
     * names), followed by a copy of the class attribute if there is one.
     *
     * @return the empty dataset describing the original-space output
     * @throws Exception declared for callers; not thrown by the visible code itself
     */
    private Instances setOutputFormatOriginal() throws Exception {
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        for (int att = 0; att < m_numAttribs; att++) {
            String name = m_trainInstances.attribute(att).name();
            attributes.add(new Attribute(name));
        }
        if (m_hasClass) {
            attributes.add((Attribute) m_trainHeader.classAttribute().copy());
        }

        String relation = m_trainHeader.relationName() + "->PC->original space";
        Instances format = new Instances(relation, attributes, 0);
        if (m_hasClass) {
            format.setClassIndex(format.numAttributes() - 1);
        }
        return format;
    }

    /**
     * Builds the header of the data in principal component space and records
     * the number of output attributes in m_outputNumAtts.
     *
     * Components are added from the end of the sorted eigenvalue ordering
     * until their cumulative share of the eigenvalue sum reaches the requested
     * variance coverage. Each component is named after its coefficients and
     * the corresponding attribute names, limited to m_maxAttrsInName terms
     * (all terms when that limit is &lt;= 0), with "..." appended when terms
     * were omitted. A copy of the class attribute is appended if present.
     *
     * @return the empty dataset describing the transformed data, or null if
     *         no eigenvalues have been computed yet
     * @throws Exception declared for callers; not thrown by the visible code itself
     */
    private Instances setOutputFormat() throws Exception {
        if (this.m_eigenvalues == null) {
            return null;
        }

        // Loop-invariant: number of terms that go into each component name.
        final int termsPerName = this.m_maxAttrsInName > 0
            ? Math.min(this.m_numAttribs, this.m_maxAttrsInName)
            : this.m_numAttribs;

        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        double cumulative = 0.0d;
        for (int pos = this.m_numAttribs - 1; pos >= 0; pos--) {
            int component = this.m_sortedEigens[pos];

            // Negated magnitudes, so that the sort order puts the largest
            // coefficient first (assuming Utils.sort orders ascending).
            double[] negatedMagnitudes = new double[this.m_numAttribs];
            for (int att = 0; att < this.m_numAttribs; att++) {
                negatedMagnitudes[att] = -Math.abs(this.m_eigenvectors[att][component]);
            }
            // NOTE(review): the original guarded this sort with
            // "if (m_numAttribs > 0)" and had an identity-order fallback that
            // could never execute (this loop body only runs when
            // m_numAttribs > 0). The guard may have been meant to test
            // m_maxAttrsInName instead - confirm; the effective behavior
            // (always sort by magnitude) is kept here.
            int[] byMagnitude = Utils.sort(negatedMagnitudes);

            StringBuilder name = new StringBuilder();
            for (int term = 0; term < termsPerName; term++) {
                double coefficient = this.m_eigenvectors[byMagnitude[term]][component];
                if (term > 0 && coefficient >= KStarConstants.FLOOR) {
                    name.append("+");
                }
                name.append(Utils.doubleToString(coefficient, 5, 3))
                    .append(this.m_trainInstances.attribute(byMagnitude[term]).name());
            }
            if (termsPerName < this.m_numAttribs) {
                name.append("...");
            }
            attributes.add(new Attribute(name.toString()));

            cumulative += this.m_eigenvalues[component];
            if (cumulative / this.m_sumOfEigenValues >= this.m_coverVariance) {
                break;
            }
        }

        if (this.m_hasClass) {
            attributes.add((Attribute) this.m_trainHeader.classAttribute().copy());
        }
        Instances format = new Instances(this.m_trainInstances.relationName() + "_principal components", attributes, 0);
        if (this.m_hasClass) {
            format.setClassIndex(format.numAttributes() - 1);
        }
        this.m_outputNumAtts = format.numAttributes();
        return format;
    }

    /**
     * Returns the revision string of this class.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        final String revisionTag = "$Revision: 15471 $";
        return RevisionUtils.extract(revisionTag);
    }

    /**
     * Command-line entry point: runs this evaluator with the given arguments.
     *
     * @param strArr the command-line options
     */
    public static void main(String[] strArr) {
        PrincipalComponents evaluator = new PrincipalComponents();
        runEvaluator(evaluator, strArr);
    }
}
