package org.apache.mahout.utils.clustering;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.mahout.clustering.ClusterBase;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.FileLineIterator;
import org.apache.mahout.matrix.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/utils/clustering/ClusterDumper.class */
/**
 * Command-line utility that prints the clusters stored in a directory of Hadoop sequence files.
 *
 * <p>For every cluster record it writes one line of the form
 * {@code <id>:name:<center name>:<center format string>}, optionally followed by a
 * {@code Top Terms} line (when a dictionary file is supplied) and a {@code Points} line (when a
 * points directory is supplied). Output goes to the file named by {@code --output}, or to
 * {@code System.out} when that option is absent.
 */
public final class ClusterDumper {
    private static final Logger log = LoggerFactory.getLogger(ClusterDumper.class);
    private static final String LINE_SEP = System.getProperty("line.separator");
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    /** Number of highest-weighted terms printed per cluster when a dictionary is supplied. */
    private static final int NUM_TOP_FEATURES = 10;

    /** Placeholder stored at dictionary positions for which the dictionary file has no term. */
    private static final String DICTIONARY_PLACEHOLDER = "dummyentry";

    /** Orders {@link TermIndexWeight} instances from the largest weight to the smallest. */
    private static final Comparator<TermIndexWeight> BY_WEIGHT_DESCENDING = new Comparator<TermIndexWeight>() {
        @Override
        public int compare(TermIndexWeight left, TermIndexWeight right) {
            return Double.compare(right.weight, left.weight);
        }
    };

    /** Pairs the index of a vector element with the value (weight) stored at that index. */
    public static class TermIndexWeight {
        public int index;
        public double weight;

        TermIndexWeight(int i, double d) {
            this.index = i;
            this.weight = d;
        }
    }

    private ClusterDumper() {
    }

    /**
     * Parses the command line and dumps the clusters found in the {@code --seqFileDir} directory.
     *
     * <p>Recognised options: {@code --seqFileDir/-s}, {@code --output/-o}, {@code --substring/-b},
     * {@code --pointsDir/-p}, {@code --dictionary/-d} and {@code --help/-h}. When the command line
     * cannot be parsed, the error is logged and the help text is printed.
     *
     * @param strArr the raw command-line arguments
     * @throws IOException if a directory cannot be listed or a file cannot be read or written
     * @throws IllegalAccessException if a sequence file's key or value class cannot be accessed
     * @throws InstantiationException if a sequence file's key or value class cannot be instantiated
     * @throws IllegalArgumentException if {@code --substring} is negative
     */
    public static void main(String[] strArr) throws IOException, IllegalAccessException, InstantiationException {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();
        GroupBuilder groupBuilder = new GroupBuilder();

        DefaultOption seqOpt = optionBuilder.withLongName("seqFileDir").withRequired(false)
                .withArgument(argumentBuilder.withName("seqFileDir").withMinimum(1).withMaximum(1).create())
                .withDescription("The directory containing Sequence Files for the Clusters")
                .withShortName("s").create();
        DefaultOption outputOpt = optionBuilder.withLongName("output").withRequired(false)
                .withArgument(argumentBuilder.withName("output").withMinimum(1).withMaximum(1).create())
                .withDescription("The output file.  If not specified, dumps to the console")
                .withShortName("o").create();
        DefaultOption substringOpt = optionBuilder.withLongName("substring").withRequired(false)
                .withArgument(argumentBuilder.withName("substring").withMinimum(1).withMaximum(1).create())
                .withDescription("The number of chars of the asFormatString() to print")
                .withShortName("b").create();
        DefaultOption pointsOpt = optionBuilder.withLongName("pointsDir").withRequired(false)
                .withArgument(argumentBuilder.withName("pointsDir").withMinimum(1).withMaximum(1).create())
                .withDescription("The directory contaning points sequence files mapping input vectors to their cluster.  If specified, then the program will output the points associated with a cluster")
                .withShortName("p").create();
        DefaultOption dictOpt = optionBuilder.withLongName("dictionary").withRequired(false)
                .withArgument(argumentBuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
                .withDescription("The dictionary file. ")
                .withShortName("d").create();
        DefaultOption helpOpt = optionBuilder.withLongName("help")
                .withDescription("Print out help")
                .withShortName("h").create();

        // The help option must be part of the group, otherwise the parser rejects "--help"
        // instead of letting the hasOption(helpOpt) check below see it.
        Group group = groupBuilder.withName("Options")
                .withOption(seqOpt)
                .withOption(outputOpt)
                .withOption(substringOpt)
                .withOption(pointsOpt)
                .withOption(dictOpt)
                .withOption(helpOpt)
                .create();

        try {
            Parser parser = new Parser();
            parser.setGroup(group);
            CommandLine cmdLine = parser.parse(strArr);
            if (cmdLine.hasOption(helpOpt)) {
                CommandLineUtil.printHelp(group);
                return;
            }

            // The dictionary is loaded even when no cluster directory is given, as before.
            List<String> termDict = cmdLine.hasOption(dictOpt)
                    ? getTermDict(cmdLine.getValue(dictOpt).toString())
                    : null;
            if (!cmdLine.hasOption(seqOpt)) {
                return;
            }

            JobClient jobClient = new JobClient();
            JobConf jobConf = new JobConf(Job.class);
            jobClient.setConf(jobConf);

            Map<String, List<String>> clusterIdToPoints = cmdLine.hasOption(pointsOpt)
                    ? readPoints(cmdLine.getValue(pointsOpt).toString(), jobConf)
                    : Collections.<String, List<String>>emptyMap();

            // Validate every remaining input before the output file is created or truncated.
            int subLength = cmdLine.hasOption(substringOpt)
                    ? Integer.parseInt(cmdLine.getValue(substringOpt).toString())
                    : Integer.MAX_VALUE;
            if (subLength < 0) {
                throw new IllegalArgumentException("substring must not be negative: " + subLength);
            }
            File[] clusterFiles = listDirectory(cmdLine.getValue(seqOpt).toString());

            boolean writeToFile = cmdLine.hasOption(outputOpt);
            OutputStreamWriter writer = writeToFile
                    ? new FileWriter(cmdLine.getValue(outputOpt).toString())
                    : new OutputStreamWriter(System.out);
            try {
                for (File clusterFile : clusterFiles) {
                    if (clusterFile.isFile()) {
                        dumpClusterFile(clusterFile, jobConf, writer, subLength, termDict, clusterIdToPoints);
                    }
                }
                writer.flush();
            } finally {
                // System.out is deliberately left open; only a writer we created on a file is closed.
                if (writeToFile) {
                    writer.close();
                }
            }
        } catch (OptionException e) {
            log.error("Exception", e);
            CommandLineUtil.printHelp(group);
        }
    }

    /**
     * Writes every cluster record of one sequence file to {@code writer}, flushing after each record.
     *
     * @param clusterFile the sequence file to read; its values are cast to {@link ClusterBase}
     * @param jobConf the configuration used to open the file
     * @param writer the destination of the dump
     * @param subLength the maximum number of characters of the center's format string to print
     * @param termDict the term dictionary, or {@code null} to omit the {@code Top Terms} line
     * @param clusterIdToPoints the points of each cluster keyed by cluster id; clusters without an
     *        entry get no {@code Points} line
     */
    private static void dumpClusterFile(File clusterFile,
                                        JobConf jobConf,
                                        OutputStreamWriter writer,
                                        int subLength,
                                        List<String> termDict,
                                        Map<String, List<String>> clusterIdToPoints)
            throws IOException, IllegalAccessException, InstantiationException {
        Path path = new Path(clusterFile.getAbsolutePath());
        System.out.println("Input Path: " + path);
        SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(path.toUri(), jobConf), path, jobConf);
        try {
            Writable key = (Writable) reader.getKeyClass().newInstance();
            ClusterBase cluster = (ClusterBase) reader.getValueClass().newInstance();
            while (reader.next(key, cluster)) {
                Vector center = cluster.getCenter();
                String formatted = center.asFormatString();
                String clusterId = String.valueOf(cluster.getId());
                writer.append(clusterId)
                        .append(":")
                        .append("name:")
                        .append(center.getName())
                        .append(":")
                        .append(formatted.substring(0, Math.min(subLength, formatted.length())))
                        .append(LINE_SEP);
                if (termDict != null) {
                    writer.write("\tTop Terms: ");
                    writer.write(getTopFeatures(center, termDict, NUM_TOP_FEATURES));
                    writer.write(LINE_SEP);
                }
                List<String> points = clusterIdToPoints.get(clusterId);
                if (points != null) {
                    writer.write("\tPoints: ");
                    writer.write(joinWithCommas(points));
                    writer.write(LINE_SEP);
                }
                writer.flush();
            }
        } finally {
            // Close the reader on failure as well as on success.
            reader.close();
        }
    }

    /**
     * Reads every regular file of a directory as a sequence file of {@link Text} pairs and groups
     * the keys by their value.
     *
     * <p>Each record's value is used as the map key and each record's key is appended to that
     * entry's list; {@code main} looks the map up by cluster id. If a file's key or value class
     * cannot be instantiated, the error is logged and that file is skipped.
     *
     * @param str the directory holding the points sequence files
     * @param jobConf the configuration used to open the files
     * @return the record keys grouped by record value; never {@code null}
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    private static Map<String, List<String>> readPoints(String str, JobConf jobConf) throws IOException {
        Map<String, List<String>> pointsByCluster = new HashMap<>();
        for (File pointsFile : listDirectory(str)) {
            if (!pointsFile.isFile()) {
                continue;
            }
            Path path = new Path(pointsFile.getAbsolutePath());
            SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(path.toUri(), jobConf), path, jobConf);
            try {
                Text key = (Text) reader.getKeyClass().newInstance();
                Text value = (Text) reader.getValueClass().newInstance();
                while (reader.next(key, value)) {
                    String clusterId = value.toString();
                    List<String> points = pointsByCluster.get(clusterId);
                    if (points == null) {
                        points = new ArrayList<>();
                        pointsByCluster.put(clusterId, points);
                    }
                    points.add(key.toString());
                }
            } catch (IllegalAccessException e) {
                log.error("Exception", e);
            } catch (InstantiationException e) {
                log.error("Exception", e);
            } finally {
                reader.close();
            }
        }
        return pointsByCluster;
    }

    /**
     * Loads a term dictionary into a list in which position {@code i} holds the term with index
     * {@code i}.
     *
     * <p>The first line of the file must be the number of entries. Every later line that does not
     * start with {@code #} and has at least three tab-separated fields contributes one term: field 0
     * is the term and field 2 is its index. Positions without a term hold the placeholder
     * {@code "dummyentry"}. An index at or above the declared count grows the list instead of
     * failing; a negative index is logged and skipped.
     *
     * @param str the path of the dictionary file
     * @return the terms, addressable by index
     * @throws IOException if the file cannot be read or is empty
     * @throws NumberFormatException if the entry count or a term index is not an integer
     */
    private static List<String> getTermDict(String str) throws IOException {
        FileLineIterator lines = new FileLineIterator(new File(str));
        try {
            if (!lines.hasNext()) {
                throw new IOException("Dictionary file is empty: " + str);
            }
            int numEntries = Integer.parseInt(lines.next());
            System.out.println(numEntries);
            List<String> terms = new ArrayList<>(Math.max(numEntries, 0));
            for (int i = 0; i < numEntries; i++) {
                terms.add(DICTIONARY_PLACEHOLDER);
            }
            while (lines.hasNext()) {
                String line = lines.next();
                if (line.startsWith("#")) {
                    continue;
                }
                String[] fields = TAB_PATTERN.split(line);
                if (fields.length < 3) {
                    continue;
                }
                int index = Integer.parseInt(fields[2]);
                if (index < 0) {
                    log.warn("Ignoring dictionary entry with negative index: " + line);
                    continue;
                }
                // Tolerate a declared count that is smaller than the largest index in the file.
                while (terms.size() <= index) {
                    terms.add(DICTIONARY_PLACEHOLDER);
                }
                terms.set(index, fields[0]);
            }
            return terms;
        } finally {
            lines.close();
        }
    }

    /**
     * Returns the dictionary terms of the {@code i} highest-weighted non-zero elements of a vector,
     * heaviest first, joined by {@code ", "}.
     *
     * <p>An element whose index has no dictionary entry (out of range or {@code null}) is logged
     * and left out of the result; it still counts towards the limit {@code i}.
     *
     * @param vector the vector whose non-zero elements are ranked
     * @param arrayList the dictionary mapping an element index to its term
     * @param i the maximum number of elements to consider
     * @return the comma-separated terms; empty when nothing qualifies
     */
    private static String getTopFeatures(Vector vector, List<String> arrayList, int i) {
        List<TermIndexWeight> weights = new ArrayList<>();
        Iterator<?> nonZero = vector.iterateNonZero();
        while (nonZero.hasNext()) {
            Vector.Element element = (Vector.Element) nonZero.next();
            weights.add(new TermIndexWeight(element.index(), element.get()));
        }
        Collections.sort(weights, BY_WEIGHT_DESCENDING);

        List<String> topTerms = new ArrayList<>();
        int limit = Math.min(i, weights.size());
        for (int rank = 0; rank < limit; rank++) {
            int termIndex = weights.get(rank).index;
            String term = termIndex >= 0 && termIndex < arrayList.size() ? arrayList.get(termIndex) : null;
            if (term == null) {
                log.error("Dictionary entry missing for " + termIndex);
            } else {
                topTerms.add(term);
            }
        }
        return joinWithCommas(topTerms);
    }

    /** Lists the entries of a directory, failing with a clear message when it cannot be listed. */
    private static File[] listDirectory(String dirName) throws IOException {
        File[] entries = new File(dirName).listFiles();
        if (entries == null) {
            // File.listFiles() yields null for a missing path, a non-directory, or an I/O error.
            throw new IOException("Cannot list files in directory: " + dirName);
        }
        return entries;
    }

    /** Joins the given strings with {@code ", "} between consecutive elements. */
    private static String joinWithCommas(List<String> items) {
        StringBuilder joined = new StringBuilder();
        Iterator<String> it = items.iterator();
        while (it.hasNext()) {
            joined.append(it.next());
            if (it.hasNext()) {
                joined.append(", ");
            }
        }
        return joined.toString();
    }
}
