package pl.edu.icm.coansys.citations.util;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FilenameFilter;
import org.jdom.Element;
import pl.edu.icm.ceon.scala_commons.xml.package$;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import resource.Resource$;
import scala.Array$;
import scala.Option;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.JavaConversions$;
import scala.collection.LinearSeqOptimized;
import scala.collection.TraversableLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.StringBuilder;
import scala.io.Codec$;
import scala.io.Source;
import scala.io.Source$;
import scala.reflect.ClassTag$;
import scala.reflect.ManifestFactory$;
import scala.runtime.ObjectRef;
import scala.util.matching.Regex;

/* compiled from: dataset_readers.scala */
/* loaded from: input_file:pl/edu/icm/coansys/citations/util/dataset_readers$.class */
public final class dataset_readers$ {
    // Singleton instance of this module. The declaration shows null, but the private
    // constructor (run from the static initializer below) stores `this` here.
    public static final dataset_readers$ MODULE$ = null;
    // Tag-name mapping exposed by citeseerTagMapping(); populated in the constructor.
    private final Map<String, String> citeseerTagMapping;
    // Tag-name mapping exposed by corarefTagMapping(); populated in the constructor.
    private final Map<String, String> corarefTagMapping;

    static {
        // The instance is created only for its constructor's side effect of setting MODULE$.
        new dataset_readers$();
    }

    /**
     * Reads every line of a Coraref-style source and returns the resulting list of pairs.
     *
     * <p>The per-line work, the filtering predicate and the final chunk-to-pair conversion
     * are all delegated to closure classes defined outside this file; this method only
     * wires them together.
     *
     * @param source the source whose lines are consumed
     * @return the pairs produced from the chunks that survive filtering
     */
    public List<Tuple2<String, String>> getTaggedReferenceListFromCorarefSource(Source source) {
        ListBuffer chunks = new ListBuffer();
        ObjectRef pending = new ObjectRef(new StringBuffer());

        // The line callback is handed both the chunk buffer and the mutable text holder.
        source.getLines().foreach(new dataset_readers$$anonfun$getTaggedReferenceListFromCorarefSource$1(chunks, pending));

        // Whatever is still sitting in the holder after the last line is appended as a final chunk.
        String trailing = ((StringBuffer) pending.elem).toString();
        chunks.append(Predef$.MODULE$.wrapRefArray(new String[]{trailing}));

        // Drop the chunks rejected by the predicate, then convert the survivors to pairs.
        ListBuffer kept = (ListBuffer) chunks.filterNot(new dataset_readers$$anonfun$1());
        List keptList = kept.toList();
        return (List) keptList.map(new dataset_readers$$anonfun$getTaggedReferenceListFromCorarefSource$2(), List$.MODULE$.canBuildFrom());
    }

    /**
     * Reads a Citeseer-style source and returns its references as (id, text) pairs.
     *
     * <p>All of the work is done by {@code parse$1}; this method supplies the line iterator
     * together with an empty starting id, an empty text accumulator and an empty result buffer.
     *
     * @param source the source whose lines are consumed
     * @return the list of (id, text) pairs produced by {@code parse$1}
     */
    public List<Tuple2<String, String>> getTaggedReferenceListFromCiteseerSource(Source source) {
        Iterator lines = source.getLines();
        String initialId = "";
        StringBuilder initialText = new StringBuilder();
        ListBuffer collected = new ListBuffer();
        return parse$1(lines, initialId, initialText, collected);
    }

    /**
     * Builds a {@link BibEntry} from an XML-tagged reference string.
     *
     * <p>The string is split into XML elements by {@code xmlToElems}; each element is passed to
     * a closure (defined outside this file) that receives the tag mapping, the entry under
     * construction, a text accumulator and a mutable holder that starts out as the empty list.
     * The accumulated text becomes the entry's text.
     *
     * @param str the tagged reference
     * @param map the tag mapping handed to the per-element closure
     * @return the value returned by {@code setText} on the new entry
     */
    public BibEntry taggedReferenceToBibEntry(String str, Map<String, String> map) {
        BibEntry entry = new BibEntry();
        StringBuffer plainText = new StringBuffer();
        package$.MODULE$.xmlToElems(str).foreach(
                new dataset_readers$$anonfun$taggedReferenceToBibEntry$1(map, entry, plainText, new ObjectRef(Nil$.MODULE$)));
        String collectedText = plainText.toString();
        return entry.setText(collectedText);
    }

    /**
     * Accessor for the Citeseer tag mapping built in the constructor.
     *
     * @return the immutable Citeseer tag-name mapping
     */
    public Map<String, String> citeseerTagMapping() {
        return citeseerTagMapping;
    }

    /**
     * Reads a Citeseer-style file and converts each tagged reference in it to a {@link BibEntry}.
     *
     * <p>The file is opened with the fallback system codec. The source is closed as soon as
     * the tagged references have been read (previously it was never closed, leaking a file
     * handle per call); the returned list is an immutable, fully built list, so closing before
     * the conversion step is safe.
     *
     * @param str path of the file to read
     * @return one entry per tagged reference, produced by a closure defined outside this file
     */
    public List<BibEntry> importBibEntriesFromCiteseerFile(String str) {
        Source source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        List<Tuple2<String, String>> tagged;
        try {
            tagged = getTaggedReferenceListFromCiteseerSource(source);
        } finally {
            // Release the underlying file handle even when parsing fails.
            source.close();
        }
        // Only the second component of every pair (the tagged text) is converted.
        return (List) ((TraversableLike) tagged.unzip(Predef$.MODULE$.conforms())._2()).map(new dataset_readers$$anonfun$importBibEntriesFromCiteseerFile$1(), List$.MODULE$.canBuildFrom());
    }

    /**
     * Accessor for the Coraref tag mapping built in the constructor.
     *
     * @return the immutable Coraref tag-name mapping
     */
    public Map<String, String> corarefTagMapping() {
        return corarefTagMapping;
    }

    /**
     * Reads a Coraref-style file and converts each tagged reference in it to a {@link BibEntry}.
     *
     * <p>The file is opened with the ISO-8859 codec. The source is closed as soon as the tagged
     * references have been read (previously it was never closed, leaking a file handle per
     * call); the lines are consumed eagerly and the result is an immutable list, so closing
     * before the conversion step is safe.
     *
     * @param str path of the file to read
     * @return one entry per tagged reference, produced by a closure defined outside this file
     */
    public List<BibEntry> importBibEntriesFromCorarefFile(String str) {
        Source source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.ISO8859());
        List<Tuple2<String, String>> tagged;
        try {
            tagged = getTaggedReferenceListFromCorarefSource(source);
        } finally {
            // Release the underlying file handle even when reading fails.
            source.close();
        }
        // Only the second component of every pair is converted.
        return (List) ((TraversableLike) tagged.unzip(Predef$.MODULE$.conforms())._2()).map(new dataset_readers$$anonfun$importBibEntriesFromCorarefFile$1(), List$.MODULE$.canBuildFrom());
    }

    /**
     * Collapses a {@code string-name} element into a single {@code surname} child, or hands
     * the children of any other element to a closure defined outside this file
     * (presumably the recursive step; confirm against that class).
     *
     * <p>The name test is written as a null-safe {@code equals}: an element whose name is
     * {@code null} is treated as "not a string-name", exactly as before. The decompiled form
     * compared a string literal with {@code 0}, which is not valid Java.
     *
     * @param element the element to inspect and possibly rewrite in place
     */
    public void findAndCollapseStringName(Element element) {
        if ("string-name".equals(element.getName())) {
            collapseContent(element, "surname");
        } else {
            JavaConversions$.MODULE$.asScalaBuffer(element.getChildren()).foreach(new dataset_readers$$anonfun$findAndCollapseStringName$1());
        }
    }

    /**
     * Replaces all content of {@code element} with one new child element named {@code str}
     * whose text is the value the element had before the replacement.
     *
     * @param element the element rewritten in place
     * @param str     the name of the single child element that is added
     */
    public void collapseContent(Element element, String str) {
        // The child is created first, then filled, so the value is read before the content is removed.
        Element replacement = new Element(str);
        replacement = replacement.setText(element.getValue());

        element.removeContent();
        element.addContent(replacement);
    }

    /**
     * Ad-hoc driver: lists files in a hard-coded directory, turns them into an array of strings
     * through three closure-based array transformations, and hands that array to a closure that
     * is run against a managed {@link BufferedWriter} resource.
     *
     * <p>Visible steps, in the order the expression evaluates them:
     * <ol>
     *   <li>a managed, closeable {@code BufferedWriter} resource is set up from a closure that is
     *       given the hard-coded output path (presumably it opens the writer on that path;
     *       the closure class is not in this file);</li>
     *   <li>the hard-coded input directory is listed, keeping only names ending in {@code "Out"};</li>
     *   <li>the files are mapped to a {@code String[]}, flat-mapped to a {@code BibEntry[]} and
     *       mapped again to a {@code String[]} by closures defined outside this file;</li>
     *   <li>the final array is passed to the closure executed via {@code foreach} on the resource.</li>
     * </ol>
     *
     * <p>NOTE(review): both paths are hard-coded Windows desktop locations, and
     * {@code File.listFiles} returns {@code null} when the path is not a directory; how the
     * wrapped array operations react to that is not visible here.
     *
     * @param strArr command-line arguments; not read by this method
     */
    public void main(String[] strArr) {
        resource.package$.MODULE$.managed(new dataset_readers$$anonfun$main$1("C:\\Users\\matfed\\Desktop\\mixed-citations-citeseer.xml"), Resource$.MODULE$.closeableResource(), ManifestFactory$.MODULE$.classType(BufferedWriter.class)).foreach(new dataset_readers$$anonfun$main$2((String[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(new File("C:\\Users\\matfed\\Desktop\\citeseer.ie.raw.tar").listFiles(new FilenameFilter() { // from class: pl.edu.icm.coansys.citations.util.dataset_readers$$anon$1
            // Accepts a directory entry purely by its name suffix; the directory argument is ignored.
            @Override // java.io.FilenameFilter
            public boolean accept(File file, String str) {
                return str.endsWith("Out");
            }
        })).map(new dataset_readers$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).flatMap(new dataset_readers$$anonfun$3(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(BibEntry.class)))).map(new dataset_readers$$anonfun$4(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))));
    }

    /**
     * Splits a stream of lines into (id, text) pairs, using {@code <meta ...></meta>} marker
     * lines as separators.
     *
     * <p>Two marker formats are recognised: one carrying {@code reference_no}, {@code cluster_no}
     * and {@code true_id} attributes (the id is {@code true_id}), and one carrying {@code ref_no},
     * {@code class_no} and {@code bib_no} attributes (the id is {@code class_no}). A marker line
     * closes the record collected so far, which is emitted only if its text is non-empty, and
     * starts a new record under the captured id. Every other line is appended to the current
     * record followed by a single space. The record still open when the lines run out is
     * always emitted, even if its text is empty.
     *
     * <p>Both patterns are compiled once per call rather than once per line.
     *
     * @param iterator      the lines to consume
     * @param str           the id of the record being collected when the call starts
     * @param stringBuilder the text collected so far for that record
     * @param listBuffer    the buffer the finished records are appended to
     * @return the contents of {@code listBuffer} as an immutable list
     */
    private final List parse$1(Iterator iterator, String str, StringBuilder stringBuilder, ListBuffer listBuffer) {
        // Loop-invariant: neither pattern depends on the line being examined.
        Regex trueIdMarker = new StringOps(Predef$.MODULE$.augmentString("<meta reference_no=\"\\d+\" cluster_no=\"\\d+\" true_id=\"([^\"]+)\"></meta>")).r();
        Regex classNoMarker = new StringOps(Predef$.MODULE$.augmentString("<meta ref_no=\"[^\"]+\" class_no=\"([^\"]+)\" bib_no=\"[^\"]+\"></meta>")).r();

        while (iterator.hasNext()) {
            String line = (String) iterator.next();

            // The true_id format takes precedence; the class_no format is tried only if it fails.
            Option marker = trueIdMarker.unapplySeq(line);
            if (!isSingleGroupMatch$1(marker)) {
                marker = classNoMarker.unapplySeq(line);
            }

            if (isSingleGroupMatch$1(marker)) {
                String nextId = (String) ((LinearSeqOptimized) marker.get()).apply(0);
                // Consecutive markers with nothing in between produce no record.
                if (!stringBuilder.isEmpty()) {
                    listBuffer.$plus$eq(new Tuple2(str, stringBuilder.toString()));
                }
                stringBuilder = new StringBuilder();
                str = nextId;
            } else {
                stringBuilder.append(line).append(' ');
            }
        }
        return listBuffer.$plus$eq(new Tuple2(str, stringBuilder.toString())).toList();
    }

    /** Tells whether a regex extraction succeeded and yielded exactly one captured group. */
    private static boolean isSingleGroupMatch$1(Option extracted) {
        return !extracted.isEmpty()
                && extracted.get() != null
                && ((LinearSeqOptimized) extracted.get()).lengthCompare(1) == 0;
    }

    /**
     * Publishes the singleton through {@code MODULE$} and builds the two immutable tag
     * mappings, each from a fixed list of key/value pairs.
     */
    private dataset_readers$() {
        MODULE$ = this;
        this.citeseerTagMapping = Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                tagPair$1("author", "author"),
                tagPair$1("title", "title"),
                tagPair$1("journal", "journal"),
                tagPair$1("booktitle", "journal"),
                tagPair$1("tech", "journal"),
                tagPair$1("pages", "pages"),
                tagPair$1("date", "year")}));
        this.corarefTagMapping = Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                tagPair$1("author", "author"),
                tagPair$1("booktitle", "journal"),
                tagPair$1("date", "year"),
                tagPair$1("journal", "journal"),
                tagPair$1("pages", "pages"),
                tagPair$1("tech", "journal"),
                tagPair$1("title", "title"),
                tagPair$1("type", "journal"),
                tagPair$1("year", "year")}));
    }

    /** Builds one key/value pair the same way the Scala {@code key -> value} arrow does. */
    private static Tuple2 tagPair$1(String key, String value) {
        return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.any2ArrowAssoc(key), value);
    }
}
