package uk.ac.cam.ch.wwmm.acpgeo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.ValidityException;
import uk.ac.cam.ch.wwmm.chemicaltagger.ExtractFromXML;

/* loaded from: input_file:uk/ac/cam/ch/wwmm/acpgeo/ExtractInformation.class */
/**
 * Runs a fixed set of XPath queries over a folder of XML documents and writes the matches
 * to tab-separated {@code .csv} files in the output folder, followed by a combined
 * {@code MappingDataAll.csv} that lists one row per station with known coordinates.
 *
 * <p>The constructor runs every query immediately, so those results always go to the default
 * output folder; {@link #setOutputFolder(String)} only affects later calls to
 * {@link #runQueries(File[])}.
 */
public class ExtractInformation {
    private static final String STATION_COORDS_FILE = "dictionaries/StationCoords.csv";

    /** Selects each distinct station name that appears directly under a LOCATION element. */
    private static final String STATION_XPATH =
            "//LOCATION/NNP-STATION[not(.=preceding::LOCATION/NNP-STATION/.)]";

    /** Selects the CD-YEAR whose value is strictly smaller than every other CD-YEAR. */
    private static final String YEAR_MIN_XPATH =
            "//CD-YEAR[not(.>=preceding::CD-YEAR)][not(.>=following::CD-YEAR)][not(.>=descendant::CD-YEAR)]";

    /** Selects the CD-YEAR whose value is strictly greater than every other CD-YEAR. */
    private static final String YEAR_MAX_XPATH =
            "//CD-YEAR[not(.<=preceding::CD-YEAR)][not(.<=following::CD-YEAR)][not(.<=descendant::CD-YEAR)]";

    /** Column queries for MappingDataAll.csv; the station query must come first. */
    private static final List<String> DETAIL_XPATHS = Arrays.asList(
            STATION_XPATH,
            YEAR_MIN_XPATH,
            YEAR_MAX_XPATH,
            "//CAMPAIGN",
            "//MOLECULE/OSCARCM[1][not(.=preceding::MOLECULE/OSCARCM[1]/.)]",
            "//affiliation",
            "//publication_year");

    private static final String DETAILS_HEADER =
            "FileName\tLat\tLon\tAltitude\tTitle\tYearMin\tYearMax\tCampaign\tMeasuredCompounds\tInstituteAddresses\tPublicationYear";

    /**
     * {XPath query, output file name} pairs run by {@link #runQueries(File[])}, in order.
     * Each pair produces its own output file.
     */
    private static final String[][] QUERIES = {
        {"//CAMPAIGN", "CAMPAIGN.csv"},
        {"//TransitionPhrase", "transitionalPhrase.csv"},
        {"//JJ[.='Eulerian']", "Eulerian.csv"},
        {"//*[.='Lagrangian']", "Lagrangian.csv"},
        {"//IN", "in.csv"},
        {"//CITATION", "CITATION.csv"},
        {"//AEROSOL", "AEROSOL.csv"},
        {"//QUANTITY", "QUANTITY.csv"},
        {"//QUANTITY-TIME", "QUANTITY-TIME.csv"},
        {"//UNITS", "UNITS.csv"},
        {"//UnmatchedPhrase", "UnmatchedPhrase.csv"},
        {"//ResolutionPhrase", "resolution.csv"},
        {"//HorizontalGrid", "HorizontalGrid.csv"},
        {"//EquationName", "equationName.csv"},
        {"//VerticalResolution", "Vresolution.csv"},
        {"//PALAEOTIME", "PALAEOTIME.csv"},
        {"//NNP-ACRONYMPHRASESTART", "ACRONYMPHRASESTART.csv"},
        {"//NN-TEMP", "NN-TEMP.csv"},
        {"//NN-RESOLUTION", "NN-RESOLUTION.csv"},
        {"//TempPhrase", "TempPhrase.csv"},
        {"//PrepPhrase", "PrepPhrase.csv"},
        {"//VB-HEAT", "VB-Heat.csv"},
        {"//VB-INCREASE", "VB-Increase.csv"},
        {"//NNP-TIMEPERIOD", "NNP-TIMEPERIOD.csv"},
        {"//VB-COOL", "VB-COOL.csv"},
        {"//RATIO", "RATIO.csv"},
        {"//APPARATUS", "APPARATUS.csv"},
        {"//NN-IDENTIFIER", "IDENTIFIER.csv"},
        {"//CD", "CD.csv"},
        {"//CD-ALPHANUM", "CD-ALPHANUM.csv"},
        {"//MATHEXPRESSION", "MATHEXPRESSION.csv"},
        {"//MATHEQUATION", "MATHEQUATION.csv"},
        {"//NN-MODEL", "NN-MODEL.csv"},
        {"//MODEL", "MODEL.csv"},
        {"//PrepPhrase/NounPhrase/MODEL", "PrepMODEL.csv"},
        {"//LocationPhrase/NounPhrase/MODEL", "LocMODEL.csv"},
        {"//NNP-MODEL", "NNP-MODEL.csv"},
        {"//NN-AEROSOL", "NN-AEROSOL.csv"},
        {"//NN-PHYSICAL", "NN-PHYSICAL.csv"},
        {"//NN-UNITS", "NN-UNITS.csv"},
        {"//NN-PERAREA", "NN-PERAREA.csv"},
        {"//NN-PERTIMEUNIT", "NN-PERTIMEUNIT.csv"},
        {"//NN-TIMEUNIT", "NN-TIMEUNIT.csv"},
        {"//ReferencePhrase", "ReferencePhrase.csv"},
        {"//CD-YEAR", "CDYEARS.csv"},
        {"//YEARS", "YEARS.csv"},
        {"//CD-YEAR-RANGE", "YEAR-RANGE.csv"},
        {"//CD-YEAR[not(preceding::CD-YEAR >= . or following::CD-YEAR >= . or descendant::CD-YEAR >= .)]", "YEAR-MAX.csv"},
        {"//NN-PALAEOTIMEUNIT[not(ancestor::PALAEOTIME)]", "PALAEOTIMEUNIT.csv"},
        {"//NN-TIMEPERIODQUALIFIER[not(ancestor::PALAEOTIME)]", "TIMEPERIODQUALIFIER.csv"},
        {"//CD-YEAR[not(ancestor::ReferencePhrase)]", "YEAR-notREf.csv"},
        {"//SetAcronymPhrase", "SETACRONYMPHRASES.csv"},
        {"//SetAcronymPhrase[(ancestor::CAMPAIGN)]", "CAMPAIGN_SETACRONYMPHRASES.csv"},
        {"//SetAcronymPhrase[(ancestor::MODEL)]", "MODEL_SETACRONYMPHRASES.csv"},
        {"//AcronymPhrase", "ACRONYMPHRASES.csv"},
        {"//NNP-ACRONYM", "NNP-ACRONYM.csv"},
        {"//ParentheticalPhrase", "PARENTHETICALPHRASE.csv"},
        {"//ParentheticalPhraseSimple", "PARENTHETICALPHRASESimple.csv"},
        {"//LOCATION", "LOCATION.csv"},
        {"//LocationPhrase", "LocationPhrase.csv"},
        {"//TimePhrase", "TimePhrase.csv"},
        {"//TIME", "TIME.csv"},
        {"//TIME[YEARS and not(*[not(self::YEARS)])]", "TIMEYears.csv"},
        {"//QUANTITY/TIME[YEARS and not(*[not(self::YEARS)])]", "QuantityTimeYears.csv"},
        {"//QuantityTime", "QuantityTime1.csv"},
        // NOTE(review): every other query spells this element CD-YEAR; confirm CDYEAR is intended.
        {"//QUANTITY/CDYEAR", "QuantityCDYear.csv"},
        {"//TIME/YEARS", "TimeQuantity3.csv"},
        {"//MOLECULE", "MOLECULE.csv"},
        {"//MOLECULE[not(child::JJ-ACP)][not(child::JJ)][not(child::JJ-CHEM)]", "MOLECULE1.csv"},
        {"//MOLECULE[child::JJ-ACP|JJ|JJ-CHEM]/OSCARCM[1]", "MOLECULE2.csv"},
        {"//MOLECULE/OSCARCM[1]", "MOLECULE3.csv"},
        {"//CD-DEGREES", "CD-DEGREES.csv"},
        {"//CD-ALTITUDE", "CD-ALTIUDE.csv"},
        {"//NN-PARTS", "NN-PARTS.csv"},
        {"//NNP-STATION", "NNP-STATION.csv"},
        {"//NNP-STATION[not(.=preceding::NNP-STATION/.)]", "NNP-STATION1.csv"},
        {"//LOCATION[descendant-or-self::CD-DEGREES[2]]", "LOCATIONDEGREES.csv"},
        {"//LOCATION[descendant-or-self::NNP-STATION][not(descendant-or-self::CD-DEGREES)]", "LOCATIONSTATION.csv"},
        {"//LOCATION[descendant-or-self::NN-STATION]", "LOCATIONSTATION1.csv"},
        {"//ActionPhrase[@type='Measurement']", "ACTIONPHRASEmeasurement.csv"},
        {"//ActionPhrase[@type='Measurement'][descendant-or-self::MOLECULE]", "ACTIONPHRASEmeasurementMOLECULE.csv"},
        {"//ActionPhrase[@type='Observation']", "ACTIONPHRASEobservation.csv"},
        {"//ActionPhrase[@type='Observation'][descendant-or-self::MOLECULE]", "ACTIONPHRASEobservationMOLECULE.csv"},
    };

    private String outputFolder = "target/extractedInfo/";
    private CoordinatesLoader gawCoordinates;

    /**
     * Returns the folder that result files are written to.
     *
     * @return the current output folder path
     */
    public String getOutputFolder() {
        return this.outputFolder;
    }

    /**
     * Sets the folder used by subsequent {@link #runQueries(File[])} calls.
     *
     * @param outputFolder path of the folder to write result files to
     */
    public void setOutputFolder(String outputFolder) {
        this.outputFolder = outputFolder;
    }

    /**
     * Lists the entries of {@code inputFolder} whose name starts with "cp" (case-insensitive)
     * and runs all queries over them.
     *
     * @param inputFolder path of the directory holding the XML documents
     * @throws IllegalArgumentException if {@code inputFolder} cannot be listed as a directory
     */
    public ExtractInformation(String inputFolder) {
        File[] files = new File(inputFolder).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // Case-insensitive prefix test that does not depend on the default locale.
                return name.regionMatches(true, 0, "cp", 0, 2);
            }
        });
        if (files == null) {
            // listFiles returns null when the path is not a directory or cannot be read.
            throw new IllegalArgumentException("Not a readable directory: " + inputFolder);
        }
        runQueries(files);
    }

    /**
     * Runs every query in {@link #QUERIES} over the given files, one output file per query,
     * then writes the combined {@code MappingDataAll.csv}.
     *
     * @param files candidate input files; only those whose name ends with "xml" are read
     * @throws IllegalArgumentException if {@code files} is {@code null}
     */
    public void runQueries(File[] files) {
        if (files == null) {
            throw new IllegalArgumentException("files must not be null");
        }
        for (String[] query : QUERIES) {
            getQuery(files, query[0], query[1]);
        }
        FileWriter writer = openOutput("MappingDataAll.csv");
        if (writer == null) {
            return; // the failure has already been reported by openOutput
        }
        try {
            getDetails(files, DETAIL_XPATHS, writer);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(writer);
        }
    }

    /**
     * Writes the header and then the station rows of every XML file to {@code writer}.
     * The caller owns {@code writer} and is responsible for closing it.
     *
     * @param files  candidate input files; only those whose name ends with "xml" are read
     * @param xpaths column queries, with {@link #STATION_XPATH} expected first
     * @param writer destination of the tab-separated rows
     * @throws IOException if the header cannot be written or the coordinates cannot be loaded
     */
    private void getDetails(File[] files, List<String> xpaths, FileWriter writer) throws IOException {
        writer.write(DETAILS_HEADER);
        this.gawCoordinates = new CoordinatesLoader(STATION_COORDS_FILE);
        for (File file : files) {
            if (!isXml(file)) {
                continue;
            }
            Document document = parse(file);
            if (document == null) {
                continue; // unreadable or malformed; already reported by parse
            }
            try {
                writeStationRows(file, document, xpaths, writer);
            } catch (IOException e) {
                // A write failure for one document does not stop the remaining documents.
                e.printStackTrace();
            }
        }
    }

    /**
     * Writes the rows for all station nodes of one document.
     *
     * <p>A row prefix (file path, coordinates, station name) is written only when the station
     * has coordinates and they differ from the previously written ones.
     *
     * <p>NOTE(review): the trailing columns and line break are written for every station node,
     * including stations without coordinates (tabs only) and stations repeating the previous
     * coordinates (no prefix). This matches the long-standing output; confirm it is intended.
     */
    private void writeStationRows(File file, Document document, List<String> xpaths, FileWriter writer)
            throws IOException {
        if (xpaths.isEmpty() || !STATION_XPATH.equals(xpaths.get(0))) {
            return;
        }
        Nodes stations = document.query(STATION_XPATH);
        String blankColumns = tabs(xpaths.size());
        String detailColumns = null; // identical for every station of a document, so built once
        String previousCoords = null;
        for (int i = 0; i < stations.size(); i++) {
            String stationName = textOf(stations, i, "");
            // A missing map entry becomes the text "null" through the concatenation.
            String coords = this.gawCoordinates.getSiteCoordsMapA().get(stationName) + "";
            boolean known = !coords.equals("null");
            if (known && !coords.equals(previousCoords)) {
                previousCoords = coords;
                writer.write("\n");
                writer.write(file.getCanonicalPath() + "\t" + coords + "\t" + stationName + "\t");
            }
            if (known) {
                if (detailColumns == null) {
                    detailColumns = buildDetailColumns(document, xpaths);
                }
                writer.write(detailColumns);
            } else {
                writer.write(blankColumns);
            }
            writer.write("\n");
            writer.flush();
        }
    }

    /** Builds the tab-terminated detail columns of a document, one column per XPath. */
    private static String buildDetailColumns(Document document, List<String> xpaths) {
        StringBuilder columns = new StringBuilder();
        for (String xpath : xpaths) {
            if (!STATION_XPATH.equals(xpath)) {
                columns.append(formatColumn(xpath, document.query(xpath)));
            }
            columns.append("\t");
        }
        return columns.toString();
    }

    /**
     * Formats the matches of one column query: the first four characters of the first match
     * for the minimum year, the last four-character year of the last match for the maximum
     * year, and all matches joined with "; " for every other query.
     */
    private static String formatColumn(String xpath, Nodes matches) {
        int count = matches.size();
        if (count == 0) {
            return "";
        }
        if (YEAR_MIN_XPATH.equals(xpath)) {
            return firstYear(textOf(matches, 0, ""));
        }
        if (YEAR_MAX_XPATH.equals(xpath)) {
            return lastYear(textOf(matches, count - 1, ""));
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < count; i++) {
            if (i > 0) {
                joined.append("; ");
            }
            joined.append(textOf(matches, i, " "));
        }
        return joined.toString();
    }

    /** Returns the first four characters of the trimmed text, or all of it when shorter. */
    private static String firstYear(String text) {
        String trimmed = text.trim();
        return trimmed.length() <= 4 ? trimmed : trimmed.substring(0, 4);
    }

    /**
     * Returns up to four characters of the trimmed text ending at the length the text has
     * once every "s" is removed (so "1990s" yields "1990").
     */
    private static String lastYear(String text) {
        String trimmed = text.trim();
        int end = trimmed.replace("s", "").trim().length();
        return trimmed.substring(Math.max(0, end - 4), end);
    }

    /** Returns a string of {@code count} tab characters. */
    private static String tabs(int count) {
        StringBuilder builder = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            builder.append('\t');
        }
        return builder.toString();
    }

    /**
     * Writes one line per match of {@code xpath} (file path, tab, match text) to the file
     * {@code outputName} in the output folder.
     *
     * @param files      candidate input files; only those whose name ends with "xml" are read
     * @param xpath      XPath expression evaluated against each document
     * @param outputName name of the result file inside the output folder
     */
    private void getQuery(File[] files, String xpath, String outputName) {
        FileWriter writer = openOutput(outputName);
        if (writer == null) {
            return; // the failure has already been reported by openOutput
        }
        try {
            for (File file : files) {
                if (!isXml(file)) {
                    continue;
                }
                Document document = parse(file);
                if (document == null) {
                    continue; // unreadable or malformed; already reported by parse
                }
                try {
                    Nodes matches = document.query(xpath);
                    String path = file.getCanonicalPath();
                    for (int i = 0; i < matches.size(); i++) {
                        writer.write(path + "\t" + textOf(matches, i, " ") + "\n");
                    }
                    writer.flush();
                } catch (IOException e) {
                    // A write failure for one document does not stop the remaining documents.
                    e.printStackTrace();
                }
            }
        } finally {
            close(writer);
        }
    }

    /** Tells whether the file name ends with "xml". */
    private static boolean isXml(File file) {
        return file.getName().endsWith("xml");
    }

    /** Returns the text of the match at {@code index}, as produced by ExtractFromXML. */
    private static String textOf(Nodes nodes, int index, String separator) {
        // Nodes.get returns a Node; every query in this class selects elements.
        return ExtractFromXML.getStringValue((Element) nodes.get(index), separator);
    }

    /**
     * Parses one XML file, decoding it as UTF-8, and always closes the underlying stream.
     *
     * @return the parsed document, or {@code null} if the file could not be read or parsed
     */
    private static Document parse(File file) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        try {
            // NOTE(review): per the XOM API the second argument of build(Reader, String) is the
            // base URI, not an encoding; "UTF-8" is kept to preserve existing behaviour.
            return new Builder().build(reader, "UTF-8");
        } catch (ParsingException e) {
            // Also covers ValidityException, which is a subclass of ParsingException.
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            close(reader);
        }
    }

    /**
     * Creates the output folder (including missing parents) if needed and opens
     * {@code fileName} inside it for writing.
     *
     * <p>NOTE(review): FileWriter encodes with the platform default charset while the input
     * is decoded as UTF-8; confirm whether the output should be forced to UTF-8.
     *
     * @return the opened writer, or {@code null} if the file could not be opened
     */
    private FileWriter openOutput(String fileName) {
        File target;
        if (this.outputFolder.isEmpty()) {
            // An empty folder means the working directory; File("", name) would point at the root.
            target = new File(fileName);
        } else {
            File folder = new File(this.outputFolder);
            if (!folder.exists() && !folder.mkdirs()) {
                System.err.println("Could not create output folder " + folder);
            }
            target = new File(folder, fileName);
        }
        try {
            return new FileWriter(target);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the resource if it is non-null, reporting (not propagating) any failure. */
    private static void close(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
