package pl.edu.icm.coansys.output.merge.doc;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import pl.edu.icm.coansys.commons.java.DiacriticsRemover;
import pl.edu.icm.coansys.commons.java.Pair;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/output/merge/doc/AdvancedDuplicatesMerger.class */
/**
 * Merges a group of duplicate documents into a single document.
 *
 * <p>One document of the group is chosen as the base (see {@link #chooseTheBestIndex(List)}).
 * Its row id and key are replaced by a UUID derived from the sorted row ids of all duplicates,
 * and the collections, external identifiers, auxiliary infos, keywords and author external
 * identifiers of every duplicate are folded into it.
 */
public class AdvancedDuplicatesMerger implements DuplicatesMerger {

    /** One preference entry: {@code <signed integer>:<collection name>}. */
    private static final Pattern PREFERENCE_PATTERN = Pattern.compile("^(-?\\d+):(.+)");

    /** Captures the trailing {@code #c<digits>} part of an author key. */
    private static final Pattern AUTHOR_KEY_SUFFIX_PATTERN = Pattern.compile(".*(#c\\d+)");

    /** Collection name recorded for documents that declare no collection. */
    private static final String UNKNOWN_COLLECTION = "unknown";

    /** Separator placed between comments that are concatenated during merging. */
    private static final String COMMENT_SEPARATOR = "\t";

    /** Collection name mapped to its preference; a higher value wins in {@link #chooseTheBestIndex(List)}. */
    private Map<String, Integer> collectionPreferences;

    /**
     * Parses the collection preferences.
     *
     * <p>The configuration is a comma-separated list of {@code <integer>:<collection>} entries.
     * Entries are trimmed; entries that do not match that shape are skipped. A {@code null}
     * configuration yields an empty preference map.
     *
     * @param str the preference configuration, may be {@code null}
     * @throws NumberFormatException if the numeric part of an entry does not fit into an {@code int}
     */
    @Override
    public void setup(String str) {
        Map<String, Integer> preferences = new HashMap<>();
        if (str != null) {
            for (String entry : str.split(",")) {
                Matcher matcher = PREFERENCE_PATTERN.matcher(entry.trim());
                if (matcher.matches()) {
                    preferences.put(matcher.group(2), Integer.valueOf(matcher.group(1)));
                }
            }
        }
        this.collectionPreferences = preferences;
    }

    /**
     * Merges the given duplicates into one document.
     *
     * @param list the duplicates to merge
     * @return the single element itself when the list has exactly one element, otherwise a new
     *         wrapper built from the preferred duplicate and enriched with data of the others
     * @throws RuntimeException if {@code list} is {@code null} or empty
     */
    @Override
    public DocumentProtos.DocumentWrapper merge(List<DocumentProtos.DocumentWrapper> list) {
        if (list == null || list.isEmpty()) {
            throw new RuntimeException("Nothing to merge");
        }
        if (list.size() == 1) {
            return list.get(0);
        }
        int bestIndex = chooseTheBestIndex(list);

        // Gather everything that has to be combined across all duplicates.
        List<String> rowIds = new ArrayList<>(list.size());
        List<DocumentProtos.KeyValue> allExtIds = new ArrayList<>();
        List<DocumentProtos.KeyValue> allAuxiliarInfos = new ArrayList<>();
        Set<String> collections = new TreeSet<>();
        List<DocumentProtos.KeywordsList> allKeywords = new ArrayList<>();
        for (DocumentProtos.DocumentWrapper wrapper : list) {
            DocumentProtos.DocumentMetadata metadata = wrapper.getDocumentMetadata();
            rowIds.add(wrapper.getRowId());
            List<String> documentCollections = metadata.getCollectionList();
            if (documentCollections.isEmpty()) {
                collections.add(UNKNOWN_COLLECTION);
            } else {
                collections.addAll(documentCollections);
            }
            allExtIds.addAll(metadata.getExtIdList());
            allAuxiliarInfos.addAll(metadata.getAuxiliarInfoList());
            allKeywords.addAll(metadata.getKeywordsList());
        }

        // Sorting makes the identifier independent of the order of the duplicates; the explicit
        // charset makes it independent of the platform default encoding.
        Collections.sort(rowIds);
        String mergedId = UUID.nameUUIDFromBytes(
                StringUtils.join(rowIds, "+").getBytes(StandardCharsets.UTF_8)).toString();

        DocumentProtos.DocumentWrapper.Builder resultBuilder =
                DocumentProtos.DocumentWrapper.newBuilder(list.get(bestIndex));
        resultBuilder.setRowId(mergedId);
        DocumentProtos.DocumentMetadata.Builder metadataBuilder = resultBuilder.getDocumentMetadataBuilder();
        DocumentProtos.BasicMetadata.Builder basicMetadataBuilder = metadataBuilder.getBasicMetadataBuilder();
        metadataBuilder.setKey(mergedId);
        metadataBuilder.addAllOrigKey(rowIds);

        // The builder starts as a copy of the preferred document, whose own values are already
        // part of the gathered data. Clear each repeated field before adding the merged values,
        // otherwise the preferred document's entries would appear twice.
        metadataBuilder.clearCollection();
        metadataBuilder.addAllCollection(collections);

        // Re-key the authors of the base document so that they point at the merged document.
        List<DocumentProtos.Author.Builder> authorBuilders = basicMetadataBuilder.getAuthorBuilderList();
        for (DocumentProtos.Author.Builder authorBuilder : authorBuilders) {
            String suffix = AUTHOR_KEY_SUFFIX_PATTERN.matcher(authorBuilder.getKey()).replaceAll("$1");
            authorBuilder.setDocId(mergedId);
            authorBuilder.setKey(mergedId + suffix);
        }

        List<List<DocumentProtos.Author>> matchedAuthors = new ArrayList<>();
        for (int i = 0; i < list.size(); i++) {
            if (i == bestIndex) {
                continue;
            }
            List<DocumentProtos.Author> matched = matchAuthors(
                    authorBuilders, list.get(i).getDocumentMetadata().getBasicMetadata().getAuthorList());
            if (matched != null) {
                matchedAuthors.add(matched);
            }
        }
        mergeAuthors(authorBuilders, matchedAuthors);

        metadataBuilder.clearExtId();
        metadataBuilder.addAllExtId(mergeKeyValues(allExtIds));
        metadataBuilder.clearAuxiliarInfo();
        metadataBuilder.addAllAuxiliarInfo(mergeKeyValues(allAuxiliarInfos));
        metadataBuilder.clearKeywords();
        metadataBuilder.addAllKeywords(mergeKeywords(allKeywords));
        return resultBuilder.build();
    }

    /**
     * Adds the external identifiers of matched authors to the base authors.
     *
     * @param list  the author builders of the base document; modified in place
     * @param list2 one list per other duplicate, position-aligned with {@code list}; a
     *              {@code null} element means "no matching author". Lists that are {@code null}
     *              or shorter than {@code list} simply contribute nothing for the missing positions.
     */
    protected void mergeAuthors(List<DocumentProtos.Author.Builder> list, List<List<DocumentProtos.Author>> list2) {
        for (int i = 0; i < list.size(); i++) {
            DocumentProtos.Author.Builder target = list.get(i);
            List<DocumentProtos.KeyValue> extIds = new ArrayList<>(target.getExtIdList());
            for (List<DocumentProtos.Author> matched : list2) {
                if (matched == null || i >= matched.size()) {
                    continue;
                }
                DocumentProtos.Author author = matched.get(i);
                if (author != null) {
                    extIds.addAll(author.getExtIdList());
                }
            }
            target.clearExtId();
            target.addAllExtId(mergeKeyValues(extIds));
        }
    }

    /**
     * Aligns the authors of another duplicate with the authors of the base document.
     *
     * <p>Two authors match when their names are equal, or when both their forenames and their
     * surnames are equal; the comparison ignores case and diacritics and never matches empty
     * values. Each author of {@code list2} is used for at most one match.
     *
     * @param list  the author builders of the base document
     * @param list2 the authors of the other duplicate; not modified
     * @return a list of the same size as {@code list} holding, at each position, the matching
     *         author or {@code null} when none was found
     */
    protected List<DocumentProtos.Author> matchAuthors(List<DocumentProtos.Author.Builder> list, List<DocumentProtos.Author> list2) {
        List<DocumentProtos.Author> aligned = new ArrayList<>(list.size());
        List<DocumentProtos.Author> candidates = new ArrayList<>(list2);
        for (DocumentProtos.Author.Builder base : list) {
            DocumentProtos.Author match = null;
            Iterator<DocumentProtos.Author> it = candidates.iterator();
            while (it.hasNext()) {
                DocumentProtos.Author candidate = it.next();
                boolean sameName = equalsIgnoreCaseIgnoreDiacritics(base.getName(), candidate.getName());
                boolean sameParts = equalsIgnoreCaseIgnoreDiacritics(base.getForenames(), candidate.getForenames())
                        && equalsIgnoreCaseIgnoreDiacritics(base.getSurname(), candidate.getSurname());
                if (sameName || sameParts) {
                    match = candidate;
                    // Remove exactly the matched element so it cannot be matched a second time.
                    it.remove();
                    break;
                }
            }
            aligned.add(match);
        }
        return aligned;
    }

    /**
     * Compares two strings ignoring case and diacritics.
     *
     * @return {@code false} when either string is {@code null} or empty, otherwise whether the
     *         strings are equal after diacritics removal, ignoring case
     */
    private boolean equalsIgnoreCaseIgnoreDiacritics(String str, String str2) {
        if (str == null || str2 == null || str.isEmpty() || str2.isEmpty()) {
            return false;
        }
        return DiacriticsRemover.removeDiacritics(str).equalsIgnoreCase(DiacriticsRemover.removeDiacritics(str2));
    }

    /**
     * Collapses entries that share the same key and value into one entry.
     *
     * <p>The non-empty comments of collapsed entries are joined with a tab character. The order
     * of the returned entries follows the iteration order of a {@link HashMap}.
     *
     * @param list the entries to merge
     * @return a new list with one entry per distinct (key, value) combination
     */
    public static List<DocumentProtos.KeyValue> mergeKeyValues(List<DocumentProtos.KeyValue> list) {
        Map<Pair<String, String>, String> commentsByEntry = new HashMap<>();
        for (DocumentProtos.KeyValue keyValue : list) {
            Pair<String, String> entryKey = new Pair<>(keyValue.getKey(), keyValue.getValue());
            String comment = keyValue.getComment();
            if (!commentsByEntry.containsKey(entryKey)) {
                commentsByEntry.put(entryKey, comment);
            } else if (!comment.isEmpty()) {
                String existing = commentsByEntry.get(entryKey);
                commentsByEntry.put(entryKey,
                        existing.isEmpty() ? comment : existing + COMMENT_SEPARATOR + comment);
            }
        }

        List<DocumentProtos.KeyValue> merged = new ArrayList<>(commentsByEntry.size());
        for (Map.Entry<Pair<String, String>, String> entry : commentsByEntry.entrySet()) {
            DocumentProtos.KeyValue.Builder builder = DocumentProtos.KeyValue.newBuilder();
            builder.setKey(entry.getKey().getX());
            builder.setValue(entry.getKey().getY());
            String comment = entry.getValue();
            if (!comment.isEmpty()) {
                builder.setComment(comment);
            }
            merged.add(builder.build());
        }
        return merged;
    }

    /**
     * Chooses the duplicate that serves as the base of the merged document.
     *
     * <p>Every collection of every document is looked up in the configured preferences
     * (collections without a configured preference count as 0); the document owning the
     * collection with the highest preference wins, the earliest one on ties. Documents without
     * any collection are not considered.
     *
     * @param list the duplicates
     * @return the index of the preferred document, or 0 when no preferences are configured or no
     *         document declares a collection
     */
    protected int chooseTheBestIndex(List<DocumentProtos.DocumentWrapper> list) {
        if (this.collectionPreferences == null || this.collectionPreferences.isEmpty()) {
            return 0;
        }
        int bestIndex = 0;
        int bestPreference = Integer.MIN_VALUE;
        for (int index = 0; index < list.size(); index++) {
            for (String collection : list.get(index).getDocumentMetadata().getCollectionList()) {
                Integer configured = this.collectionPreferences.get(collection);
                int preference = configured != null ? configured.intValue() : 0;
                if (preference > bestPreference) {
                    bestPreference = preference;
                    bestIndex = index;
                }
            }
        }
        return bestIndex;
    }

    /**
     * Collapses keyword lists that share the same type and language into one list.
     *
     * <p>The keywords of collapsed lists are united into a set and their non-empty comments are
     * joined with a tab character. Type, language and comment are only set on the result when
     * they are non-empty.
     *
     * @param list the keyword lists to merge
     * @return a new list with one keyword list per distinct (type, language) combination
     */
    private List<DocumentProtos.KeywordsList> mergeKeywords(List<DocumentProtos.KeywordsList> list) {
        // (type, language) -> (united keywords, joined comment)
        Map<Pair<String, String>, Pair<Set<String>, String>> groups = new HashMap<>();
        for (DocumentProtos.KeywordsList keywordsList : list) {
            Pair<String, String> groupKey = new Pair<>(keywordsList.getType(), keywordsList.getLanguage());
            String comment = keywordsList.getComment();
            Pair<Set<String>, String> group = groups.get(groupKey);
            if (group == null) {
                group = new Pair<Set<String>, String>(new HashSet<String>(), comment);
                groups.put(groupKey, group);
            } else if (!comment.isEmpty()) {
                String existing = group.getY();
                group.setY(existing.isEmpty() ? comment : existing + COMMENT_SEPARATOR + comment);
            }
            group.getX().addAll(keywordsList.getKeywordsList());
        }

        List<DocumentProtos.KeywordsList> merged = new ArrayList<>(groups.size());
        for (Map.Entry<Pair<String, String>, Pair<Set<String>, String>> entry : groups.entrySet()) {
            DocumentProtos.KeywordsList.Builder builder = DocumentProtos.KeywordsList.newBuilder();
            String type = entry.getKey().getX();
            String language = entry.getKey().getY();
            String comment = entry.getValue().getY();
            if (type != null && !type.isEmpty()) {
                builder.setType(type);
            }
            if (language != null && !language.isEmpty()) {
                builder.setLanguage(language);
            }
            if (comment != null && !comment.isEmpty()) {
                builder.setComment(comment);
            }
            builder.addAllKeywords(entry.getValue().getX());
            merged.add(builder.build());
        }
        return merged;
    }
}
