package pl.edu.icm.coansys.deduplication.document;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import pl.edu.icm.coansys.commons.java.DocumentWrapperUtils;
import pl.edu.icm.coansys.commons.spring.DiReduceService;
import pl.edu.icm.coansys.deduplication.document.keygenerator.WorkKeyGenerator;
import pl.edu.icm.coansys.models.DocumentProtos;

/**
 * Reduce-side service that looks for duplicate works among the documents
 * received for a single reduce key.
 *
 * <p>The documents are extracted from the reduce values and, while a set is
 * larger than the current size threshold, recursively split into smaller packs
 * using keys produced by {@link WorkKeyGenerator}. Packs that fit within the
 * threshold are handed to {@link DuplicateWorkService#findDuplicates}; unless
 * debug mode is enabled, the resulting groups are written to the context as
 * {@code (<key>_<groupId>, <documentKey>)} pairs.
 *
 * <p>The thresholds are stored in instance fields that are overwritten from the
 * job configuration on every {@link #reduce} call.
 */
@Service("duplicateWorkDetectReduceService")
public class DuplicateWorkDetectReduceService implements DiReduceService<Text, BytesWritable, Text, Text> {
    private static final Logger log = LoggerFactory.getLogger(DuplicateWorkDetectReduceService.class);

    /** Configuration key: size above which a document set is split further. */
    private static final String CONF_INITIAL_MAX_DOCS_SET_SIZE = "INITIAL_MAX_DOCS_SET_SIZE";

    /** Configuration key: threshold increment applied when a split did not separate anything. */
    private static final String CONF_MAX_DOCS_SET_SIZE_INC = "MAX_DOCS_SET_SIZE_INC";

    /** Configuration key: split level above which an unsplittable pack is cut in half. */
    private static final String CONF_MAX_SPLIT_LEVEL = "MAX_SPLIT_LEVEL";

    /** Configuration key: when set to {@code "true"}, duplicates are not written by this class. */
    private static final String CONF_DEBUG_MODE = "DEDUPLICATION_DEBUG_MODE";

    /** Separates the generated part of a pack key from its suffix. */
    private static final String KEY_SEPARATOR = "-";

    @Autowired
    private DuplicateWorkService duplicateWorkService;

    @Autowired
    private WorkKeyGenerator keyGen;
    private int initialMaxDocsSetSize;
    private int maxDocsSetSizeInc;
    private int maxSplitLevel;

    /** Issue marker; not referenced by any code in this class. */
    enum UnparseableIssue {
        UNPARSEABLE
    }

    /**
     * Processes all documents received for one reduce key.
     *
     * <p>Reads the three size/level settings from the job configuration (falling
     * back to the values currently held by this instance), then starts the
     * recursive processing at level 0. The elapsed time of the processing step is
     * logged; document extraction is not included in that measurement.
     *
     * @param key     reduce key
     * @param values  serialized documents for the key
     * @param context reducer context used for configuration, output and progress
     * @throws IOException          propagated from writing to the context
     * @throws InterruptedException propagated from writing to the context
     */
    @Override
    public void reduce(Text key, Iterable<BytesWritable> values, Reducer<Text, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException {
        log.info("starting reduce, key: {}", key);
        List<DocumentProtos.DocumentMetadata> documents = DocumentWrapperUtils.extractDocumentMetadata(key, values);
        long start = System.currentTimeMillis();

        Configuration conf = context.getConfiguration();
        this.initialMaxDocsSetSize = conf.getInt(CONF_INITIAL_MAX_DOCS_SET_SIZE, this.initialMaxDocsSetSize);
        this.maxDocsSetSizeInc = conf.getInt(CONF_MAX_DOCS_SET_SIZE_INC, this.maxDocsSetSizeInc);
        this.maxSplitLevel = conf.getInt(CONF_MAX_SPLIT_LEVEL, this.maxSplitLevel);

        process(key, context, documents, 0, this.initialMaxDocsSetSize);
        log.info("time [msec]: {}", System.currentTimeMillis() - start);
    }

    /**
     * Recursively processes a set of documents.
     *
     * <p>Sets with fewer than two documents are skipped. Sets larger than
     * {@code maxDocsSetSize} are split with {@link #splitDocuments} and each pack
     * is processed at the next level; smaller sets are passed to the duplicate
     * finder.
     *
     * @param key            key of the current document set
     * @param context        reducer context
     * @param documents      documents to process
     * @param level          current recursion level (0 for the initial call)
     * @param maxDocsSetSize largest set size that is processed without splitting
     * @throws IOException          propagated from writing to the context
     * @throws InterruptedException propagated from writing to the context
     */
    void process(Text key, Reducer<Text, BytesWritable, Text, Text>.Context context, List<DocumentProtos.DocumentMetadata> documents, int level, int maxDocsSetSize) throws IOException, InterruptedException {
        String dashes = getDashes(level);
        log.info(dashes + "start process, key: {}, number of documents: {}", key, documents.size());
        if (documents.size() < 2) {
            log.info(dashes + "one document only, ommiting");
            return;
        }

        int nextLevel = level + 1;
        int threshold = maxDocsSetSize;
        if (documents.size() > threshold) {
            Map<Text, List<DocumentProtos.DocumentMetadata>> packs = splitDocuments(key, documents, nextLevel);
            log.info(dashes + "documents split into: {} packs", packs.size());
            for (Map.Entry<Text, List<DocumentProtos.DocumentMetadata>> pack : packs.entrySet()) {
                // A pack as large as the input means the split separated nothing,
                // so the threshold is raised before recursing.
                if (pack.getValue().size() == documents.size()) {
                    threshold += this.maxDocsSetSizeInc;
                }
                process(pack.getKey(), context, pack.getValue(), nextLevel, threshold);
            }
        } else {
            if (isDebugMode(context.getConfiguration())) {
                // In debug mode the context is handed to the finder and its result is not written here.
                this.duplicateWorkService.findDuplicates(documents, context);
            } else {
                saveDuplicatesToContext(this.duplicateWorkService.findDuplicates(documents, null), key, context);
            }
            context.progress();
        }
        log.info(dashes + "end process, key: {}", key);
    }

    /**
     * Builds the log prefix for a recursion level.
     *
     * @param level recursion level
     * @return a string of {@code level + 1} dashes (empty for negative levels)
     */
    private String getDashes(int level) {
        StringBuilder prefix = new StringBuilder();
        for (int n = 0; n <= level; n++) {
            prefix.append("-");
        }
        return prefix.toString();
    }

    /**
     * Tells whether debug mode is switched on in the configuration.
     *
     * @param configuration job configuration, may be {@code null}
     * @return {@code true} only if the debug-mode property equals {@code "true"}
     */
    private boolean isDebugMode(Configuration configuration) {
        if (configuration == null) {
            return false;
        }
        return "true".equals(configuration.get(CONF_DEBUG_MODE, "false"));
    }

    /**
     * Splits documents into packs keyed by a key generated for the given level.
     *
     * <p>If the incoming key carries a suffix (see {@link #extractSuffix}), the
     * suffix is appended to every generated key. When the level exceeds the
     * configured maximum split level and all documents still land in a single
     * pack, that pack is cut into two halves whose keys end with {@code "0"} and
     * {@code "1"}.
     *
     * @param key       key of the document set being split
     * @param documents documents to split
     * @param level     level passed to the key generator
     * @return map from pack key to the documents in that pack; in the forced-halving
     *         case the values are {@code subList} views of the single pack
     */
    Map<Text, List<DocumentProtos.DocumentMetadata>> splitDocuments(Text key, List<DocumentProtos.DocumentMetadata> documents, int level) {
        String suffix = extractSuffix(key.toString());

        Map<Text, List<DocumentProtos.DocumentMetadata>> packs = Maps.newHashMap();
        for (DocumentProtos.DocumentMetadata document : documents) {
            String generated = this.keyGen.generateKey(document, level);
            if (!suffix.isEmpty()) {
                generated = generated + KEY_SEPARATOR + suffix;
            }
            Text packKey = new Text(generated);
            List<DocumentProtos.DocumentMetadata> pack = packs.get(packKey);
            if (pack == null) {
                pack = Lists.newArrayList();
                packs.put(packKey, pack);
            }
            pack.add(document);
        }

        if (level > this.maxSplitLevel && packs.size() == 1) {
            Map.Entry<Text, List<DocumentProtos.DocumentMetadata>> only = packs.entrySet().iterator().next();
            String base = only.getKey().toString();
            if (!base.contains(KEY_SEPARATOR)) {
                base = base + KEY_SEPARATOR;
            }
            List<DocumentProtos.DocumentMetadata> all = only.getValue();
            int size = all.size();
            int middle = size / 2;

            Map<Text, List<DocumentProtos.DocumentMetadata>> halves = Maps.newHashMap();
            halves.put(new Text(base + "0"), all.subList(0, middle));
            halves.put(new Text(base + "1"), all.subList(middle, size));
            return halves;
        }
        return packs;
    }

    /**
     * Extracts the suffix of a key: the segment between the first and the second
     * separator (or the end of the key).
     *
     * <p>The limit {@code -1} keeps trailing empty segments, so a key that ends
     * with the separator yields an empty suffix instead of failing with an
     * {@link ArrayIndexOutOfBoundsException}.
     * NOTE(review): only the second segment is used even if the key contains
     * more separators - confirm that generated keys never contain the separator.
     *
     * @param key textual form of the key
     * @return the suffix, or an empty string when the key has no separator
     */
    private static String extractSuffix(String key) {
        if (!key.contains(KEY_SEPARATOR)) {
            return "";
        }
        return key.split(KEY_SEPARATOR, -1)[1];
    }

    /**
     * Writes every duplicate group to the context, one record per document.
     *
     * @param duplicates groups of duplicate documents indexed by group id
     * @param key        key of the processed document set, used as output key prefix
     * @param context    reducer context receiving {@code (<key>_<groupId>, <documentKey>)}
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    private void saveDuplicatesToContext(Map<Integer, Set<DocumentProtos.DocumentMetadata>> duplicates, Text key, Reducer<Text, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException {
        for (Map.Entry<Integer, Set<DocumentProtos.DocumentMetadata>> group : duplicates.entrySet()) {
            Text groupKey = new Text(key.toString() + "_" + group.getKey());
            for (DocumentProtos.DocumentMetadata document : group.getValue()) {
                context.write(groupKey, new Text(document.getKey()));
            }
        }
    }

    /**
     * Sets the initial size threshold above which a document set is split.
     *
     * @param beginPackSize initial threshold (injected default: 1000)
     */
    @Value("1000")
    public void setBeginPackSize(int beginPackSize) {
        this.initialMaxDocsSetSize = beginPackSize;
    }

    /**
     * Sets the amount by which the threshold grows when a split separates nothing.
     *
     * @param packSizeInc threshold increment (injected default: 200)
     */
    @Value("200")
    public void setPackSizeInc(int packSizeInc) {
        this.maxDocsSetSizeInc = packSizeInc;
    }

    /**
     * Sets the split level above which a single remaining pack is cut in half.
     *
     * @param maxSplitLevels maximum split level (injected default: 10)
     */
    @Value("10")
    public void setMaxSplitLevels(int maxSplitLevels) {
        this.maxSplitLevel = maxSplitLevels;
    }
}
