package pl.edu.icm.commoncrawl.filters;

import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import pl.edu.icm.generated.protobuf.commoncrawl.ScholarRecordProtos;

/* loaded from: input_file:pl/edu/icm/commoncrawl/filters/GSProtoSfUrlsIterator.class */
/**
 * Iterates over URLs stored as serialized {@code ScholarRecordP} messages in a
 * Hadoop {@link SequenceFile} on the local file system.
 *
 * <p>Each call to {@link #hasNext()} advances the reader by a random number of
 * records (between 1 and {@code maxJump - 1}). When the end of the file is
 * reached, the file is re-opened and reading continues from the beginning.
 * {@link #next()} returns the URL of the current record if its host has not been
 * returned before, or if {@code maxProbesFromLastSuccess} consecutive records
 * were rejected because their hosts had already been seen.
 *
 * <p>{@link #hasNext()}, {@link #next()} and {@link #close()} are synchronized
 * on this instance.
 */
public class GSProtoSfUrlsIterator implements Iterator<String>, AutoCloseable {
    private static final Logger LOGGER = Logger.getLogger(GSProtoSfUrlsIterator.class.getName());

    /** Exclusive upper bound for the random number of records skipped per {@link #hasNext()} call. */
    int maxJump = 1;
    /** Local path of the sequence file; it is prefixed with {@code file:///} when opened. */
    String file;
    /** Reusable value holder filled by the reader. */
    Writable value;
    /** Currently open reader, or {@code null} after {@link #close()}. */
    SequenceFile.Reader reader;
    /** Hosts for which a URL has already been returned. */
    HashSet<String> processedDomains = new HashSet<>();
    /** Number of consecutive records rejected since the last returned URL. */
    int probesFromLastSuccess = 0;
    /** After this many consecutive rejections a URL is returned even for an already-seen host. */
    int maxProbesFromLastSuccess = 30;
    Random rnd = new Random();
    /** Reusable key holder filled by the reader. */
    Writable key = null;
    /** {@code true} while {@link #value} holds a record that {@link #next()} has not consumed yet. */
    boolean has = false;
    Configuration conf = new Configuration();

    /**
     * Creates an iterator over the given sequence file and opens it immediately.
     *
     * @param str local path of the sequence file
     * @param i   exclusive upper bound for the random skip; values of 1 or less
     *            make the iterator advance exactly one record at a time
     * @throws URISyntaxException if the path cannot be turned into a {@code file:///} URI
     * @throws IOException        if the file cannot be opened
     */
    public GSProtoSfUrlsIterator(String str, int i) throws URISyntaxException, IOException {
        this.maxJump = i;
        this.file = str;
        open();
    }

    /**
     * Opens (or re-opens) the sequence file and allocates fresh key/value holders.
     * A previously opened reader is closed once the new one is ready, so that
     * wrapping around at end of file does not leak file handles.
     *
     * @throws URISyntaxException if the path cannot be turned into a {@code file:///} URI
     * @throws IOException        if the file cannot be opened or the old reader cannot be closed
     */
    void open() throws URISyntaxException, IOException {
        SequenceFile.Reader previous = this.reader;
        String location = "file:///" + this.file;
        this.reader = new SequenceFile.Reader(FileSystem.get(new URI(location), this.conf), new Path(location), this.conf);
        this.key = (Writable) ReflectionUtils.newInstance(this.reader.getKeyClass(), this.conf);
        System.out.println(this.key.getClass());
        this.value = (Writable) ReflectionUtils.newInstance(this.reader.getValueClass(), this.conf);
        if (previous != null) {
            previous.close();
        }
    }

    /**
     * Advances the reader by a random number of records unless an unconsumed
     * record is already pending.
     *
     * @return {@code true} if a record is available for {@link #next()};
     *         {@code false} if the last read hit the end of the file, an I/O
     *         error occurred, or the iterator has been closed
     */
    @Override
    public synchronized boolean hasNext() {
        if (this.has) {
            return true;
        }
        if (this.reader == null) {
            // Closed: nothing more to read.
            return false;
        }
        // Random.nextInt requires a positive bound; fall back to a single step when maxJump <= 1.
        int skip = this.maxJump > 1 ? this.rnd.nextInt(this.maxJump - 1) + 1 : 1;
        try {
            for (int i = 0; i < skip; i++) {
                this.has = this.reader.next(this.key, this.value);
                if (i % 100 == 0) {
                    System.out.println("HAs is: " + this.has + " i: " + i + " skip: " + skip);
                }
                if (!this.has) {
                    // End of file: wrap around so later reads start from the beginning.
                    try {
                        open();
                    } catch (URISyntaxException e) {
                        LOGGER.log(Level.SEVERE, "Cannot re-open sequence file " + this.file, e);
                    }
                }
            }
        } catch (IOException e) {
            // The value holder may be partially overwritten by the failed read; do not expose it.
            this.has = false;
            LOGGER.log(Level.SEVERE, "Error while reading sequence file " + this.file, e);
        }
        return this.has;
    }

    /**
     * Returns the URL of the next acceptable record. Records whose value is not
     * a {@link BytesWritable}, that fail to parse, or whose host was already
     * returned are skipped; the host filter is bypassed after
     * {@code maxProbesFromLastSuccess} consecutive rejections.
     *
     * @return the URL stored in the accepted record
     * @throws NoSuchElementException if {@link #hasNext()} reports no further record
     */
    @Override
    public synchronized String next() {
        // Iterative retry instead of recursion so long runs of skipped records cannot overflow the stack.
        while (true) {
            if (!this.has && !hasNext()) {
                throw new NoSuchElementException("End of file");
            }
            this.has = false;
            if (!(this.value instanceof BytesWritable)) {
                continue;
            }
            try {
                byte[] payload = ((BytesWritable) this.value).copyBytes();
                ScholarRecordProtos.ScholarRecordP record = ScholarRecordProtos.ScholarRecordP.parseFrom(ByteString.copyFrom(payload));
                String host = record.getHost();
                boolean unseenHost = !this.processedDomains.contains(host);
                if (unseenHost || this.probesFromLastSuccess >= this.maxProbesFromLastSuccess) {
                    this.probesFromLastSuccess = 0;
                    this.processedDomains.add(host);
                    return record.getUrl();
                }
                // Count the rejection so the maxProbesFromLastSuccess escape hatch can eventually fire.
                this.probesFromLastSuccess++;
            } catch (InvalidProtocolBufferException e) {
                LOGGER.log(Level.SEVERE, "Cannot parse record from sequence file " + this.file, e);
            }
        }
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Closes the underlying reader. Afterwards {@link #hasNext()} returns
     * {@code false}. Calling this method more than once has no further effect.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public synchronized void close() throws IOException {
        SequenceFile.Reader current = this.reader;
        this.reader = null;
        this.has = false;
        if (current != null) {
            current.close();
        }
    }
}
