package pl.edu.icm.coansys.coansys.io.blog.crawler;

import com.google.protobuf.ByteString;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.url.WebURL;
import pl.edu.icm.coansys.models.DocumentProtos;

/* loaded from: input_file:pl/edu/icm/coansys/coansys/io/blog/crawler/SiteDataProcessor.class */
/**
 * Base class for site-specific processors used by the blog crawler.
 *
 * <p>Provides two shared helpers (row-id generation from an entry URL and
 * attaching the raw page content to a document builder) and leaves the
 * site-specific decisions to subclasses via the abstract methods.
 */
public abstract class SiteDataProcessor {
    /** Domain value for subclasses; this class neither reads nor assigns it. */
    protected String domain;

    /**
     * Converts a blog entry URL into a row identifier.
     *
     * <p>Everything from the first {@code '?'} onwards is dropped, every
     * character that is not an ASCII letter, digit or {@code '-'} is replaced
     * with {@code '-'}, and the result is appended to the fixed prefix
     * {@code http://comac.icm.edu.pl/}.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally
     * declared {@code protected}.
     *
     * @param str the blog entry URL; must not be {@code null}
     * @return the row identifier derived from {@code str}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String blogEntryUrlToRowId(String str) {
        final int queryStart = str.indexOf('?');
        // A negative index means there is no query part to strip.
        final String withoutQuery = queryStart >= 0 ? str.substring(0, queryStart) : str;
        final String sanitized = withoutQuery.replaceAll("[^a-zA-Z0-9-]", "-");
        return "http://comac.icm.edu.pl/" + sanitized;
    }

    /**
     * Appends a new media entry holding the page's content bytes to the
     * document's media container, keyed {@code "HTML"} with media type
     * {@code "text/html"}.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally
     * declared {@code protected}.
     *
     * @param builder the document builder whose media container receives the entry
     * @param page    the crawled page supplying the content bytes
     */
    public void addContent(DocumentProtos.DocumentWrapper.Builder builder, Page page) {
        final DocumentProtos.Media.Builder htmlMedia =
                builder.getMediaContainerBuilder().addMediaBuilder();
        htmlMedia.setKey("HTML");
        htmlMedia.setMediaType("text/html");
        // The content is read only after key and type are set, as in the original ordering.
        final byte[] rawContent = page.getContentData();
        htmlMedia.setContent(ByteString.copyFrom(rawContent));
    }

    /**
     * Site-specific predicate over a crawled URL.
     *
     * <p>NOTE(review): presumably reports whether the URL is an entry of the
     * site this processor handles; the exact contract is defined by subclasses.
     *
     * @param webURL the URL to examine
     * @return the subclass-defined decision for {@code webURL}
     */
    public abstract boolean isSiteEntry(WebURL webURL);

    /**
     * Site-specific conversion of a crawled page into a document builder.
     *
     * @param page the crawled page to process
     * @return the document builder produced by the subclass
     */
    public abstract DocumentProtos.DocumentWrapper.Builder processPageFromSite(Page page);
}
