package com.gravity.goose.extractors;

import com.gravity.goose.Article;
import com.gravity.goose.text.ReplaceSequence;
import com.gravity.goose.text.ReplaceSequence$;
import com.gravity.goose.text.StringReplacement;
import com.gravity.goose.text.StringReplacement$;
import com.gravity.goose.text.StringSplitter;
import com.gravity.goose.text.string$;
import java.net.URL;
import java.util.ArrayList;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;
import org.jsoup.select.TagsEvaluator;
import org.slf4j.Logger;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.collection.JavaConversions$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.Buffer$;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.HashSet$;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.NonLocalReturnControl;
import scala.runtime.ObjectRef;

/* compiled from: ContentExtractor.scala */
@ScalaSignature(bytes = "\u0006\u0001\teu!B\u0001\u0003\u0011\u0003Y\u0011\u0001E\"p]R,g\u000e^#yiJ\f7\r^8s\u0015\t\u0019A!\u0001\u0006fqR\u0014\u0018m\u0019;peNT!!\u0002\u0004\u0002\u000b\u001d|wn]3\u000b\u0005\u001dA\u0011aB4sCZLG/\u001f\u0006\u0002\u0013\u0005\u00191m\\7\u0004\u0001A\u0011A\"D\u0007\u0002\u0005\u0019)aB\u0001E\u0001\u001f\t\u00012i\u001c8uK:$X\t\u001f;sC\u000e$xN]\n\u0004\u001bA1\u0002CA\t\u0015\u001b\u0005\u0011\"\"A\n\u0002\u000bM\u001c\u0017\r\\1\n\u0005U\u0011\"AB!osJ+g\r\u0005\u0002\u001855\t\u0001D\u0003\u0002\u001a\t\u0005)Q\u000f^5mg&\u00111\u0004\u0007\u0002\b\u0019><w-\u001b8h\u0011\u0015iR\u0002\"\u0001\u001f\u0003\u0019a\u0014N\\5u}Q\t1\u0002C\u0004!\u001b\t\u0007I\u0011A\u0011\u0002\u00131|w\r\u0015:fM&DX#\u0001\u0012\u0011\u0005\rBS\"\u0001\u0013\u000b\u0005\u00152\u0013\u0001\u00027b]\u001eT\u0011aJ\u0001\u0005U\u00064\u0018-\u0003\u0002*I\t11\u000b\u001e:j]\u001eDaaK\u0007!\u0002\u0013\u0011\u0013A\u00037pOB\u0013XMZ5yA\u00199aB\u0001I\u0001\u0004\u0003i3C\u0001\u0017\u0011\u0011\u0015yC\u0006\"\u00011\u0003\u0019!\u0013N\\5uIQ\t\u0011\u0007\u0005\u0002\u0012e%\u00111G\u0005\u0002\u0005+:LG\u000fC\u00036Y\u0011\u0005a'A\u0005hKRdunZ4feR\tq\u0007\u0005\u00029{5\t\u0011H\u0003\u0002;w\u0005)1\u000f\u001c45U*\tA(A\u0002pe\u001eL!AP\u001d\u0003\r1{wmZ3s\u0011\u001d\u0001EF1A\u0005\u0002\u0005\u000b!#T(U\u0019\u0016KvLU#Q\u0019\u0006\u001bU)T#O)V\t!\t\u0005\u0002D\r6\tAI\u0003\u0002F\t\u0005!A/\u001a=u\u0013\t9EIA\tTiJLgn\u001a*fa2\f7-Z7f]RDa!\u0013\u0017!\u0002\u0013\u0011\u0015aE'P)2+\u0015l\u0018*F!2\u000b5)R'F\u001dR\u0003\u0003bB&-\u0005\u0004%\t!Q\u0001\u001d\u000bN\u001b\u0015\tU#E?\u001a\u0013\u0016iR'F\u001dR{&+\u0012)M\u0003\u000e+U*\u0012(U\u0011\u0019iE\u0006)A\u0005\u0005\u0006iRiU\"B!\u0016#uL\u0012*B\u000f6+e\nV0S\u000bBc\u0015iQ#N\u000b:#\u0006\u0005C\u0004PY\t\u0007I\u0011\u0001)\u0002%QKE\u000bT#`%\u0016\u0003F*Q\"F\u001b\u0016sEkU\u000b\u0002#B\u00111IU\u0005\u0003'\u0012\u0013qBU3qY\u0006\u001cWmU3rk\u0016t7-\u001a\
u0005\u0007+2\u0002\u000b\u0011B)\u0002'QKE\u000bT#`%\u0016\u0003F*Q\"F\u001b\u0016sEk\u0015\u0011\t\u000f]c#\u0019!C\u00011\u0006i\u0001+\u0013)F?N\u0003F*\u0013+U\u000bJ+\u0012!\u0017\t\u0003\u0007jK!a\u0017#\u0003\u001dM#(/\u001b8h'Bd\u0017\u000e\u001e;fe\"1Q\f\fQ\u0001\ne\u000ba\u0002U%Q\u000b~\u001b\u0006\u000bT%U)\u0016\u0013\u0006\u0005C\u0004`Y\t\u0007I\u0011\u0001-\u0002\u001b\u0011\u000b5\u000bS0T!2KE\u000bV#S\u0011\u0019\tG\u0006)A\u00053\u0006qA)Q*I?N\u0003F*\u0013+U\u000bJ\u0003\u0003bB2-\u0005\u0004%\t\u0001W\u0001\u0010\u0003J\u0013vjV*`'Bc\u0015\n\u0016+F%\"1Q\r\fQ\u0001\ne\u000b\u0001#\u0011*S\u001f^\u001bvl\u0015)M\u0013R#VI\u0015\u0011\t\u000f\u001dd#\u0019!C\u00011\u0006q1i\u0014'P\u001d~\u001b\u0006\u000bT%U)\u0016\u0013\u0006BB5-A\u0003%\u0011,A\bD\u001f2{ejX*Q\u0019&#F+\u0012*!\u0011\u001dYGF1A\u0005\u0002a\u000bab\u0015)B\u0007\u0016{6\u000b\u0015'J)R+%\u000b\u0003\u0004nY\u0001\u0006I!W\u0001\u0010'B\u000b5)R0T!2KE\u000bV#SA!9q\u000e\fb\u0001\n\u0003\u0001\u0018A\u0003(P?N#&+\u0013(H'V\t\u0011\u000fE\u0002sofl\u0011a\u001d\u0006\u0003iV\f\u0011\"[7nkR\f'\r\\3\u000b\u0005Y\u0014\u0012AC2pY2,7\r^5p]&\u0011\u0001p\u001d\u0002\u0004'\u0016$\bC\u0001>~\u001d\t\t20\u0003\u0002}%\u00051\u0001K]3eK\u001aL!!\u000b@\u000b\u0005q\u0014\u0002bBA\u0001Y\u0001\u0006I!]\u0001\f\u001d>{6\u000b\u0016*J\u001d\u001e\u001b\u0006\u0005C\u0005\u0002\u00061\u0012\r\u0011\"\u0001\u0002\b\u0005\u0011\u0012i\u0018*F\u0019~#\u0016iR0T\u000b2+5\tV(S+\u0005I\bbBA\u0006Y\u0001\u0006I!_\u0001\u0014\u0003~\u0013V\tT0U\u0003\u001e{6+\u0012'F\u0007R{%\u000b\t\u0005\n\u0003\u001fa#\u0019!C\u0001\u0003#\tQ\u0002V(Q?:{E)R0U\u0003\u001e\u001bVCAA\n!\u0011\t)\"a\b\u000e\u0005\u0005]!\u0002BA\r\u00037\taa]3mK\u000e$(bAA\u000fw\u0005)!n]8va&!\u0011\u0011EA\f\u00055!\u0016mZ:Fm\u0006dW/\u0019;pe\"A\u0011Q\u0005\u0017!\u0002\u0013\t\u0019\"\u0001\bU\u001fB{fj\u0014#F?R\u000bui\u0015\u0011\t\u000f\u0005%B\u0006\"\u0001\u0002,\u0005Aq-\u001a;USRdW\rF\u0002z\u0003[A\u0001\"a\f\u0002(\u0
001\u0007\u0011\u0011G\u0001\bCJ$\u0018n\u00197f!\u0011\t\u0019$!\u000e\u000e\u0003\u0011I1!a\u000e\u0005\u0005\u001d\t%\u000f^5dY\u0016Dq!a\u000f-\t\u0003\ti$A\u0007e_RKG\u000f\\3Ta2LGo\u001d\u000b\u0006s\u0006}\u00121\t\u0005\b\u0003\u0003\nI\u00041\u0001z\u0003\u0015!\u0018\u000e\u001e7f\u0011\u001d\t)%!\u000fA\u0002e\u000b\u0001b\u001d9mSR$XM\u001d\u0005\b\u0003\u0013bC\u0011BA&\u000399W\r^'fi\u0006\u001cuN\u001c;f]R$R!_A'\u0003;B\u0001\"a\u0014\u0002H\u0001\u0007\u0011\u0011K\u0001\u0004I>\u001c\u0007\u0003BA*\u00033j!!!\u0016\u000b\t\u0005]\u00131D\u0001\u0006]>$Wm]\u0005\u0005\u00037\n)F\u0001\u0005E_\u000e,X.\u001a8u\u0011\u001d\ty&a\u0012A\u0002e\f\u0001\"\\3uC:\u000bW.\u001a\u0005\b\u0003GbC\u0011AA3\u0003I9W\r^'fi\u0006$Um]2sSB$\u0018n\u001c8\u0015\u0007e\f9\u0007\u0003\u0005\u00020\u0005\u0005\u0004\u0019AA\u0019\u0011\u001d\tY\u0007\fC\u0001\u0003[\nqbZ3u\u001b\u0016$\u0018mS3zo>\u0014Hm\u001d\u000b\u0004s\u0006=\u0004\u0002CA\u0018\u0003S\u0002\r!!\r\t\u000f\u0005MD\u0006\"\u0001\u0002v\u0005\u0001r-\u001a;DC:|g.[2bY2Kgn\u001b\u000b\u0004s\u0006]\u0004\u0002CA\u0018\u0003c\u0002\r!!\r\t\u000f\u0005mD\u0006\"\u0001\u0002~\u0005Iq-\u001a;E_6\f\u0017N\u001c\u000b\u0004s\u0006}\u0004bBAA\u0003s\u0002\r!_\u0001\u0004kJd\u0007bBACY\u0011\u0005\u0011qQ\u0001\fKb$(/Y2u)\u0006<7\u000f\u0006\u0003\u0002\n\u00065\u0005\u0003\u0002>\u0002\ffL!\u0001\u001f@\t\u0011\u0005=\u00121\u0011a\u0001\u0003cAq!!%-\t\u0003\t\u0019*\u0001\u0012dC2\u001cW\u000f\\1uK\n+7\u000f\u001e(pI\u0016\u0014\u0015m]3e\u001f:\u001cE.^:uKJLgn\u001a\u000b\u0005\u0003+\u000b\t\u000bE\u0003\u0012\u0003/\u000bY*C\u0002\u0002\u001aJ\u0011aa\u00149uS>t\u0007\u0003BA*\u0003;KA!a(\u0002V\t9Q\t\\3nK:$\b\u0002CA\u0018\u0003\u001f\u0003\r!!\r\t\u000f\u0005\u0015F\u0006\"\u0001\u0002(\u0006i\u0001O]5oiR\u0013\u0018mY3M_\u001e$2!MAU\u0011!\tY+a)A\u0002\u0005m\u0015a\u0002;pa:{G-\u001a\u0005\b\u0003_cC\u0011BAY\u0003-I7oT6U_\n{wn\u001d;\u0015\t\u0005M\u0016\u0011\u0018\t\u0004#\u0005U\u0016bAA\\%\t9!i\\8m
K\u0006t\u0007\u0002CA^\u0003[\u0003\r!a'\u0002\t9|G-\u001a\u0005\b\u0003\u007fcC\u0011AAa\u000319W\r^*i_J$H+\u001a=u)\u0015I\u00181YAd\u0011\u001d\t)-!0A\u0002e\f\u0011!\u001a\u0005\t\u0003\u0013\fi\f1\u0001\u0002L\u0006\u0019Q.\u0019=\u0011\u0007E\ti-C\u0002\u0002PJ\u00111!\u00138u\u0011\u001d\t\u0019\u000e\fC\u0005\u0003+\f\u0011#[:IS\u001eDG*\u001b8l\t\u0016t7/\u001b;z)\u0011\t\u0019,a6\t\u0011\u0005\u0015\u0017\u0011\u001ba\u0001\u00037Cq!a7-\t\u0013\ti.\u0001\u0005hKR\u001c6m\u001c:f)\u0011\tY-a8\t\u0011\u0005m\u0016\u0011\u001ca\u0001\u00037Cq!a9-\t\u0013\t)/A\fhKR<%/\u0019<jif\u001c6m\u001c:f\rJ|WNT8eKR!\u0011q]Au!\u0015\t\u0012qSAf\u0011!\tY,!9A\u0002\u0005m\u0005bBAwY\u0011%\u0011q^\u0001\fkB$\u0017\r^3TG>\u0014X\rF\u00032\u0003c\f\u0019\u0010\u0003\u0005\u0002<\u0006-\b\u0019AAN\u0011!\t)0a;A\u0002\u0005-\u0017AC1eIR{7kY8sK\"9\u0011\u0011 \u0017\u0005\n\u0005m\u0018aD;qI\u0006$XMT8eK\u000e{WO\u001c;\u0015\u000bE\ni0a@\t\u0011\u0005m\u0016q\u001fa\u0001\u00037C\u0001B!\u0001\u0002x\u0002\u0007\u00111Z\u0001\u000bC\u0012$Gk\\\"pk:$\bb\u0002B\u0003Y\u0011\u0005!qA\u0001\u000eKb$(/Y2u-&$Wm\\:\u0015\t\t%!\u0011\u0005\t\u0007\u0005\u0017\u0011Y\"a'\u000f\t\t5!q\u0003\b\u0005\u0005\u001f\u0011)\"\u0004\u0002\u0003\u0012)\u0019!1\u0003\u0006\u0002\rq\u0012xn\u001c;?\u0013\u0005\u0019\u0012b\u0001B\r%\u00059\u0001/Y2lC\u001e,\u0017\u0002\u0002B\u000f\u0005?\u0011A\u0001T5ti*\u0019!\u0011\u0004\n\t\u0011\u0005m&1\u0001a\u0001\u00037CqA!\n-\t\u0003\u00119#\u0001\u0010jgR\u000b'\r\\3UC\u001e\fe\u000e\u001a(p!\u0006\u0014\u0018m\u001a:ba\"\u001cX\t_5tiR!\u00111\u0017B\u0015\u0011!\t)Ma\tA\u0002\u0005m\u0005b\u0002B\u0017Y\u0011\u0005!qF\u0001\u0016a>\u001cH/\u0012=ue\u0006\u001cG/[8o\u00072,\u0017M\\;q)\u0011\tYJ!\r\t\u0011\tM\"1\u0006a\u0001\u00037\u000b!\u0002^1sO\u0016$hj\u001c3f\u0011\u001d\u00119\u0004\fC\u0001\u0005s\t\u0001$[:O_\u0012,7kY8sKRC'/Z:iQ>dG-T3u)\u0019\t\u0019La\u000f\u0003>!A\u00111\u0018B\u001b\u0001\u0004\tY\n\u0003\u0005\u0002F\nU\u0002\u0019AAN\
u0011\u001d\u0011\t\u0005\fC\u0001\u0005\u0007\n\u0011cZ3u'&\u0014G.\u001b8h\u0007>tG/\u001a8u)\u0019\u0011)Ea\u0012\u0003LA!\u0011#a&z\u0011!\u0011IEa\u0010A\u0002\u0005m\u0015AD2veJ,g\u000e^*jE2Lgn\u001a\u0005\t\u0005\u001b\u0012y\u00041\u0001\u0002L\u0006\t#-Y:fY&tWmU2pe\u00164uN]*jE2Lgn\u001a)be\u0006<'/\u00199ig\"9!\u0011\u000b\u0017\u0005\u0002\tM\u0013\u0001D<bY.\u001c\u0016N\u00197j]\u001e\u001cX\u0003\u0002B+\u0005G\"BAa\u0016\u0003��Q!!\u0011\fB;!\u0019\u0011YAa\u0017\u0003`%!!Q\fB\u0010\u0005\r\u0019V-\u001d\t\u0005\u0005C\u0012\u0019\u0007\u0004\u0001\u0005\u0011\t\u0015$q\nb\u0001\u0005O\u0012\u0011\u0001V\t\u0005\u0005S\u0012y\u0007E\u0002\u0012\u0005WJ1A!\u001c\u0013\u0005\u001dqu\u000e\u001e5j]\u001e\u00042!\u0005B9\u0013\r\u0011\u0019H\u0005\u0002\u0004\u0003:L\b\u0002\u0003B<\u0005\u001f\u0002\rA!\u001f\u0002\t]|'o\u001b\t\b#\tm\u00141\u0014B0\u0013\r\u0011iH\u0005\u0002\n\rVt7\r^5p]FB\u0001\"a/\u0003P\u0001\u0007\u00111\u0014\u0005\b\u0005\u0007cC\u0011\u0002BC\u0003-\tG\rZ*jE2LgnZ:\u0015\t\u0005m%q\u0011\u0005\t\u0003W\u0013\t\t1\u0001\u0002\u001c\"9!1\u0012\u0017\u0005\n\t5\u0015aG4fi\n\u000b7/\u001a7j]\u0016\u001c6m\u001c:f\r>\u00148+\u001b2mS:<7\u000f\u0006\u0003\u0002L\n=\u0005\u0002CAV\u0005\u0013\u0003\r!a'\t\u000f\tME\u0006\"\u0003\u0003\u0016\u0006IA-\u001a2vO:{G-\u001a\u000b\u0004s\n]\u0005\u0002CAc\u0005#\u0003\r!a'")
/* loaded from: input_file:com/gravity/goose/extractors/ContentExtractor.class */
public interface ContentExtractor {

    /* compiled from: ContentExtractor.scala */
    /* renamed from: com.gravity.goose.extractors.ContentExtractor$class, reason: invalid class name */
    /* loaded from: input_file:com/gravity/goose/extractors/ContentExtractor$class.class */
    public abstract class Cclass {
        /**
         * Returns the logger exposed by the {@code ContentExtractor$} singleton.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @return whatever {@code ContentExtractor$.MODULE$.logger()} yields
         */
        public static Logger getLogger(ContentExtractor contentExtractor) {
            Logger sharedLogger = ContentExtractor$.MODULE$.logger();
            return sharedLogger;
        }

        /**
         * Extracts a cleaned-up page title from the article's {@code <title>} element.
         *
         * <p>The text of the first {@code <title>} element is split on the first separator
         * found among {@code |}, {@code -}, {@code »} and {@code :} (checked in that order;
         * only one separator is ever applied), then passed through
         * {@code MOTLEY_REPLACEMENT} and logged at trace level.
         *
         * <p>NOTE(review): the dash branch fires on a bare {@code "-"}, while {@code $init$}
         * builds {@code DASH_SPLITTER} from {@code " - "}; a hyphenated title without a
         * spaced dash therefore claims this branch and skips the later separators.
         *
         * @param contentExtractor the trait instance supplying the splitters and replacements
         * @param article          the article whose parsed document is inspected
         * @return the cleaned title, or the empty string when there is no usable title
         *         or a {@link NullPointerException} is raised while reading it
         */
        public static String getTitle(ContentExtractor contentExtractor, Article article) {
            try {
                Elements titleElements = article.doc().getElementsByTag("title");
                if (titleElements == null || titleElements.isEmpty()) {
                    return string$.MODULE$.empty();
                }
                String rawTitle = titleElements.first().text();
                if (string$.MODULE$.isNullOrEmpty(rawTitle)) {
                    return string$.MODULE$.empty();
                }

                // Only the first matching separator is used; the chain replaces the
                // original "already split" flag without changing which branch wins.
                String title;
                if (rawTitle.contains("|")) {
                    title = contentExtractor.doTitleSplits(rawTitle, contentExtractor.PIPE_SPLITTER());
                } else if (rawTitle.contains("-")) {
                    title = contentExtractor.doTitleSplits(rawTitle, contentExtractor.DASH_SPLITTER());
                } else if (rawTitle.contains("»")) {
                    title = contentExtractor.doTitleSplits(rawTitle, contentExtractor.ARROWS_SPLITTER());
                } else if (rawTitle.contains(":")) {
                    title = contentExtractor.doTitleSplits(rawTitle, contentExtractor.COLON_SPLITTER());
                } else {
                    title = rawTitle;
                }

                String cleaned = contentExtractor.MOTLEY_REPLACEMENT().replaceAll(title);
                ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Page title is: ").append(cleaned).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
                return cleaned;
            } catch (NullPointerException e) {
                // Best effort: a missing document or element yields an empty title, not a failure.
                ContentExtractor$.MODULE$.warn(e.toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
                return string$.MODULE$.empty();
            }
        }

        /**
         * Splits a title with the given splitter and returns its longest piece.
         *
         * <p>The longest piece (the first one on ties) is run through
         * {@code TITLE_REPLACEMENTS} and trimmed.
         *
         * @param contentExtractor the trait instance supplying {@code TITLE_REPLACEMENTS}
         * @param str              the full title text
         * @param stringSplitter   the splitter used to break the title into pieces
         * @return the cleaned longest piece; when the splitter yields no pieces at all,
         *         the whole title cleaned the same way
         */
        public static String doTitleSplits(ContentExtractor contentExtractor, String str, StringSplitter stringSplitter) {
            String[] pieces = stringSplitter.split(str);
            if (pieces.length == 0) {
                // Guard against an empty result, which previously caused an
                // ArrayIndexOutOfBoundsException on pieces[0].
                // NOTE(review): presumably StringSplitter wraps a regex split, which returns
                // an empty array for a title made only of delimiters - confirm.
                return contentExtractor.TITLE_REPLACEMENTS().replaceAll(str).trim();
            }

            int longestIndex = 0;
            int longestLength = 0;
            for (int idx = 0; idx < pieces.length; idx++) {
                int pieceLength = pieces[idx].length();
                // Strictly greater, so the earliest piece wins a tie.
                if (pieceLength > longestLength) {
                    longestLength = pieceLength;
                    longestIndex = idx;
                }
            }
            return contentExtractor.TITLE_REPLACEMENTS().replaceAll(pieces[longestIndex]).trim();
        }

        /**
         * Reads the {@code content} attribute of the first element matching a selector.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param document         the document to query
         * @param str              the selector passed to {@code Document.select}
         * @return the trimmed attribute value, or the empty string when nothing matches
         *         or the value is null or empty
         */
        private static String getMetaContent(ContentExtractor contentExtractor, Document document, String str) {
            Elements matches = document.select(str);
            String content = matches.size() > 0 ? matches.first().attr("content") : null;
            if (string$.MODULE$.isNullOrEmpty(content)) {
                return string$.MODULE$.empty();
            }
            return content.trim();
        }

        /**
         * Returns the content of the document's {@code meta[name=description]} element.
         *
         * @param contentExtractor the trait instance
         * @param article          the article whose document is queried
         * @return the value produced by {@code getMetaContent} for that selector
         */
        public static String getMetaDescription(ContentExtractor contentExtractor, Article article) {
            Document doc = article.doc();
            return getMetaContent(contentExtractor, doc, "meta[name=description]");
        }

        /**
         * Returns the content of the document's {@code meta[name=keywords]} element.
         *
         * @param contentExtractor the trait instance
         * @param article          the article whose document is queried
         * @return the value produced by {@code getMetaContent} for that selector
         */
        public static String getMetaKeywords(ContentExtractor contentExtractor, Article article) {
            Document doc = article.doc();
            return getMetaContent(contentExtractor, doc, "meta[name=keywords]");
        }

        /**
         * Resolves the article's canonical URL.
         *
         * <p>Uses the trimmed {@code href} of the first {@code link[rel=canonical]} element
         * when it is non-empty; in every other case falls back to {@code article.finalUrl()}.
         *
         * @param contentExtractor the trait instance, passed to the default-value closure
         * @param article          the article whose document is queried
         * @return the canonical link, or the article's final URL
         */
        public static String getCanonicalLink(ContentExtractor contentExtractor, Article article) {
            Elements canonicalLinks = article.doc().select("link[rel=canonical]");
            if (canonicalLinks.size() > 0) {
                Option hrefOption = Option$.MODULE$.apply(canonicalLinks.first().attr("href"));
                // The closure supplies the value used when the attribute is null.
                String href = ((String) hrefOption.getOrElse(new ContentExtractor$$anonfun$1(contentExtractor))).trim();
                if (new StringOps(Predef$.MODULE$.augmentString(href)).nonEmpty()) {
                    return href;
                }
            }
            return article.finalUrl();
        }

        /**
         * Returns the host component of a URL string.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param str              the URL text, parsed with {@link URL#URL(String)}
         * @return the value of {@link URL#getHost()} for the parsed URL
         */
        public static String getDomain(ContentExtractor contentExtractor, String str) {
            URL parsedUrl = new URL(str);
            return parsedUrl.getHost();
        }

        /**
         * Collects tag strings from the article's document.
         *
         * <p>Elements are selected with {@code A_REL_TAG_SELECTOR}; each one is handed to a
         * closure together with a mutable set, and the set is returned as an immutable copy.
         *
         * @param contentExtractor the trait instance supplying the selector and the empty set
         * @param article          the article whose document is queried
         * @return {@code NO_STRINGS} when the document has no children or nothing matches,
         *         otherwise the collected set
         */
        public static Set extractTags(ContentExtractor contentExtractor, Article article) {
            Document document = article.doc();
            if (document.children().isEmpty()) {
                return contentExtractor.NO_STRINGS();
            }

            Elements tagLinks = Selector.select(contentExtractor.A_REL_TAG_SELECTOR(), document);
            if (tagLinks.isEmpty()) {
                return contentExtractor.NO_STRINGS();
            }

            HashSet collected = HashSet$.MODULE$.apply(Nil$.MODULE$);
            JavaConversions$.MODULE$.asScalaBuffer(tagLinks).foreach(new ContentExtractor$$anonfun$extractTags$1(contentExtractor, collected));
            return collected.toSet();
        }

        /**
         * Picks the "top node" of the article's document.
         *
         * <p>The work is done by three closures that share mutable state created here:
         * one is run over every element matching {@code TOP_NODE_TAGS}, one over the
         * buffer the first closure was given, and one over the set the second closure
         * was given. The element left in the shared reference afterwards is the result.
         *
         * @param contentExtractor the trait instance supplying {@code TOP_NODE_TAGS}
         * @param article          the article whose document is analysed
         * @return {@code Some(element)} when a top node was chosen, otherwise {@code None}
         */
        public static Option calculateBestNodeBasedOnClustering(ContentExtractor contentExtractor, Article article) {
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Starting to calculate TopNode").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            Document doc = article.doc();
            // Holds the chosen top node; stays null when the closures never assign it.
            ObjectRef create = ObjectRef.create((Object) null);
            Elements collect = Collector.collect(contentExtractor.TOP_NODE_TAGS(), doc);
            // Mutable cells shared with the second closure.
            // NOTE(review): their individual meanings are not visible from this method.
            DoubleRef create2 = DoubleRef.create(1.0d);
            IntRef create3 = IntRef.create(0);
            IntRef create4 = IntRef.create(0);
            HashSet apply = HashSet$.MODULE$.apply(Nil$.MODULE$);
            Buffer apply2 = Buffer$.MODULE$.apply(Nil$.MODULE$);
            // Pass 1: every matching element is offered to a closure holding the buffer
            // (presumably it keeps the nodes that have text, per the trace message below).
            JavaConversions$.MODULE$.asScalaBuffer(collect).foreach(new ContentExtractor$$anonfun$calculateBestNodeBasedOnClustering$1(contentExtractor, apply2));
            int size = apply2.size();
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("About to inspect num of nodes with text: ").append(BoxesRunTime.boxToInteger(size)).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            // Pass 2: runs over the buffered nodes with the shared cells, the set,
            // the node count, a literal 0 and a quarter of the node count.
            apply2.foreach(new ContentExtractor$$anonfun$calculateBestNodeBasedOnClustering$2(contentExtractor, create2, create3, create4, apply, size, 0, size * 0.25d));
            // Pass 3: runs over the set with the top-node reference and a fresh counter.
            apply.foreach(new ContentExtractor$$anonfun$calculateBestNodeBasedOnClustering$3(contentExtractor, create, IntRef.create(0)));
            contentExtractor.printTraceLog((Element) create.elem);
            return ((Element) create.elem) == null ? None$.MODULE$ : new Some((Element) create.elem);
        }

        /**
         * Emits two trace lines describing the chosen top node.
         *
         * <p>The first line carries the node's {@code gravityScore}, {@code gravityNodes},
         * id and class; the second carries its trimmed text, cut to 100 characters plus
         * {@code "..."} when longer. A {@link NullPointerException} raised while building
         * either line is logged as a warning instead of propagating.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param element          the node to describe; nothing is logged when it is null
         */
        public static void printTraceLog(ContentExtractor contentExtractor, Element element) {
            if (element == null) {
                return;
            }
            try {
                StringBuilder summary = new StringBuilder();
                summary.append(ContentExtractor$.MODULE$.logPrefix());
                summary.append("Our TOPNODE: score='").append(element.attr("gravityScore"));
                summary.append("' nodeCount='").append(element.attr("gravityNodes"));
                summary.append("' id='").append(element.id());
                summary.append("' class='").append(element.attr("class"));
                summary.append("' ");
                ContentExtractor$.MODULE$.trace(summary.toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));

                StringBuilder textLine = new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Text - ");
                String trimmed = element.text().trim();
                String preview = trimmed;
                if (trimmed.length() > 100) {
                    preview = trimmed.substring(0, 100) + "...";
                }
                textLine.append(preview);
                ContentExtractor$.MODULE$.trace(textLine.toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            } catch (NullPointerException e) {
                ContentExtractor$.MODULE$.warn(new StringBuilder().append("printTraceLog: ").append(e.toString()).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            }
        }

        /**
         * Decides whether a node may be boosted by running a closure over its
         * preceding siblings.
         *
         * <p>The closure can end the walk early by throwing a
         * {@link NonLocalReturnControl} keyed with the marker object created here;
         * the boolean it carries becomes the result.
         *
         * @param contentExtractor the trait instance whose {@code walkSiblings} is used
         * @param element          the node whose preceding siblings are inspected
         * @return the value carried by the closure's non-local return, or {@code false}
         *         when the walk finishes without one
         */
        public static boolean com$gravity$goose$extractors$ContentExtractor$$isOkToBoost(ContentExtractor contentExtractor, Element element) {
            // Unique marker identifying non-local returns that belong to this call.
            Object obj = new Object();
            try {
                // NOTE(review): the closure receives "p", a zeroed counter, 5 and 3;
                // their roles are defined inside the closure class, not here.
                contentExtractor.walkSiblings(element, new ContentExtractor$$anonfun$com$gravity$goose$extractors$ContentExtractor$$isOkToBoost$1(contentExtractor, "p", IntRef.create(0), 5, 3, obj));
                return false;
            } catch (NonLocalReturnControl e) {
                if (e.key() == obj) {
                    return e.value$mcZ$sp();
                }
                // Raised for some other enclosing method; let it keep unwinding.
                throw e;
            }
        }

        /**
         * Truncates text to a maximum length, marking the cut with {@code "..."}.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param str              the text to shorten
         * @param i                the maximum number of characters kept before the ellipsis
         * @return {@code str} unchanged when it has at most {@code i} characters,
         *         otherwise its first {@code i} characters followed by {@code "..."}
         */
        public static String getShortText(ContentExtractor contentExtractor, String str, int i) {
            if (str.length() <= i) {
                return str;
            }
            return str.substring(0, i) + "...";
        }

        /**
         * Reports whether a node's text is dominated by link text.
         *
         * <p>The score is {@code (linkWords / totalWords) * numberOfLinks}, where both word
         * counts come from {@code SPACE_SPLITTER} and the link text is gathered by a
         * closure run over every {@code <a>} descendant. A score above 1 counts as high.
         *
         * @param contentExtractor the trait instance supplying {@code SPACE_SPLITTER}
         * @param element          the node to measure
         * @return {@code false} when the node contains no links, otherwise whether the
         *         score exceeds 1
         */
        public static boolean com$gravity$goose$extractors$ContentExtractor$$isHighLinkDensity(ContentExtractor contentExtractor, Element element) {
            Elements links = element.getElementsByTag("a");
            if (links.size() == 0) {
                return false;
            }
            float totalWords = contentExtractor.SPACE_SPLITTER().split(element.text().trim()).length;

            // Bind the builder to a local so the text accumulated by the closure can be
            // read back; the original referred to an undefined variable here.
            StringBuilder linkText = new StringBuilder();
            JavaConversions$.MODULE$.asScalaBuffer(links).foreach(new ContentExtractor$$anonfun$com$gravity$goose$extractors$ContentExtractor$$isHighLinkDensity$1(contentExtractor, linkText));
            float linkWords = contentExtractor.SPACE_SPLITTER().split(linkText.toString()).length;

            float score = (linkWords / totalWords) * links.size();
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Calulated link density score as: ").append(BoxesRunTime.boxToFloat(score)).append(" for node: ").append(contentExtractor.getShortText(element.text(), 50)).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            return score > 1.0f;
        }

        /**
         * Returns the node's gravity score, treating a missing score as zero.
         *
         * @param contentExtractor the trait instance
         * @param element          the node whose {@code gravityScore} is read
         * @return the parsed score, or 0 when {@code getGravityScoreFromNode} yields {@code None}
         * @throws MatchError if the lookup yields neither {@code Some} nor {@code None}
         */
        public static int com$gravity$goose$extractors$ContentExtractor$$getScore(ContentExtractor contentExtractor, Element element) {
            // Keep the declared type as Option and narrow explicitly; the original
            // assigned the Option straight into a Some-typed local.
            Option maybeScore = getGravityScoreFromNode(contentExtractor, element);
            if (maybeScore instanceof Some) {
                return BoxesRunTime.unboxToInt(((Some) maybeScore).x());
            }
            if (None$.MODULE$.equals(maybeScore)) {
                return 0;
            }
            throw new MatchError(maybeScore);
        }

        /**
         * Reads and parses the node's {@code gravityScore} attribute.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param element          the node to read
         * @return {@code Some(score)} when the attribute holds an integer; {@code None} when
         *         it is null or empty, or when any exception is raised while reading it
         */
        private static Option getGravityScoreFromNode(ContentExtractor contentExtractor, Element element) {
            try {
                String rawScore = element.attr("gravityScore");
                if (string$.MODULE$.isNullOrEmpty(rawScore)) {
                    return None$.MODULE$;
                }
                return new Some(BoxesRunTime.boxToInteger(Integer.parseInt(rawScore)));
            } catch (Exception e) {
                // Any failure is reported as "no score".
                return None$.MODULE$;
            }
        }

        /**
         * Adds an amount to the node's {@code gravityScore} attribute.
         *
         * <p>A null, empty or non-numeric current value counts as 0.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param element          the node whose attribute is rewritten
         * @param i                the amount to add to the current score
         */
        public static void com$gravity$goose$extractors$ContentExtractor$$updateScore(ContentExtractor contentExtractor, Element element, int i) {
            int currentScore = 0;
            try {
                String rawScore = element.attr("gravityScore");
                if (!string$.MODULE$.isNullOrEmpty(rawScore)) {
                    currentScore = Integer.parseInt(rawScore);
                }
            } catch (NumberFormatException e) {
                currentScore = 0;
            }
            element.attr("gravityScore", Integer.toString(currentScore + i));
        }

        /**
         * Adds an amount to the node's {@code gravityNodes} attribute.
         *
         * <p>A null, empty or non-numeric current value counts as 0.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param element          the node whose attribute is rewritten
         * @param i                the amount to add to the current count
         */
        public static void com$gravity$goose$extractors$ContentExtractor$$updateNodeCount(ContentExtractor contentExtractor, Element element, int i) {
            int currentCount = 0;
            try {
                String rawCount = element.attr("gravityNodes");
                if (!string$.MODULE$.isNullOrEmpty(rawCount)) {
                    currentCount = Integer.parseInt(rawCount);
                }
            } catch (NumberFormatException e) {
                currentCount = 0;
            }
            element.attr("gravityNodes", Integer.toString(currentCount + i));
        }

        /**
         * Gathers video elements found under the parent of the given node.
         *
         * <p>{@code <embed>} and {@code <object>} elements beneath the node's parent are
         * offered to closures that fill a candidate list; a third closure, given the
         * strings {@code "youtube"} and {@code "vimeo"}, moves candidates into the result
         * buffer. Any exception raised on the way is logged as a warning and whatever was
         * collected so far is returned.
         *
         * @param contentExtractor the trait instance, passed to the closures
         * @param element          the node whose parent is searched
         * @return the collected elements as an immutable list
         */
        public static List extractVideos(ContentExtractor contentExtractor, Element element) {
            ArrayList candidates = new ArrayList();
            Buffer videos = Buffer$.MODULE$.apply(Nil$.MODULE$);
            try {
                Elements embeds = element.parent().getElementsByTag("embed");
                JavaConversions$.MODULE$.asScalaBuffer(embeds).foreach(new ContentExtractor$$anonfun$extractVideos$1(contentExtractor, candidates));
                Elements objects = element.parent().getElementsByTag("object");
                JavaConversions$.MODULE$.asScalaBuffer(objects).foreach(new ContentExtractor$$anonfun$extractVideos$2(contentExtractor, candidates));
                ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("extractVideos: Starting to extract videos. Found: ").append(BoxesRunTime.boxToInteger(candidates.size())).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
                JavaConversions$.MODULE$.asScalaBuffer(candidates).foreach(new ContentExtractor$$anonfun$extractVideos$3(contentExtractor, videos, "youtube", "vimeo"));
            } catch (Exception e) {
                // One handler covers NullPointerException too; both cases log the same warning.
                ContentExtractor$.MODULE$.warn(e.toString(), Predef$.MODULE$.genericWrapArray(new Object[]{e}));
            }
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("extractVideos:  done looking videos").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            return videos.toList();
        }

        /**
         * Reports whether a node should be dropped for having no paragraphs.
         *
         * <p>A closure is first applied to every {@code <p>} descendant, then the
         * paragraphs are counted again (presumably the closure can remove some - confirm
         * against the closure class).
         *
         * @param contentExtractor the trait instance, passed to the closure
         * @param element          the node to check
         * @return {@code true} when no {@code <p>} descendants remain and the node's tag
         *         is not {@code td}; {@code false} otherwise
         */
        public static boolean isTableTagAndNoParagraphsExist(ContentExtractor contentExtractor, Element element) {
            JavaConversions$.MODULE$.asScalaBuffer(element.getElementsByTag("p")).foreach(new ContentExtractor$$anonfun$isTableTagAndNoParagraphsExist$1(contentExtractor));
            boolean noParagraphsLeft = element.getElementsByTag("p").size() == 0;
            // Null-safe tag test; replaces the String-versus-int comparison that stood in
            // for Scala's `!=` here.
            if (noParagraphsLeft && !"td".equals(element.tagName())) {
                ContentExtractor$.MODULE$.trace("Removing node because it doesn't have any paragraphs", Predef$.MODULE$.genericWrapArray(new Object[0]));
                return true;
            }
            return false;
        }

        /**
         * Finalises the chosen node after extraction.
         *
         * <p>Sibling content is first merged in through {@code addSiblings}; the node's
         * children that pass a filter closure are then handed to a second closure
         * together with the node.
         *
         * @param contentExtractor the trait instance, passed to the closures
         * @param element          the chosen node
         * @return the node returned by {@code addSiblings}
         */
        public static Element postExtractionCleanup(ContentExtractor contentExtractor, Element element) {
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Starting cleanup Node").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            Element topNode = addSiblings(contentExtractor, element);
            Buffer children = JavaConversions$.MODULE$.asScalaBuffer(topNode.children());
            children.withFilter(new ContentExtractor$$anonfun$postExtractionCleanup$1(contentExtractor)).foreach(new ContentExtractor$$anonfun$postExtractionCleanup$2(contentExtractor, topNode));
            return topNode;
        }

        /**
         * Checks whether a node scores high enough, relative to the top node, to be kept.
         *
         * <p>The threshold is 8% of the top node's score. {@code td} nodes are kept
         * regardless of their score.
         *
         * @param contentExtractor the trait instance
         * @param element          the top node, whose score sets the threshold
         * @param element2         the node being judged
         * @return {@code false} when {@code element2} scores below the threshold and is
         *         not a {@code td}; {@code true} otherwise
         */
        public static boolean isNodeScoreThreshholdMet(ContentExtractor contentExtractor, Element element, Element element2) {
            int topNodeScore = com$gravity$goose$extractors$ContentExtractor$$getScore(contentExtractor, element);
            int currentNodeScore = com$gravity$goose$extractors$ContentExtractor$$getScore(contentExtractor, element2);
            float threshold = (float) (topNodeScore * 0.08d);
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("topNodeScore: ").append(BoxesRunTime.boxToInteger(topNodeScore)).append(" currentNodeScore: ").append(BoxesRunTime.boxToInteger(currentNodeScore)).append(" threshold: ").append(BoxesRunTime.boxToFloat(threshold)).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            // Null-safe tag test; replaces the String-versus-int comparison that stood in
            // for Scala's `!=` here.
            if (currentNodeScore < threshold && !"td".equals(element2.tagName())) {
                ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Removing node due to low threshold score").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
                return false;
            }
            // This line is logged for every kept node, not only for td nodes.
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Not removing TD node").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            return true;
        }

        /**
         * Produces the HTML a sibling node contributes to the top node, if any.
         *
         * <p>A non-empty {@code <p>} sibling contributes its own outer HTML. Otherwise its
         * {@code <p>} descendants are run through a filter/map/filter/map closure pipeline
         * (the second filter receives the baseline score) and the results are concatenated.
         *
         * @param contentExtractor the trait instance, passed to the closures
         * @param element          the sibling being examined
         * @param i                the baseline score handed to the second filter closure
         * @return {@code Some(html)}, or {@code None} when the sibling is not a non-empty
         *         {@code <p>} and has no {@code <p>} descendants
         */
        public static Option getSiblingContent(ContentExtractor contentExtractor, Element element, int i) {
            // Null-safe tag test; replaces the String-versus-int comparison that stood in
            // for Scala's `==` here.
            if ("p".equals(element.tagName()) && element.text().length() > 0) {
                return new Some(element.outerHtml());
            }

            Elements paragraphs = element.getElementsByTag("p");
            if (paragraphs.first() == null) {
                return None$.MODULE$;
            }

            TraversableLike mapped = (TraversableLike) JavaConversions$.MODULE$.asScalaBuffer(paragraphs).withFilter(new ContentExtractor$$anonfun$getSiblingContent$1(contentExtractor)).map(new ContentExtractor$$anonfun$getSiblingContent$2(contentExtractor), Buffer$.MODULE$.canBuildFrom());
            TraversableOnce fragments = (TraversableOnce) mapped.withFilter(new ContentExtractor$$anonfun$getSiblingContent$3(contentExtractor, i)).map(new ContentExtractor$$anonfun$getSiblingContent$4(contentExtractor), Buffer$.MODULE$.canBuildFrom());
            return new Some(fragments.mkString());
        }

        /**
         * Applies a function to each preceding sibling of a node, nearest sibling first.
         *
         * @param contentExtractor the trait instance
         * @param element          the node whose preceding element siblings are visited
         * @param function1        the function applied to every visited sibling
         * @return the function results in visiting order (nearest sibling first)
         */
        public static Seq walkSiblings(ContentExtractor contentExtractor, Element element, Function1 function1) {
            Buffer results = Buffer$.MODULE$.apply(Nil$.MODULE$);
            for (Element sibling = element.previousElementSibling(); sibling != null; sibling = sibling.previousElementSibling()) {
                ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("SIBLINGCHECK: ").append(debugNode(contentExtractor, sibling)).toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
                results.$plus$eq(function1.apply(sibling));
            }
            return results;
        }

        /**
         * Inserts content taken from the node's preceding siblings ahead of its first child.
         *
         * <p>Each preceding sibling is evaluated by a closure that receives the baseline
         * score; the results are reversed (so they run farthest sibling first), flattened
         * by a second closure, concatenated, and inserted as HTML before the node's first
         * child.
         *
         * @param contentExtractor the trait instance, passed to the closures
         * @param element          the chosen node; {@code child(0)} is taken, so it is
         *                         expected to have at least one child
         * @return the same node, after the insertion
         */
        private static Element addSiblings(ContentExtractor contentExtractor, Element element) {
            ContentExtractor$.MODULE$.trace(new StringBuilder().append(ContentExtractor$.MODULE$.logPrefix()).append("Starting to add siblings").toString(), Predef$.MODULE$.genericWrapArray(new Object[0]));
            // Resolve the insertion point first, as the original expression did.
            Element firstChild = element.child(0);
            int baselineScore = getBaselineScoreForSiblings(contentExtractor, element);
            Seq visited = contentExtractor.walkSiblings(element, new ContentExtractor$$anonfun$2(contentExtractor, baselineScore));
            TraversableLike farthestFirst = (TraversableLike) visited.reverse();
            Seq fragments = (Seq) farthestFirst.flatMap(new ContentExtractor$$anonfun$3(contentExtractor), Seq$.MODULE$.canBuildFrom());
            firstChild.before(fragments.mkString());
            return element;
        }

        /**
         * Computes the score sibling paragraphs must beat to be merged into the top node.
         *
         * <p>A closure visits every {@code <p>} descendant and updates two counters, logged
         * below as "NumOfParas" and "scoreOfAll"; the baseline is their integer quotient.
         *
         * @param contentExtractor the trait instance, passed to the closure
         * @param element          the top node
         * @return {@code scoreOfAll / NumOfParas}, or 100000 when the paragraph counter
         *         stays at zero
         */
        private static int getBaselineScoreForSiblings(ContentExtractor contentExtractor, Element element) {
            IntRef paragraphCount = IntRef.create(0);
            IntRef scoreTotal = IntRef.create(0);
            JavaConversions$.MODULE$.asScalaBuffer(element.getElementsByTag("p")).foreach(new ContentExtractor$$anonfun$getBaselineScoreForSiblings$1(contentExtractor, paragraphCount, scoreTotal));
            if (paragraphCount.elem <= 0) {
                return 100000;
            }

            int baseline = scoreTotal.elem / paragraphCount.elem;
            if (ContentExtractor$.MODULE$.logger().isDebugEnabled()) {
                ContentExtractor$.MODULE$.logger().debug(new StringBuilder().append("The base score for siblings to beat is: ").append(BoxesRunTime.boxToInteger(baseline)).append(" NumOfParas: ").append(BoxesRunTime.boxToInteger(paragraphCount.elem)).append(" scoreOfAll: ").append(BoxesRunTime.boxToInteger(scoreTotal.elem)).toString());
            }
            return baseline;
        }

        /**
         * Formats a node's score, node count, id and class for trace output.
         *
         * @param contentExtractor the trait instance; not consulted by this implementation
         * @param element          the node to describe
         * @return the description; the class value is not followed by a closing quote
         */
        private static String debugNode(ContentExtractor contentExtractor, Element element) {
            return new StringBuilder()
                    .append("GravityScore: '").append(element.attr("gravityScore"))
                    .append("' paraNodeCount: '").append(element.attr("gravityNodes"))
                    .append("' nodeId: '").append(element.id())
                    .append("' className: '").append(element.attr("class"))
                    .toString();
        }

        /**
         * Installs the constant values of a {@code ContentExtractor} through its
         * {@code _setter_} methods: two string replacements, the title replacement
         * sequence, five title splitters, an empty string set, the tag-link selector and
         * the evaluator for the tags {@code p}, {@code td} and {@code pre}.
         *
         * <p>NOTE(review): this looks like the compiler-generated initializer for the Scala
         * source named in the file header - confirm before editing by hand.
         *
         * @param contentExtractor the instance whose constants are being initialised
         */
        public static void $init$(ContentExtractor contentExtractor) {
            // --- text replacements ---
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$MOTLEY_REPLACEMENT_$eq(
                    StringReplacement$.MODULE$.compile("&#65533;", string$.MODULE$.empty()));
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$ESCAPED_FRAGMENT_REPLACEMENT_$eq(
                    StringReplacement$.MODULE$.compile("#!", "?_escaped_fragment_="));
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$TITLE_REPLACEMENTS_$eq(
                    ReplaceSequence$.MODULE$.create("&raquo;").append("»"));

            // --- splitters, one per separator ---
            StringSplitter pipeSplitter = new StringSplitter("\\|");
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$PIPE_SPLITTER_$eq(pipeSplitter);
            StringSplitter dashSplitter = new StringSplitter(" - ");
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$DASH_SPLITTER_$eq(dashSplitter);
            StringSplitter arrowsSplitter = new StringSplitter("»");
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$ARROWS_SPLITTER_$eq(arrowsSplitter);
            StringSplitter colonSplitter = new StringSplitter(":");
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$COLON_SPLITTER_$eq(colonSplitter);
            StringSplitter spaceSplitter = new StringSplitter(" ");
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$SPACE_SPLITTER_$eq(spaceSplitter);

            // --- tag-related constants ---
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$NO_STRINGS_$eq(
                    Predef$.MODULE$.Set().empty());
            String relTagSelector = "a[rel=tag], a[href*=/tag/]";
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$A_REL_TAG_SELECTOR_$eq(relTagSelector);
            TagsEvaluator topNodeTags = new TagsEvaluator(Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"p", "td", "pre"})));
            contentExtractor.com$gravity$goose$extractors$ContentExtractor$_setter_$TOP_NODE_TAGS_$eq(topNodeTags);
        }
    }

    // Setter hooks invoked once each by the nested helper's $init$ method to install
    // the constants below. The comments record the value $init$ passes in.
    // NOTE(review): each setter presumably backs the accessor of the same name
    // (e.g. MOTLEY_REPLACEMENT()); the storage lives in implementing classes - confirm.

    /** Receives the replacement compiled from {@code "&#65533;"} and {@code string$.empty()}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$MOTLEY_REPLACEMENT_$eq(StringReplacement stringReplacement);

    /** Receives the replacement compiled from {@code "#!"} and {@code "?_escaped_fragment_="}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$ESCAPED_FRAGMENT_REPLACEMENT_$eq(StringReplacement stringReplacement);

    /** Receives the sequence built by {@code create("&raquo;")} followed by {@code append("»")}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$TITLE_REPLACEMENTS_$eq(ReplaceSequence replaceSequence);

    /** Receives a splitter constructed with the pattern string {@code "\\|"}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$PIPE_SPLITTER_$eq(StringSplitter stringSplitter);

    /** Receives a splitter constructed with {@code " - "}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$DASH_SPLITTER_$eq(StringSplitter stringSplitter);

    /** Receives a splitter constructed with {@code "»"}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$ARROWS_SPLITTER_$eq(StringSplitter stringSplitter);

    /** Receives a splitter constructed with {@code ":"}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$COLON_SPLITTER_$eq(StringSplitter stringSplitter);

    /** Receives a splitter constructed with a single space. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$SPACE_SPLITTER_$eq(StringSplitter stringSplitter);

    /** Receives the empty set obtained from {@code Predef.Set().empty()}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$NO_STRINGS_$eq(Set set);

    /** Receives the selector string {@code "a[rel=tag], a[href*=/tag/]"}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$A_REL_TAG_SELECTOR_$eq(String str);

    /** Receives a {@code TagsEvaluator} built from the tag names {@code p}, {@code td} and {@code pre}. */
    void com$gravity$goose$extractors$ContentExtractor$_setter_$TOP_NODE_TAGS_$eq(TagsEvaluator tagsEvaluator);

    /** Supplies the SLF4J {@code Logger} for this extractor; which logger is returned is up to the implementation. */
    Logger getLogger();

    // Constant accessors.
    // NOTE(review): each accessor presumably returns the value handed to the
    // identically named _setter_ method by $init$; the backing fields are declared in
    // implementing classes and are not visible here - confirm.

    /** Presumably the replacement compiled from {@code "&#65533;"} to the empty string. */
    StringReplacement MOTLEY_REPLACEMENT();

    /** Presumably the replacement compiled from {@code "#!"} to {@code "?_escaped_fragment_="}. */
    StringReplacement ESCAPED_FRAGMENT_REPLACEMENT();

    /** Presumably the replace sequence created from {@code "&raquo;"} with {@code "»"} appended. */
    ReplaceSequence TITLE_REPLACEMENTS();

    /** Presumably the splitter constructed with {@code "\\|"}. */
    StringSplitter PIPE_SPLITTER();

    /** Presumably the splitter constructed with {@code " - "}. */
    StringSplitter DASH_SPLITTER();

    /** Presumably the splitter constructed with {@code "»"}. */
    StringSplitter ARROWS_SPLITTER();

    /** Presumably the splitter constructed with {@code ":"}. */
    StringSplitter COLON_SPLITTER();

    /** Presumably the splitter constructed with a single space. */
    StringSplitter SPACE_SPLITTER();

    /** Presumably the empty string set installed by $init$. */
    Set<String> NO_STRINGS();

    /** Presumably the selector string {@code "a[rel=tag], a[href*=/tag/]"}. */
    String A_REL_TAG_SELECTOR();

    /** Presumably the evaluator built from the tag names {@code p}, {@code td} and {@code pre}. */
    TagsEvaluator TOP_NODE_TAGS();

    // Extraction operations.
    // NOTE(review): the implementations are outside this section; the descriptions
    // below are derived from names and signatures only and should be confirmed against
    // the implementing code.

    /** Returns a title string for the given article. */
    String getTitle(Article article);

    /** Returns a string derived from {@code str} using the given splitter; presumably picks one piece of a split title - confirm. */
    String doTitleSplits(String str, StringSplitter stringSplitter);

    /** Returns the meta description string for the given article. */
    String getMetaDescription(Article article);

    /** Returns the meta keywords string for the given article. */
    String getMetaKeywords(Article article);

    /** Returns the canonical link string for the given article. */
    String getCanonicalLink(Article article);

    /** Returns a domain string for {@code str}; presumably {@code str} is a URL - confirm. */
    String getDomain(String str);

    /** Returns a set of tag strings for the given article. */
    Set<String> extractTags(Article article);

    /** Returns the selected content node for the article, or an empty {@code Option} when none is chosen. */
    Option<Element> calculateBestNodeBasedOnClustering(Article article);

    /** Emits trace output for the given element; returns nothing. */
    void printTraceLog(Element element);

    /** Returns a string derived from {@code str} and the int argument; presumably a length-limited excerpt - confirm. */
    String getShortText(String str, int i);

    /** Returns a list of elements found for the given element; presumably video nodes - confirm. */
    List<Element> extractVideos(Element element);

    /** Boolean check on the given element; presumably true for a table element without paragraphs - confirm. */
    boolean isTableTagAndNoParagraphsExist(Element element);

    /** Returns an element after post-extraction clean-up of the given element; whether it is the same instance is not visible here. */
    Element postExtractionCleanup(Element element);

    /** Boolean check involving two elements; the roles of the two parameters are not visible here - confirm. */
    boolean isNodeScoreThreshholdMet(Element element, Element element2);

    /**
     * Returns optional content for a sibling element.
     * NOTE(review): the int is presumably the baseline score computed by
     * getBaselineScoreForSiblings and forwarded from addSiblings - confirm.
     */
    Option<String> getSiblingContent(Element element, int i);

    /**
     * Applies {@code function1} while walking siblings of {@code element} and returns the
     * collected results. addSiblings calls this with the top node, then reverses and
     * flattens the returned sequence. Which siblings are visited, and in what order, is
     * decided by the implementation - TODO confirm.
     */
    <T> Seq<T> walkSiblings(Element element, Function1<Element, T> function1);
}
