package ws.palladian.extraction.pos;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import ws.palladian.core.Annotation;
import ws.palladian.core.ImmutableAnnotation;
import ws.palladian.core.Instance;
import ws.palladian.core.Tagger;
import ws.palladian.core.TextTokenizer;
import ws.palladian.core.Token;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.tagger.NerHelper;
import ws.palladian.extraction.token.WordTokenizer;
import ws.palladian.helper.collection.CollectionHelper;

/* loaded from: input_file:ws/palladian/extraction/pos/AbstractPosTagger.class */
/**
 * Base class for part-of-speech taggers. It tokenizes the input text, delegates the actual tagging of the
 * token values to {@link #getTags(List)} and wraps each token together with its tag in an
 * {@link Annotation}.
 */
public abstract class AbstractPosTagger implements Tagger {
    /** Tokenizer returned by {@link #getTokenizer()} unless a subclass overrides that method. */
    private static final TextTokenizer DEFAULT_TOKENIZER = new WordTokenizer();

    /** Matches the first hyphen and everything after it on the same line; compiled once for reuse. */
    private static final Pattern TAG_SUFFIX_PATTERN = Pattern.compile("-.*");

    /**
     * Tokenizes the given text, tags the tokens and returns one annotation per token.
     *
     * @param str The text to annotate.
     * @return One annotation per token, in token order; each annotation is created from the token's start
     *         position, the token's value and the upper-cased tag.
     * @throws IllegalStateException If {@link #getTags(List)} returned fewer tags than there are tokens.
     */
    @Override // ws.palladian.core.Tagger
    public List<Annotation> getAnnotations(String str) {
        List<Token> tokens = CollectionHelper.newArrayList(getTokenizer().iterateTokens(str));
        List<String> tags = getTags(CollectionHelper.convertList(tokens, Token.VALUE_CONVERTER));
        // Fail fast with a meaningful message instead of running into a bare NoSuchElementException in the
        // middle of the loop. Surplus tags are ignored, exactly as before.
        if (tags.size() < tokens.size()) {
            throw new IllegalStateException("Tagger returned " + tags.size() + " tags for " + tokens.size()
                    + " tokens; expected one tag per token.");
        }
        List<Annotation> annotations = new ArrayList<Annotation>(tokens.size());
        Iterator<String> tagIterator = tags.iterator();
        for (Token token : tokens) {
            // Locale.ROOT keeps the upper-casing independent of the JVM's default locale (with a Turkish
            // default locale, "i" would otherwise become a dotted capital I).
            String tag = tagIterator.next().toUpperCase(Locale.ROOT);
            annotations.add(new ImmutableAnnotation(token.getStartPosition(), token.getValue(), tag));
        }
        return annotations;
    }

    /**
     * Annotates the given text via {@link #getAnnotations(String)} and renders the result through
     * {@link NerHelper#tag} using {@link TaggingFormat#SLASHES}.
     *
     * @param str The text to tag.
     * @return The tagged representation of the text as produced by {@link NerHelper}.
     */
    public String getTaggedString(String str) {
        return NerHelper.tag(str, getAnnotations(str), TaggingFormat.SLASHES);
    }

    /**
     * Supplies the tokenizer used by {@link #getAnnotations(String)}. This implementation returns a shared
     * {@link WordTokenizer}; subclasses may override it to use a different tokenizer.
     *
     * @return The tokenizer used to split the text into tokens.
     */
    protected TextTokenizer getTokenizer() {
        return DEFAULT_TOKENIZER;
    }

    /**
     * Determines the tags for the given token values.
     *
     * @param list The token values, in text order.
     * @return The tags; {@link #getAnnotations(String)} pairs them with the tokens by position and therefore
     *         requires at least one tag per token.
     */
    protected abstract List<String> getTags(List<String> list);

    /**
     * Replaces the first hyphen in the given tag, together with everything following it on the same line,
     * by {@link Instance#NO_CATEGORY_DUMMY}.
     * <p>
     * NOTE(review): the replacement constant was most likely substituted for a string literal by the
     * decompiler; presumably it is the empty string, which would make this method strip the suffix.
     * Confirm against the value of {@code Instance.NO_CATEGORY_DUMMY}.
     *
     * @param str The tag to normalize, not <code>null</code>.
     * @return The normalized tag.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public static String normalizeTag(String str) {
        return TAG_SUFFIX_PATTERN.matcher(str).replaceAll(Instance.NO_CATEGORY_DUMMY);
    }

    /**
     * @return The name of this tagger.
     */
    public abstract String getName();
}
