package org.apache.lucene.wikipedia.analysis;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;

/* loaded from: input_file:org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.class */
/**
 * Tokenizer that drives a {@link WikipediaTokenizerImpl} scanner and emits
 * {@link Token}s whose type is one of the strings in {@link #TOKEN_TYPES}.
 *
 * <p>Depending on the output mode passed to the constructor, runs of consecutive
 * scanner tokens whose type is contained in the "untokenized types" set are
 * emitted individually ({@link #TOKENS_ONLY}), collapsed into a single token
 * ({@link #UNTOKENIZED_ONLY}), or both ({@link #BOTH}: the collapsed token first,
 * followed by the individual tokens). Collapsed tokens carry the
 * {@link #UNTOKENIZED_TOKEN_FLAG} flag.
 */
public class WikipediaTokenizer extends Tokenizer {
    /* Token type names for wiki markup. */
    public static final String INTERNAL_LINK = "il";
    public static final String EXTERNAL_LINK = "el";
    public static final String EXTERNAL_LINK_URL = "elu";
    public static final String CITATION = "ci";
    public static final String CATEGORY = "c";
    public static final String BOLD = "b";
    public static final String ITALICS = "i";
    public static final String BOLD_ITALICS = "bi";
    public static final String HEADING = "h";
    public static final String SUB_HEADING = "sh";

    /* Numeric token ids; each id is the index of the matching name in TOKEN_TYPES. */
    public static final int ALPHANUM_ID = 0;
    public static final int APOSTROPHE_ID = 1;
    public static final int ACRONYM_ID = 2;
    public static final int COMPANY_ID = 3;
    public static final int EMAIL_ID = 4;
    public static final int HOST_ID = 5;
    public static final int NUM_ID = 6;
    public static final int CJ_ID = 7;
    public static final int INTERNAL_LINK_ID = 8;
    public static final int EXTERNAL_LINK_ID = 9;
    public static final int CITATION_ID = 10;
    public static final int CATEGORY_ID = 11;
    public static final int BOLD_ID = 12;
    public static final int ITALICS_ID = 13;
    public static final int BOLD_ITALICS_ID = 14;
    public static final int HEADING_ID = 15;
    public static final int SUB_HEADING_ID = 16;
    public static final int EXTERNAL_LINK_URL_ID = 17;

    /** Token type names, indexed by the {@code *_ID} constants above. */
    public static final String[] TOKEN_TYPES = new String[]{
        "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>",
        INTERNAL_LINK, EXTERNAL_LINK, CITATION, CATEGORY, BOLD, ITALICS, BOLD_ITALICS,
        HEADING, SUB_HEADING, EXTERNAL_LINK_URL
    };

    /** Alias of {@link #TOKEN_TYPES} (the very same array instance). */
    public static final String[] tokenImage = TOKEN_TYPES;

    /** Output mode: emit every scanner token individually. */
    public static final int TOKENS_ONLY = 0;
    /** Output mode: collapse runs of an untokenized type into one token. */
    public static final int UNTOKENIZED_ONLY = 1;
    /** Output mode: emit the collapsed token, then the individual tokens of the run. */
    public static final int BOTH = 2;
    /** Flag value set (via {@code Token.setFlags}) on collapsed tokens. */
    public static final int UNTOKENIZED_TOKEN_FLAG = 1;

    /**
     * Value of {@code scanner.getNextToken()} that this class treats as end of input.
     * NOTE(review): presumably identical to the scanner's own EOF constant - confirm.
     */
    private static final int SCANNER_EOF = -1;

    private final WikipediaTokenizerImpl scanner;
    private final int tokenOutput;
    private final Set untokenizedTypes;
    /** Individual tokens saved by {@link #BOTH} mode that still have to be returned; may be null. */
    private Iterator tokens;

    /** Replaces the input reader without touching the scanner state. */
    void setInput(Reader reader) {
        this.input = reader;
    }

    /**
     * Creates a tokenizer in {@link #TOKENS_ONLY} mode with no untokenized types.
     *
     * @param reader the text to tokenize
     */
    public WikipediaTokenizer(Reader reader) {
        this(reader, TOKENS_ONLY, Collections.EMPTY_SET);
    }

    /**
     * Creates a tokenizer.
     *
     * @param reader the text to tokenize
     * @param tokenOutput one of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY} or {@link #BOTH};
     *        other values are not rejected, but tokens of an untokenized type are then
     *        returned without their term text being populated
     * @param untokenizedTypes token type names (entries of {@link #TOKEN_TYPES}) that are
     *        subject to collapsing; {@code null} is treated as the empty set
     */
    public WikipediaTokenizer(Reader reader, int tokenOutput, Set untokenizedTypes) {
        super(reader);
        this.tokenOutput = tokenOutput;
        this.scanner = new WikipediaTokenizerImpl(reader);
        // A null set used to blow up later inside next(); an empty set is the equivalent safe value.
        this.untokenizedTypes = (untokenizedTypes != null) ? untokenizedTypes : Collections.EMPTY_SET;
    }

    /**
     * Returns the next token, or {@code null} once the scanner reports end of input.
     *
     * <p>Tokens saved by a previous {@link #BOTH}-mode collapse are drained first; those
     * are separate {@code Token} instances, not {@code token}.
     *
     * @param token reusable token that is filled in and returned for freshly scanned tokens;
     *        must not be {@code null}
     * @return the next token or {@code null} at end of input
     * @throws IOException if the scanner fails to read its input
     */
    public Token next(Token token) throws IOException {
        assert token != null;
        if (this.tokens != null && this.tokens.hasNext()) {
            return (Token) this.tokens.next();
        }
        int tokenType = this.scanner.getNextToken();
        if (tokenType == SCANNER_EOF) {
            return null;
        }
        String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
        if (this.tokenOutput == TOKENS_ONLY || !this.untokenizedTypes.contains(type)) {
            setupToken(token);
        } else if (this.tokenOutput == UNTOKENIZED_ONLY) {
            // Reaching this branch already implies that the type is in untokenizedTypes.
            collapseTokens(token, tokenType);
        } else if (this.tokenOutput == BOTH) {
            // Collapsed token is returned now; the individual ones are queued in this.tokens.
            collapseAndSaveTokens(token, tokenType, type);
        }
        token.setPositionIncrement(this.scanner.getPositionIncrement());
        token.setType(type);
        return token;
    }

    /**
     * Collapses the current run of {@code tokenType} tokens into {@code token} and also
     * saves each individual token of the run so that subsequent {@link #next(Token)} calls
     * return them.
     *
     * @param token receives the collapsed text, offsets and {@link #UNTOKENIZED_TOKEN_FLAG}
     * @param tokenType scanner token id of the run being collapsed
     * @param type type name assigned to every saved individual token
     * @throws IOException if the scanner fails to read its input
     */
    private void collapseAndSaveTokens(Token token, int tokenType, String type) throws IOException {
        StringBuffer buffer = new StringBuffer(32);
        // NOTE(review): setText's return value is used as the length of the appended text - confirm.
        int numAdded = this.scanner.setText(buffer);
        int start = this.scanner.yychar();
        int lastPos = start + numAdded;
        int numSeen = 0;
        ArrayList saved = new ArrayList();
        Token first = new Token();
        setupSavedToken(first, 0, type);
        saved.add(first);
        int nextType;
        while (true) {
            nextType = this.scanner.getNextToken();
            // Stop at end of input, on a type change, or when the scanner's wiki-token
            // counter has not advanced past the number of tokens merged so far.
            if (nextType == SCANNER_EOF || nextType != tokenType
                    || this.scanner.getNumWikiTokensSeen() <= numSeen) {
                break;
            }
            int currPos = this.scanner.yychar();
            // Pad the gap between the previous token and this one with spaces.
            for (int gap = 0; gap < currPos - lastPos; gap++) {
                buffer.append(' ');
            }
            numAdded = this.scanner.setText(buffer);
            Token individual = new Token();
            setupSavedToken(individual, this.scanner.getPositionIncrement(), type);
            saved.add(individual);
            numSeen++;
            lastPos = currPos + numAdded;
        }
        String collapsed = buffer.toString().trim();
        token.setTermBuffer(collapsed.toCharArray(), 0, collapsed.length());
        token.setStartOffset(start);
        token.setEndOffset(start + collapsed.length());
        token.setFlags(UNTOKENIZED_TOKEN_FLAG);
        if (nextType != SCANNER_EOF) {
            // The loop consumed one token that is not part of the run; hand it back to the scanner.
            this.scanner.yypushback(this.scanner.yylength());
        }
        this.tokens = saved.iterator();
    }

    /**
     * Fills {@code token} from the scanner's current match and then sets the given
     * position increment and type on it.
     */
    private void setupSavedToken(Token token, int positionIncrement, String type) {
        setupToken(token);
        token.setPositionIncrement(positionIncrement);
        token.setType(type);
    }

    /**
     * Collapses the current run of {@code tokenType} tokens into {@code token} without
     * keeping the individual tokens.
     *
     * @param token receives the collapsed text, offsets and {@link #UNTOKENIZED_TOKEN_FLAG}
     * @param tokenType scanner token id of the run being collapsed
     * @throws IOException if the scanner fails to read its input
     */
    private void collapseTokens(Token token, int tokenType) throws IOException {
        StringBuffer buffer = new StringBuffer(32);
        // NOTE(review): setText's return value is used as the length of the appended text - confirm.
        int numAdded = this.scanner.setText(buffer);
        int start = this.scanner.yychar();
        int lastPos = start + numAdded;
        int numSeen = 0;
        int nextType;
        while (true) {
            nextType = this.scanner.getNextToken();
            // Same termination rule as in collapseAndSaveTokens.
            if (nextType == SCANNER_EOF || nextType != tokenType
                    || this.scanner.getNumWikiTokensSeen() <= numSeen) {
                break;
            }
            int currPos = this.scanner.yychar();
            // Pad the gap between the previous token and this one with spaces.
            for (int gap = 0; gap < currPos - lastPos; gap++) {
                buffer.append(' ');
            }
            numSeen++;
            lastPos = currPos + this.scanner.setText(buffer);
        }
        String collapsed = buffer.toString().trim();
        token.setTermBuffer(collapsed.toCharArray(), 0, collapsed.length());
        token.setStartOffset(start);
        token.setEndOffset(start + collapsed.length());
        token.setFlags(UNTOKENIZED_TOKEN_FLAG);
        if (nextType != SCANNER_EOF) {
            // The loop consumed one token that is not part of the run; hand it back to the scanner.
            this.scanner.yypushback(this.scanner.yylength());
        } else {
            this.tokens = null;
        }
    }

    /**
     * Copies the scanner's current match into {@code token} and sets its offsets to
     * {@code [yychar, yychar + termLength)}.
     */
    private void setupToken(Token token) {
        this.scanner.getText(token);
        int start = this.scanner.yychar();
        token.setStartOffset(start);
        token.setEndOffset(start + token.termLength());
    }

    /**
     * Resets the tokenizer so that it scans the current {@code input} reader.
     *
     * @throws IOException if the superclass reset fails
     */
    public void reset() throws IOException {
        super.reset();
        this.scanner.yyreset(this.input);
        // Drop tokens queued by BOTH mode; they belong to the previous input and must not
        // be returned by next() after a reset.
        this.tokens = null;
    }

    /**
     * Switches to a new input reader and resets the tokenizer.
     *
     * @param reader the new text to tokenize
     * @throws IOException if the reset fails
     */
    public void reset(Reader reader) throws IOException {
        this.input = reader;
        reset();
    }
}
