package org.apache.ctakes.core.nlp.tokenizer;

import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.uima.jcas.JCas;

/* loaded from: input_file:org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.class */
public class TokenizerPTB {
    // Characters that isPossibleFinalPunctuation() accepts (used when deciding where an abbreviation ends).
    private String possibleFinalPunctuation = "?!:";
    // Non-alphanumeric characters that lenIfIsEmailAddress() accepts in the part before the '@'.
    private String validOtherEmailAddressCharacters = "!#$%&'*+/=?^_`{|}~-";
    // Shared empty array; not referenced anywhere else in this file.
    static final String[] emptyStringList = new String[0];
    // Shared (mutable) empty list returned by tokenizeTextSegment() for null/empty/whitespace-only input.
    static final ArrayList<BaseToken> emptyTokenList = new ArrayList<>();
    // Separator expected by the postal-code and telephone-number checks.
    private static char DASH = '-';
    // Literal that isEllipsis() looks for at a '.' position.
    private static String ellipsis = "...";
    // Lower-cased names that begin with an apostrophe and are kept as a single word token.
    static String[] nameStartingWithApostrophe = {"'assad", "'awarta", "'ashira"};
    // Prefixes that lenIfIsUrl() treats as the start of a URL.
    private static String[] urlStarters = {"http://", "https://", "ftp://", "mailto:"};
    // Sample inputs printed by runNumberTests().
    static String[] testsForNumbers = {"2,000,123.For", "92,000,123.", "2,000,123.", "2,000,123.0", "2,000,13", "2", "2.", "2,", "22", "12345678901@4", "2.2.2."};
    // Sample inputs printed by runEmailTests().
    static String[] testsForEmailAddress = {"masanz@mayo.edu", "masanz@mayo", "m@l", "m.@p", "m.n.@p", "3@4", "%@f", "R@@", "MASANZ@MAYO", "jk$jk@.m", "asdf@.m$", "masanz.james-mi@ibm.com.us", ".mn@p", ".@p", "@t"};

    /**
     * Splits a text segment into tokens.
     *
     * <p>Each token is produced by {@code createToken}: when {@code jCas} is non-null the list
     * holds UIMA token annotations, otherwise it holds plain {@code Token} objects.
     *
     * @param jCas CAS used to create annotations; may be {@code null}
     * @param str  text to tokenize; {@code null} or empty yields the shared empty list
     * @param i    offset added to the begin/end of every created token
     * @param z    not read by this method
     * @return the tokens in text order, or {@code emptyTokenList} when there is nothing to tokenize
     * @throws RuntimeException if a token length could not be determined or a contraction helper
     *                          returns an inconsistent result
     */
    public List<?> tokenizeTextSegment(JCas jCas, String str, int i, boolean z) {
        // Guard BEFORE touching str: the previous version lower-cased first and so threw a
        // NullPointerException for null input instead of returning the empty list.
        if (str == null || str.length() == 0) {
            return emptyTokenList;
        }
        // Sentinel meaning "no rule has decided the token length yet".
        final int notYetDetermined = -999;
        String lowerCasedText = str.toLowerCase();
        List<Object> tokens = new ArrayList<>();
        Class<? extends BaseToken> tokenClass = null;
        int searchFrom = findFirstCharOfNextToken(str, 0);
        if (searchFrom < 0) {
            return emptyTokenList;
        }
        while (true) {
            int currentPosition = findFirstCharOfNextToken(str, searchFrom);
            if (currentPosition < 0) {
                return tokens;
            }
            char firstChar = str.charAt(currentPosition);
            int tokenLen = notYetDetermined;
            if (currentPosition + 1 >= str.length()) {
                // Last character of the segment: single-char token, class decided by createToken.
                tokenLen = 1;
                tokenClass = null;
            } else if (Character.isWhitespace(str.charAt(currentPosition + 1))) {
                // Single character followed by whitespace: class decided by createToken.
                // NOTE(review): '\n' is whitespace, so "\r\n" is caught here and the two-character
                // newline branch below looks unreachable - confirm whether that is intended.
                tokenLen = 1;
                tokenClass = null;
            } else if (firstChar == '\n') {
                tokenLen = 1;
                tokenClass = NewlineToken.class;
            } else if (firstChar == '\r') {
                if (str.charAt(currentPosition + 1) != '\n') {
                    tokenLen = 1;
                    tokenClass = NewlineToken.class;
                } else {
                    tokenLen = 2;
                    tokenClass = NewlineToken.class;
                }
            } else if (firstChar == '.') {
                // ".5" style number, "..." ellipsis, or a lone period.
                int numberLen = getLengthIfIsNumberThatStartsWithPeriod(currentPosition, str);
                if (numberLen > 0) {
                    tokenClass = NumToken.class;
                    tokenLen = numberLen;
                } else if (isEllipsis(currentPosition, str)) {
                    tokenLen = 3;
                    tokenClass = PunctuationToken.class;
                } else {
                    tokenLen = 1;
                    tokenClass = PunctuationToken.class;
                }
            } else if (firstChar == '-') {
                tokenLen = 1;
                tokenClass = PunctuationToken.class;
            } else if (firstChar == '\'') {
                // Known apostrophe-initial name, apostrophe-initial contraction, or a lone quote.
                int nameLen = getLengthIfNameStartingWithApostrophe(currentPosition, str);
                if (nameLen > 0) {
                    tokenLen = nameLen;
                    tokenClass = WordToken.class;
                } else if (ContractionsPTB.isContractionThatStartsWithApostrophe(currentPosition, lowerCasedText)) {
                    tokenLen = 2;
                    tokenClass = ContractionToken.class;
                } else {
                    tokenLen = 1;
                    tokenClass = PunctuationToken.class;
                }
            } else if (TokenizerHelper.isPunctuation(firstChar)) {
                tokenLen = 1;
                tokenClass = PunctuationToken.class;
            } else if (Character.isLetterOrDigit(firstChar)) {
                // Scan forward to the next whitespace (or end of text), remembering the first
                // position at which each character category stops matching. -1 means "not seen".
                boolean onlyLettersAndDigits = true;
                boolean onlyDigits = true;
                int nextWhitespaceOrEnd = -1;
                int nextNonLetterOrNonDigit = -1;
                int nextNonLetterDigitApostrophe = -1;
                int nextNonTelephoneOrPostalChar = -1;
                int nextNonNumericChar = -1;
                int nextNonDigit = -1;
                int index = currentPosition;
                char ch;
                do {
                    ch = str.charAt(index);
                    if (Character.isWhitespace(ch)) {
                        // Whitespace terminates every category that is still open.
                        if (nextNonLetterOrNonDigit < 0) {
                            nextNonLetterOrNonDigit = index;
                        }
                        if (nextNonLetterDigitApostrophe < 0) {
                            nextNonLetterDigitApostrophe = index;
                        }
                        if (nextNonDigit < 0) {
                            nextNonDigit = index;
                        }
                        if (nextNonTelephoneOrPostalChar < 0) {
                            nextNonTelephoneOrPostalChar = index;
                        }
                        if (nextNonNumericChar < 0) {
                            nextNonNumericChar = index;
                        }
                        nextWhitespaceOrEnd = index;
                    } else if (!Character.isLetterOrDigit(ch)) {
                        // Punctuation or symbol inside the run.
                        onlyLettersAndDigits = false;
                        onlyDigits = false;
                        if (nextNonLetterOrNonDigit < 0) {
                            nextNonLetterOrNonDigit = index;
                        }
                        if (nextNonLetterDigitApostrophe < 0 && ch != '\'') {
                            nextNonLetterDigitApostrophe = index;
                        }
                        if (nextNonDigit < 0) {
                            nextNonDigit = index;
                        }
                        if (nextNonTelephoneOrPostalChar < 0 && !isTelephoneNumberChar(ch)) {
                            nextNonTelephoneOrPostalChar = index;
                        }
                        if (nextNonNumericChar < 0 && !isNumericChar(ch)) {
                            nextNonNumericChar = index;
                        }
                    } else if (!Character.isDigit(ch)) {
                        // A letter.
                        onlyDigits = false;
                        if (nextNonDigit < 0) {
                            nextNonDigit = index;
                        }
                        if (nextNonTelephoneOrPostalChar < 0 && !isTelephoneNumberChar(ch)) {
                            nextNonTelephoneOrPostalChar = index;
                        }
                        if (nextNonNumericChar < 0 && !isNumericChar(ch)) {
                            nextNonNumericChar = index;
                        }
                    }
                    index++;
                    if (index >= str.length()) {
                        break;
                    }
                } while (!Character.isWhitespace(ch));
                if (index >= str.length()) {
                    // Ran off the end of the text: open categories end at the text length.
                    // (nextNonDigit is not defaulted here, matching the previous behavior.)
                    if (nextWhitespaceOrEnd < 0) {
                        nextWhitespaceOrEnd = str.length();
                    }
                    if (nextNonLetterOrNonDigit < 0) {
                        nextNonLetterOrNonDigit = str.length();
                    }
                    if (nextNonLetterDigitApostrophe < 0) {
                        nextNonLetterDigitApostrophe = str.length();
                    }
                    if (nextNonTelephoneOrPostalChar < 0) {
                        nextNonTelephoneOrPostalChar = str.length();
                    }
                    if (nextNonNumericChar < 0) {
                        nextNonNumericChar = str.length();
                    }
                }
                if (onlyDigits) {
                    tokenLen = nextWhitespaceOrEnd - currentPosition;
                    tokenClass = NumToken.class;
                } else if (onlyLettersAndDigits) {
                    // Letters/digits only: either a plain word or a contraction without an
                    // apostrophe that ContractionsPTB splits into two or three parts.
                    String lowerCasedRun = str.substring(currentPosition, nextWhitespaceOrEnd).toLowerCase();
                    int firstPartLen = ContractionsPTB.lenOfFirstTokenInContraction(lowerCasedRun);
                    if (firstPartLen > 0) {
                        tokens.add(createToken(WordToken.class, str, jCas, currentPosition, currentPosition + firstPartLen, i));
                        currentPosition += firstPartLen;
                        tokenLen = ContractionsPTB.lenOfSecondTokenInContraction(lowerCasedRun);
                        tokenClass = ContractionToken.class;
                        int thirdPartLen = ContractionsPTB.lenOfThirdTokenInContraction(lowerCasedRun);
                        if (thirdPartLen > 0) {
                            tokens.add(createToken(tokenClass, str, jCas, currentPosition, currentPosition + tokenLen, i));
                            currentPosition += tokenLen;
                            tokenLen = thirdPartLen;
                            tokenClass = ContractionToken.class;
                        }
                    } else {
                        tokenLen = nextWhitespaceOrEnd - currentPosition;
                        tokenClass = WordToken.class;
                    }
                } else {
                    // The run contains punctuation/symbols: try the special forms in order.
                    if (nextNonLetterOrNonDigit < lowerCasedText.length() && lowerCasedText.charAt(nextNonLetterOrNonDigit) == '\'') {
                        int keepTogetherLen = ContractionsPTB.tokenLengthCheckingForSingleQuoteWordsToKeepTogether(lowerCasedText.substring(currentPosition, nextWhitespaceOrEnd));
                        if (keepTogetherLen > nextNonLetterOrNonDigit - currentPosition) {
                            tokenLen = keepTogetherLen;
                            tokenClass = wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                        }
                    }
                    if (tokenLen == notYetDetermined) {
                        ContractionResult contraction = ContractionsPTB.getLengthIfNextApostIsMiddleOfContraction(currentPosition, nextNonLetterOrNonDigit, lowerCasedText);
                        if (contraction != null) {
                            // Word part followed by a contraction part starting with 'n' or an apostrophe.
                            int wordTokenLen = contraction.getWordTokenLen();
                            char charAfterWord = lowerCasedText.charAt(currentPosition + wordTokenLen);
                            if (charAfterWord != 'n' && charAfterWord != '\'') {
                                throw new RuntimeException("ERROR: getLengthIfNextApostIsMiddleOfContraction returned " + wordTokenLen + " but the character (" + charAfterWord + ") after that is not 'n' or apostrophe ");
                            }
                            if (wordTokenLen < 0) {
                                throw new RuntimeException("c = " + charAfterWord + "tokenLen = " + wordTokenLen + " currentPosition = " + currentPosition);
                            }
                            tokens.add(createToken(WordToken.class, str, jCas, currentPosition, currentPosition + wordTokenLen, i));
                            currentPosition += wordTokenLen;
                            tokenLen = contraction.getContractionTokenLen();
                            tokenClass = ContractionToken.class;
                        } else {
                            int telephoneLen = lenIfIsTelephoneNumber(currentPosition, lowerCasedText, nextNonTelephoneOrPostalChar);
                            if (telephoneLen > 0) {
                                tokenLen = telephoneLen;
                                tokenClass = WordToken.class;
                            } else {
                                int postalCodeLen = lenIfIsPostalCode(currentPosition, lowerCasedText, nextNonTelephoneOrPostalChar);
                                if (postalCodeLen > 0) {
                                    tokenLen = postalCodeLen;
                                    tokenClass = WordToken.class;
                                } else {
                                    int urlLen = lenIfIsUrl(currentPosition, lowerCasedText, nextWhitespaceOrEnd);
                                    if (urlLen > 0) {
                                        tokenLen = urlLen;
                                        tokenClass = WordToken.class;
                                    } else {
                                        int emailLen = lenIfIsEmailAddress(currentPosition, lowerCasedText, nextWhitespaceOrEnd);
                                        if (emailLen > 0) {
                                            tokenLen = emailLen;
                                            tokenClass = WordToken.class;
                                        } else {
                                            int abbreviationLen = lenIfIsAbbreviation(currentPosition, str, nextWhitespaceOrEnd);
                                            if (abbreviationLen > 0) {
                                                tokenLen = abbreviationLen;
                                                tokenClass = WordToken.class;
                                            } else if (nextNonLetterOrNonDigit < lowerCasedText.length() && lowerCasedText.charAt(nextNonLetterOrNonDigit) == '-') {
                                                // Hyphenated term: HyphenatedPTB decides how much stays together.
                                                tokenLen = HyphenatedPTB.tokenLengthCheckingForHyphenatedTerms(lowerCasedText.substring(currentPosition, nextWhitespaceOrEnd));
                                                if (tokenLen < 0) {
                                                    throw new RuntimeException("tokenLen = " + tokenLen + " currentPosition = " + currentPosition + " nextNonLetterOrNonDigit = " + nextNonLetterOrNonDigit);
                                                }
                                                tokenClass = wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                            } else {
                                                int commaNumberLen = -1;
                                                if (nextNonNumericChar > 0) {
                                                    commaNumberLen = lenIfIsNumberContainingComma(currentPosition, lowerCasedText, nextNonNumericChar);
                                                }
                                                if (nextNonNumericChar > 0 && commaNumberLen > 0) {
                                                    tokenLen = commaNumberLen;
                                                    tokenClass = NumToken.class;
                                                } else if (nextNonLetterDigitApostrophe >= lowerCasedText.length() || lowerCasedText.charAt(nextNonLetterDigitApostrophe) != '.') {
                                                    // Stop at the first non-alphanumeric character.
                                                    tokenLen = nextNonLetterOrNonDigit - currentPosition;
                                                    tokenClass = wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                                } else if (nextNonDigit == lowerCasedText.length() - 1) {
                                                    // Digits followed by a period that ends the text.
                                                    tokenLen = nextNonDigit - currentPosition;
                                                    tokenClass = NumToken.class;
                                                } else if (nextNonLetterDigitApostrophe == nextNonDigit) {
                                                    // Digits, a period, then possibly more digits: one decimal number.
                                                    tokenLen = ((nextNonDigit + 1) + getLenToNextNonDigit(lowerCasedText, nextNonDigit + 1)) - currentPosition;
                                                    tokenClass = NumToken.class;
                                                } else {
                                                    tokenLen = nextNonLetterOrNonDigit - currentPosition;
                                                    tokenClass = wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                // Anything else is a one-character symbol.
                tokenLen = 1;
                tokenClass = SymbolToken.class;
            }
            if (tokenLen < 0) {
                throw new RuntimeException("tokenLen = " + tokenLen + " currentPosition = " + currentPosition);
            }
            tokens.add(createToken(tokenClass, str, jCas, currentPosition, currentPosition + tokenLen, i));
            searchFrom = currentPosition + tokenLen;
        }
    }

    /**
     * Tokenizes {@code str} without a CAS and without any offset adjustment.
     *
     * @param str text to tokenize
     * @return whatever {@code tokenizeTextSegment} returns for a null CAS and offset 0
     */
    public List<?> tokenize(String str) {
        final JCas noCas = null;
        final int noOffsetAdjustment = 0;
        return tokenizeTextSegment(noCas, str, noOffsetAdjustment, true);
    }

    /**
     * Measures a number that uses commas as digit-group separators (e.g. "2,000,123"),
     * optionally followed by a period and further digits.
     *
     * @param i   index in {@code str} where the candidate number starts
     * @param str text being examined
     * @param i2  exclusive end of the region to consider; only {@code str.substring(0, i2)} is scanned
     * @return length of the number measured from {@code i}, or -1 if no such number is found
     */
    private int lenIfIsNumberContainingComma(int i, String str, int i2) {
        String substring = str.substring(0, i2);
        // 44 is ','
        int indexOf = substring.indexOf(44, i);
        if (indexOf < 0 || indexOf > i2) {
            return -1;
        }
        int i3 = -1;
        // 46 is '.'; i4 becomes the scan limit: the first period, or the end of the region if none.
        int indexOf2 = substring.indexOf(46, i);
        int i4 = indexOf2;
        if (i4 < 0) {
            i4 = substring.length();
        }
        // The first comma must come before the period and must not be the very first character.
        if (indexOf > i4 || indexOf == 0) {
            return -1;
        }
        int i5 = indexOf;
        boolean z = false;
        while (!z) {
            // Length accepted so far, recorded before the next ",ddd" group is attempted,
            // so an incomplete trailing group is not included in the result.
            i3 = i5 - i;
            if (i5 < i4 && substring.charAt(i5) == ',') {
                i5++;
            }
            // Consume up to three digits; anything short of three ends the scan.
            for (int i6 = 0; i6 < 3; i6++) {
                if (i5 >= i4 || !Character.isDigit(substring.charAt(i5))) {
                    z = true;
                } else {
                    i5++;
                }
            }
            // A fourth consecutive digit also ends the scan.
            if (i5 < i4 && Character.isDigit(substring.charAt(i5))) {
                z = true;
            }
        }
        if (i3 <= 0) {
            return -1;
        }
        // If the period directly follows the accepted part and is not the last character of str,
        // extend over the period and any digits after it (bounded by i2).
        if (indexOf2 != str.length() - 1 && indexOf2 == i + i3) {
            do {
                i3++;
                if (i3 >= i2 - i) {
                    break;
                }
            } while (Character.isDigit(substring.charAt(i + i3)));
        }
        return i3;
    }

    /**
     * Recognizes a ten-character code of the form ddddd-dddd between {@code i} and {@code i2}.
     *
     * @param i   start index in {@code str}
     * @param str text being examined
     * @param i2  exclusive end index; a negative value is returned unchanged
     * @return 10 when the span matches, {@code i2} when {@code i2} is negative, otherwise -1
     */
    private int lenIfIsPostalCode(int i, String str, int i2) {
        if (i2 < 0) {
            return i2;
        }
        final int candidateLen = i2 - i;
        final String candidate = str.substring(i, i2);
        if (candidateLen != 10) {
            return -1;
        }
        final int dashIndex = 5;
        for (int k = 0; k < candidateLen; k++) {
            final char c = candidate.charAt(k);
            final boolean acceptable = (k == dashIndex) ? (c == DASH) : Character.isDigit(c);
            if (!acceptable) {
                return -1;
            }
        }
        return candidateLen;
    }

    /**
     * Recognizes digit groups separated by dashes in one of the layouts
     * d-dddd, ddd-dddd, ddd-ddd-dddd or d-ddd-ddd-dddd between {@code i} and {@code i2}.
     *
     * @param i   start index in {@code str}
     * @param str text being examined
     * @param i2  exclusive end index; a negative value is returned unchanged
     * @return the span length when it matches a layout, {@code i2} when {@code i2} is negative,
     *         otherwise -1 (a non-matching 12-character span is delegated to {@code checkFormat2})
     */
    private int lenIfIsTelephoneNumber(int i, String str, int i2) {
        if (i2 < 0) {
            return i2;
        }
        final int candidateLen = i2 - i;
        final String candidate = str.substring(i, i2);
        switch (candidateLen) {
            case 6:
                return matchesPhoneTemplate(candidate, "d-dddd") ? candidateLen : -1;
            case 8:
                return matchesPhoneTemplate(candidate, "ddd-dddd") ? candidateLen : -1;
            case 12:
                if (matchesPhoneTemplate(candidate, "ddd-ddd-dddd")) {
                    return candidateLen;
                }
                return checkFormat2(candidate);
            case 14:
                return matchesPhoneTemplate(candidate, "d-ddd-ddd-dddd") ? candidateLen : -1;
            default:
                return -1;
        }
    }

    /** Returns true when every 'd' in the template lines up with a digit and every other slot with DASH. */
    private boolean matchesPhoneTemplate(String candidate, String template) {
        for (int k = 0; k < template.length(); k++) {
            final char actual = candidate.charAt(k);
            final boolean slotMatches = (template.charAt(k) == 'd') ? Character.isDigit(actual) : (actual == DASH);
            if (!slotMatches) {
                return false;
            }
        }
        return true;
    }

    /**
     * Placeholder for a second 12-character telephone layout.
     *
     * <p>The previous body tested the characters against a dd-dddd-ddd layout followed by a
     * non-digit, but returned -1 on both outcomes, so the test had no effect. The result is
     * therefore returned directly.
     * NOTE(review): if a second layout was meant to be accepted, the intended positive return
     * value still needs to be decided.
     *
     * @param str 12-character candidate (unused)
     * @return always -1
     */
    private int checkFormat2(String str) {
        return -1;
    }

    /** Returns true for a dash or any digit. */
    private boolean isTelephoneNumberChar(char c) {
        if (c == '-') {
            return true;
        }
        return Character.isDigit(c);
    }

    /** Returns true for a comma, a period, or any digit. */
    private boolean isNumericChar(char c) {
        switch (c) {
            case ',':
            case '.':
                return true;
            default:
                return Character.isDigit(c);
        }
    }

    /**
     * Counts the consecutive digits in {@code str} starting at index {@code i}.
     *
     * @return number of characters from {@code i} up to the first non-digit, or
     *         {@code str.length() - i} when no non-digit is found
     */
    private int getLenToNextNonDigit(String str, int i) {
        final int end = str.length();
        int cursor = i;
        while (cursor < end) {
            if (!Character.isDigit(str.charAt(cursor))) {
                return cursor - i;
            }
            cursor++;
        }
        return end - i;
    }

    /**
     * Picks the token class for the {@code i2} characters of {@code str} starting at {@code i}:
     * a word if any of them is a letter, otherwise a number.
     */
    private Class<? extends BaseToken> wordTokenOrNumToken(String str, int i, int i2) {
        if (containsLetter(str, i, i2)) {
            return WordToken.class;
        }
        return NumToken.class;
    }

    /**
     * Reports whether any of the {@code i2} characters of {@code str} starting at {@code i} is a letter.
     */
    private boolean containsLetter(String str, int i, int i2) {
        final int stop = i + i2;
        int cursor = i;
        while (cursor < stop) {
            if (Character.isLetter(str.charAt(cursor))) {
                return true;
            }
            cursor++;
        }
        return false;
    }

    /**
     * Reports whether the text at index {@code i} begins with the ellipsis literal.
     *
     * <p>Uses an offset comparison instead of copying the remainder of the text, which the
     * previous implementation did for every period it examined.
     *
     * @param i   index to test
     * @param str text being examined
     * @return true if {@code str} contains the ellipsis literal starting exactly at {@code i};
     *         false otherwise, including when {@code i} lies outside the text
     */
    private boolean isEllipsis(int i, String str) {
        return str.startsWith(ellipsis, i);
    }

    /**
     * Checks whether the text at {@code i} (expected to be an apostrophe) starts one of the
     * names listed in {@code nameStartingWithApostrophe}, ignoring case.
     *
     * @param i   index of the apostrophe in {@code str}
     * @param str text being examined
     * @return length of the matching name, or -1 when nothing matches or the apostrophe is not
     *         followed by a letter
     */
    private int getLengthIfNameStartingWithApostrophe(int i, String str) {
        final String remainder = str.substring(i).toLowerCase();
        final boolean followedByLetter = remainder.length() != 1 && Character.isLetter(str.charAt(i + 1));
        if (!followedByLetter) {
            return -1;
        }
        final String[] knownNames = nameStartingWithApostrophe;
        for (int k = 0; k < knownNames.length; k++) {
            final String name = knownNames[k];
            // startsWith already implies the name is no longer than the remainder.
            if (remainder.startsWith(name)) {
                return name.length();
            }
        }
        return -1;
    }

    /**
     * Measures a number written with a leading period, such as ".25".
     *
     * @param i   index of the period in {@code str}
     * @param str text being examined
     * @return length of the period plus the digits that follow it, or -1 when the period is the
     *         last character or is not followed by a digit
     */
    private int getLengthIfIsNumberThatStartsWithPeriod(int i, String str) {
        final int remaining = str.length() - i;
        if (remaining < 2 || !Character.isDigit(str.charAt(i + 1))) {
            return -1;
        }
        // The period and the first digit are accepted; extend over any further digits.
        int end = i + 2;
        while (end < str.length() && Character.isDigit(str.charAt(end))) {
            end++;
        }
        return end - i;
    }

    /**
     * Measures an abbreviation made of letter runs each followed by a period (for example
     * "e.g."), looking only at {@code str} between {@code i} and {@code i2}.
     *
     * @param i   start index in {@code str}
     * @param str text being examined (original case)
     * @param i2  exclusive end of the region to consider
     * @return length of the abbreviation, or -1 when the region does not start with one
     *         (a region starting with "www." is never treated as an abbreviation)
     */
    private int lenIfIsAbbreviation(int i, String str, int i2) {
        final boolean startsLikeWebHost = i2 - i >= 4 && str.substring(i, i + 4).toLowerCase().equals("www.");
        if (startsLikeWebHost) {
            return -1;
        }
        boolean sawLetter = false;
        int pos = i;
        while (pos < i2) {
            final char current = str.charAt(pos);
            final int after = pos + 1;
            // A blank stands in for "nothing follows inside the region".
            final char lookahead = (after < i2) ? str.charAt(after) : ' ';
            if (Character.isLetter(current)) {
                sawLetter = true;
                pos = after;
                continue;
            }
            // The first non-letter decides: it must be a period that follows at least one
            // letter and is not the final character of the whole text.
            if (current != '.' || !sawLetter || after == str.length()) {
                return -1;
            }
            final int lenThroughPeriod = after - i;
            // Try to extend with another "letters." group.
            final int continuation = lenIfIsAbbreviation(after, str, i2);
            if (continuation > 0) {
                return lenThroughPeriod + continuation;
            }
            if (Character.isWhitespace(lookahead) || isPossibleFinalPunctuation(lookahead)) {
                return lenThroughPeriod;
            }
            if (Character.isLetterOrDigit(lookahead)) {
                return -1;
            }
            // Followed by some other character: report the length without the period.
            return lenThroughPeriod - 1;
        }
        return -1;
    }

    /** Returns true when {@code c} is one of the characters in {@code possibleFinalPunctuation}. */
    private boolean isPossibleFinalPunctuation(char c) {
        final int where = this.possibleFinalPunctuation.indexOf(c);
        return where >= 0;
    }

    /**
     * Decides whether the span of {@code str} from {@code i} to {@code i2} is an email address.
     *
     * <p>Local part (before '@'): 1 to 64 characters, each a letter, digit, one of
     * {@code validOtherEmailAddressCharacters}, or a period that is neither the first nor the
     * last character. Previously a period could never pass, because it is not listed in
     * {@code validOtherEmailAddressCharacters} and was rejected before the first/last rule was
     * reached; interior periods are now accepted as that rule implies.
     *
     * <p>Domain part (after '@'): non-empty; letters and digits, plus '-' or '.' only when
     * directly followed by a letter or digit. The previous code contained extra branches guarded
     * by {@code Character.isLetterOrDigit('@')}, which is always false; they have been removed
     * without changing the result.
     *
     * @param i   start index in {@code str}
     * @param str text being examined
     * @param i2  exclusive end index of the candidate
     * @return {@code i2 - i} when the span is an address of at most 320 characters, otherwise -1
     */
    private int lenIfIsEmailAddress(int i, String str, int i2) {
        final int atOffset = str.substring(i, i2).indexOf('@');
        // '@' must exist, must not be first, must not be last, and the local part is capped at 64.
        if (atOffset < 1 || i + atOffset + 1 == i2 || atOffset > 64) {
            return -1;
        }
        final int atIndex = i + atOffset;
        for (int pos = i; pos < atIndex; pos++) {
            final char c = str.charAt(pos);
            if (c == '.') {
                // A period may appear inside the local part but not at either end.
                if (pos == i || pos == atIndex - 1) {
                    return -1;
                }
                continue;
            }
            if (!Character.isLetterOrDigit(c) && this.validOtherEmailAddressCharacters.indexOf(c) < 0) {
                return -1;
            }
        }
        for (int pos = atIndex + 1; pos < i2; pos++) {
            final char c = str.charAt(pos);
            if (Character.isLetterOrDigit(c)) {
                continue;
            }
            if (c != '-' && c != '.') {
                return -1;
            }
            // A separator must be followed by a letter or digit inside the span.
            if (pos + 1 >= i2 || !Character.isLetterOrDigit(str.charAt(pos + 1))) {
                return -1;
            }
        }
        final int totalLen = i2 - i;
        return totalLen > 320 ? -1 : totalLen;
    }

    /**
     * Treats the span from {@code i} to {@code i2} as a URL when it begins with one of
     * {@code urlStarters} and has at least one character after that prefix.
     *
     * @return the span length when it looks like a URL, otherwise -1
     */
    private int lenIfIsUrl(int i, String str, int i2) {
        final String candidate = str.substring(i, i2);
        final int candidateLen = candidate.length();
        final String[] prefixes = urlStarters;
        for (int k = 0; k < prefixes.length; k++) {
            final String prefix = prefixes[k];
            if (candidate.startsWith(prefix) && candidateLen > prefix.length()) {
                return candidateLen;
            }
        }
        return -1;
    }

    /**
     * Chooses a token class for a single character of {@code str}.
     *
     * @param str text containing the character
     * @param i   index of the character
     * @param i2  must equal {@code i + 1} and lie within {@code str}
     * @return the class for the character, or {@code null} (after printing a stack trace) when
     *         the arguments do not describe exactly one character
     */
    private Class<? extends BaseToken> determineTokenType(String str, int i, int i2) {
        final boolean describesOneChar = str != null && str.length() >= i2 && i + 1 == i2;
        if (!describesOneChar) {
            new Exception("ERROR: s not at least one char:  s= " + str + " begin, end = " + i + "," + i2).printStackTrace();
            return null;
        }
        final char c = str.charAt(i);
        if (c == '\n' || c == '\r') {
            return NewlineToken.class;
        }
        if (Character.isDigit(c)) {
            return NumToken.class;
        }
        if (Character.isLetter(c)) {
            return WordToken.class;
        }
        if (isContraction(c)) {
            return ContractionToken.class;
        }
        if (TokenizerHelper.isPunctuation(c)) {
            return PunctuationToken.class;
        }
        return SymbolToken.class;
    }

    /**
     * Stub used by determineTokenType(): no single character is ever classified as a contraction.
     *
     * @param c character to classify (ignored)
     * @return always false
     */
    private boolean isContraction(char c) {
        return false;
    }

    /**
     * Sanity-checks token offsets, reporting the first problem found on standard error
     * together with a stack trace.
     *
     * @param i  begin offset
     * @param i2 end offset
     * @param i3 offset adjustment
     * @return true when all three values are non-negative and {@code i2 >= i}
     */
    private boolean verify(int i, int i2, int i3) {
        // Created up front so the trace is available for whichever check fails.
        final Exception trace = new Exception(i + " " + i2 + " " + i3);
        final String problem;
        if (i < 0) {
            problem = "ERROR: begin = " + i;
        } else if (i2 < 0) {
            problem = "ERROR: end = " + i2;
        } else if (i2 < i) {
            problem = "ERROR: end < begin " + i2 + " < " + i;
        } else if (i3 < 0) {
            problem = "ERROR: offsetAdjustment = " + i3;
        } else {
            return true;
        }
        System.err.println(problem);
        trace.printStackTrace();
        return false;
    }

    /**
     * Creates one token covering {@code str[i, i2)}.
     *
     * <p>With a CAS, an annotation of the requested class is created at the adjusted offsets
     * ({@code i + i3}, {@code i2 + i3}). Without a CAS, a plain {@code Token} carrying the
     * covered text is created at the same adjusted offsets. Previously the no-CAS path for an
     * undetermined class ({@code cls == null}) used the unadjusted offsets, unlike every other
     * path; it is now consistent.
     *
     * @param cls  token class to create, or {@code null} to classify the single character via
     *             {@code determineTokenType}
     * @param str  text the token is taken from
     * @param jCas CAS to create annotations in, or {@code null} for plain {@code Token}s
     * @param i    begin index within {@code str}
     * @param i2   end index (exclusive) within {@code str}
     * @param i3   offset adjustment added to both indices for the created token
     * @return the created token; a zero-length placeholder when the offsets fail {@code verify};
     *         {@code null} when a CAS is given and {@code cls} is not a supported class
     * @throws RuntimeException if {@code cls} is null and no class can be determined
     */
    private Object createToken(Class<? extends BaseToken> cls, String str, JCas jCas, int i, int i2, int i3) {
        final int begin = i + i3;
        final int end = i2 + i3;
        if (!verify(begin, end, i3)) {
            System.err.println("ERROR: so creating a BaseToken with begin = 0 end = 0 just to avoid exception");
            return jCas != null ? new BaseToken(jCas, 0, 0) : new Token(0, 0);
        }
        Class<? extends BaseToken> tokenClass = cls;
        if (tokenClass == null) {
            tokenClass = determineTokenType(str, i, i2);
            if (tokenClass == null) {
                throw new RuntimeException(" still is null");
            }
        }
        if (jCas == null) {
            // Without a CAS every class is represented by the same plain Token type.
            Token plainToken = new Token(begin, end);
            plainToken.setText(str.substring(i, i2));
            return plainToken;
        }
        if (tokenClass.equals(NewlineToken.class)) {
            return new NewlineToken(jCas, begin, end);
        }
        if (tokenClass.equals(NumToken.class)) {
            NumToken numToken = new NumToken(jCas, begin, end);
            setNumType(numToken, str.substring(i, i2));
            return numToken;
        }
        if (tokenClass.equals(WordToken.class)) {
            WordToken wordToken = new WordToken(jCas, begin, end);
            String coveredText = str.substring(i, i2);
            setCapitalization(wordToken, coveredText);
            setNumPosition(wordToken, coveredText);
            return wordToken;
        }
        if (tokenClass.equals(SymbolToken.class)) {
            return new SymbolToken(jCas, begin, end);
        }
        if (tokenClass.equals(PunctuationToken.class)) {
            return new PunctuationToken(jCas, begin, end);
        }
        if (tokenClass.equals(ContractionToken.class)) {
            return new ContractionToken(jCas, begin, end);
        }
        if (tokenClass.equals(BaseToken.class)) {
            return new BaseToken(jCas, begin, end);
        }
        System.err.println("clas=" + tokenClass + " and need to add more code here to support that class");
        return null;
    }

    /**
     * Sets the numeric type of {@code numToken}: 1 when {@code Tokenizer.isNumber} accepts the
     * text and it contains no period, otherwise 2.
     */
    private void setNumType(NumToken numToken, String str) {
        final boolean numberWithoutPeriod = Tokenizer.isNumber(str) && !str.contains(".");
        numToken.setNumType(numberWithoutPeriod ? 1 : 2);
    }

    /**
     * Records where digits occur in the word: 1 when the first character is a digit, 3 when the
     * last one is, 2 when a digit appears elsewhere, and 0 when there is none.
     */
    private void setNumPosition(WordToken wordToken, String str) {
        final int length = str.length();
        int position;
        if (Character.isDigit(str.charAt(0))) {
            position = 1;
        } else if (Character.isDigit(str.charAt(length - 1))) {
            position = 3;
        } else {
            position = 0;
            for (int k = 0; k < length; k++) {
                if (Character.isDigit(str.charAt(k))) {
                    position = 2;
                    break;
                }
            }
        }
        wordToken.setNumPosition(position);
    }

    /**
     * Records the capitalization pattern of the word: 0 when no character is upper case, 3 when
     * all are, 1 when only the first character is, and 2 for any other mix.
     */
    private void setCapitalization(WordToken wordToken, String str) {
        final int length = str.length();
        int upperCaseCount = 0;
        for (int k = 0; k < length; k++) {
            if (Character.isUpperCase(str.charAt(k))) {
                upperCaseCount++;
            }
        }
        final int pattern;
        if (upperCaseCount == 0) {
            pattern = 0;
        } else if (upperCaseCount == length) {
            pattern = 3;
        } else if (upperCaseCount == 1 && Character.isUpperCase(str.charAt(0))) {
            pattern = 1;
        } else {
            pattern = 2;
        }
        wordToken.setCapitalization(pattern);
    }

    /**
     * Finds where the next token starts at or after index {@code i}.
     *
     * <p>Whitespace is skipped, except that end-of-line characters are themselves token starts.
     * The previous loop returned after advancing past at most one whitespace character, so a
     * run of blanks produced bogus token starts and trailing whitespace produced an index equal
     * to the text length.
     *
     * @param str text to scan
     * @param i   index to start scanning from
     * @return index of the first character that is not skippable whitespace, or -1 when only
     *         skippable whitespace (or nothing) remains
     */
    public int findFirstCharOfNextToken(String str, int i) {
        int position = i;
        while (position < str.length()) {
            if (position < 0) {
                // Kept from the original: reports the bad position before charAt fails.
                System.out.println("position = " + position);
            }
            char c = str.charAt(position);
            if (!Character.isWhitespace(c) || isEndOfLine(c)) {
                return position;
            }
            position++;
        }
        return -1;
    }

    /** Returns true for a line feed or a carriage return. */
    private boolean isEndOfLine(char c) {
        switch (c) {
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    /**
     * Manual smoke test: prints the results of the email-address checks, then the
     * comma-number checks, for the built-in sample inputs.
     *
     * @param strArr command-line arguments (ignored)
     */
    public static void main(String[] strArr) {
        runEmailTests();
        runNumberTests();
    }

    /**
     * Prints, for each entry of {@code testsForNumbers}, the entry and the result of
     * {@code lenIfIsNumberContainingComma} from position 0 with an end bound of at most 11.
     */
    static void runNumberTests() {
        final TokenizerPTB tokenizer = new TokenizerPTB();
        final String[] samples = testsForNumbers;
        for (int k = 0; k < samples.length; k++) {
            final String sample = samples[k];
            final int endBound = Math.min(sample.length(), 11);
            final int measured = tokenizer.lenIfIsNumberContainingComma(0, sample, endBound);
            System.out.println("========== Test NumberWithComma ========== ");
            System.out.println(sample);
            System.out.println(measured);
        }
    }

    /**
     * Prints, for each entry of {@code testsForEmailAddress}, the result of
     * {@code lenIfIsEmailAddress} on the entry alone and on the entry prefixed with "XYZ"
     * (starting the check after the prefix).
     */
    static void runEmailTests() {
        final TokenizerPTB tokenizer = new TokenizerPTB();
        final String[] samples = testsForEmailAddress;
        for (int k = 0; k < samples.length; k++) {
            final String sample = samples[k];
            final int prefixLen = "XYZ".length();
            final int plainResult = tokenizer.lenIfIsEmailAddress(0, sample, sample.length());
            final int prefixedResult = tokenizer.lenIfIsEmailAddress(prefixLen, "XYZ" + sample, sample.length() + prefixLen);
            System.out.println("========== Test ========== ");
            System.out.println("      0123456789ABCDEF");
            System.out.println("  s = " + sample + "\t  and prepend+s = XYZ" + sample);
            System.out.println("  lenIfIsEmailAddress = " + plainResult + "\t   and if prepend, len = " + prefixedResult);
        }
    }
}
