package org.apache.ctakes.core.nlp.tokenizer;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;

/* loaded from: input_file:org/apache/ctakes/core/nlp/tokenizer/HyphenatedPTB.class */
/**
 * Static lookup tables and helpers used by the tokenizer to decide how much of a
 * hyphenated term (or a fused multi-token word such as "cannot") belongs to a single token.
 *
 * <p>All state is static and populated once when the class is initialized.
 */
public class HyphenatedPTB {
    /** Single words that are split into several tokens (e.g. "cannot" -> "can" + "not"). */
    static String[] MultiTokenWords = {"cannot", "gonna", "gotta", "lemme", "wanna", "whaddya", "whatcha"};
    /** Length of the first token of the word at the same index in {@link #MultiTokenWords}. */
    static int[] MultiTokenWordLenToken1 = {3, 3, 3, 3, 3, 3, 3};
    /** Length of the second token of the word at the same index in {@link #MultiTokenWords}. */
    static int[] MultiTokenWordLenToken2 = {3, 2, 2, 2, 2, 2, 1};
    /** Maps each entry of {@link #MultiTokenWords} to its index in that array. */
    static HashMap<String, Integer> MultiTokenWordsLookup = new HashMap<>();

    static String[] possibleContractionEndings = {"'s", "'ve", "'re", "'ll", "'d", "'n", "n't"};
    /**
     * Concatenation of the character at index 1 of every entry of
     * {@link #possibleContractionEndings}, i.e. {@code "svrldn'"}.
     */
    // NOTE(review): for "n't" index 1 is the apostrophe itself, not 't'. Left unchanged because
    // other classes may read this field; confirm whether 't' was intended.
    static String lettersAfterApostropheForMiddleOfContraction = charsAtIndexOne(possibleContractionEndings);
    static String[] contractionsStartingWithApostrophe = {"'tis", "'twas"};

    /** Prefixes, each including its trailing hyphen, that stay attached to the word that follows. */
    static String[] hyphenatedPrefixes = {"e-", "a-", "u-", "x-", "agro-", "ante-", "anti-", "arch-", "be-", "bi-", "bio-", "co-", "counter-", "cross-", "cyber-", "de-", "eco-", "ex-", "extra-", "inter-", "intra-", "macro-", "mega-", "micro-", "mid-", "mini-", "multi-", "neo-", "non-", "over-", "pan-", "para-", "peri-", "post-", "pre-", "pro-", "pseudo-", "quasi-", "re-", "semi-", "sub-", "super-", "tri-", "ultra-", "un-", "uni-", "vice-", "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"};
    static HashSet<String> hyphenatedPrefixesLookup = toLookup(hyphenatedPrefixes);
    /** Suffixes, each including its leading hyphen, that stay attached to the preceding word. */
    static String[] hyphenatedSuffixes = {"-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most", "-o-torium", "-rama", "-wise"};
    static HashSet<String> hyphenatedSuffixesLookup = toLookup(hyphenatedSuffixes);
    /** Complete hyphenated words that are kept whole. */
    static String[] hyphenatedWords = {"mm-hm", "mm-mm", "o-kay", "uh-huh", "uh-oh"};
    static HashSet<String> hyphenatedWordsLookup = toLookup(hyphenatedWords);
    static char MINUS_OR_HYPHEN = '-';

    static {
        for (int index = 0; index < MultiTokenWords.length; index++) {
            MultiTokenWordsLookup.put(MultiTokenWords[index], Integer.valueOf(index));
        }
    }

    /** Builds a string from the character at index 1 of each entry, in array order. */
    private static String charsAtIndexOne(String[] entries) {
        StringBuilder chars = new StringBuilder(entries.length);
        for (String entry : entries) {
            chars.append(entry.charAt(1));
        }
        return chars.toString();
    }

    /** Copies the entries of an array into a new set for constant-time membership checks. */
    private static HashSet<String> toLookup(String[] entries) {
        HashSet<String> lookup = new HashSet<>();
        for (String entry : entries) {
            lookup.add(entry);
        }
        return lookup;
    }

    /**
     * Returns the length of the first token of a known multi-token word.
     *
     * @param str the word to look up; matched exactly against {@link #MultiTokenWords}
     * @return the first token's length, or -1 if {@code str} is not a known multi-token word
     */
    static int lenOfFirstTokenInContraction(String str) {
        Integer wordIndex = MultiTokenWordsLookup.get(str);
        return wordIndex == null ? -1 : MultiTokenWordLenToken1[wordIndex.intValue()];
    }

    /**
     * Tests whether the text beginning at position {@code i} of {@code str} starts with one of
     * {@link #contractionsStartingWithApostrophe}, as judged by
     * {@code TokenizerHelper.startsWithWithoutBeingFollowedByLetter}.
     *
     * @param i   index in {@code str} at which to start looking
     * @param str the text to examine
     * @return {@code true} if any of the contractions matches at that position
     */
    static boolean isContractionThatStartsWithApostrophe(int i, String str) {
        String tail = str.substring(i);
        for (String contraction : contractionsStartingWithApostrophe) {
            if (TokenizerHelper.startsWithWithoutBeingFollowedByLetter(tail, contraction)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints every entry of {@link #MultiTokenWords} split into its tokens, one word per line,
     * with the pieces separated by single spaces.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        for (int index = 0; index < MultiTokenWords.length; index++) {
            String word = MultiTokenWords[index];
            int firstEnd = MultiTokenWordLenToken1[index];
            int secondEnd = firstEnd + MultiTokenWordLenToken2[index];
            System.out.println(word.substring(0, firstEnd) + " " + word.substring(firstEnd, secondEnd) + " " + word.substring(secondEnd));
        }
    }

    /**
     * Determines how many leading characters of a text containing a hyphen form one token.
     *
     * <p>The input is compared against the lower-case lookup tables as-is, so callers are
     * presumably expected to pass lower-cased text.
     *
     * @param str text containing at least one hyphen
     * @return -1 if the hyphen is the first character; the index of the hyphen if it is the
     *         last character; otherwise the token length chosen from the prefix, suffix and
     *         whole-word tables
     * @throws UnsupportedOperationException if {@code str} is {@code null} or contains no hyphen
     */
    public static int tokenLengthCheckingForHyphenatedTerms(String str) {
        if (str == null) {
            throw new UnsupportedOperationException("no hyphen found in (null)");
        }
        int firstHyphen = str.indexOf(MINUS_OR_HYPHEN);
        if (firstHyphen < 0) {
            throw new UnsupportedOperationException("no hyphen found in '" + str + "'");
        }
        if (firstHyphen == 0) {
            return -1;
        }
        if (firstHyphen + 1 == str.length()) {
            return firstHyphen;
        }
        // Presumably the index of the next character that is neither letter nor digit, or
        // str.length() when there is none -- see TokenizerHelper.findNextNonAlphaNum.
        int firstBreak = TokenizerHelper.findNextNonAlphaNum(str, firstHyphen + 1);
        if (firstBreak == str.length()) {
            // Nothing follows the part after the hyphen, so there is no second break.
            return lenIncludingHyphensToKeep(str, firstHyphen, 1, firstBreak, -1);
        }
        int secondBreak = TokenizerHelper.findNextNonAlphaNum(str, firstBreak + 1);
        // Only a second hyphen directly after the first hyphenated part makes the following
        // part a candidate; any other delimiter ends consideration at the first break.
        int hyphensToConsider = str.charAt(firstBreak) == MINUS_OR_HYPHEN ? 2 : 1;
        return lenIncludingHyphensToKeep(str, firstHyphen, hyphensToConsider, firstBreak, secondBreak);
    }

    /**
     * Chooses the token length for a hyphenated term by consulting the suffix, whole-word and
     * prefix tables, in that order of precedence.
     *
     * @param str         the text being tokenized
     * @param firstHyphen index of the first hyphen in {@code str}
     * @param hyphenCount number of hyphens to consider keeping; must be 1 or 2
     * @param firstBreak  index just past the part that follows the first hyphen
     * @param secondBreak index just past the part that follows the second hyphen; -1 when the
     *                    caller found no second part
     * @return {@code secondBreak}, {@code firstBreak} or {@code firstHyphen}, depending on how
     *         much of the term is recognized
     * @throws UnsupportedOperationException if {@code hyphenCount} is not 1 or 2
     */
    private static int lenIncludingHyphensToKeep(String str, int firstHyphen, int hyphenCount, int firstBreak, int secondBreak) {
        if (hyphenCount > 2 || hyphenCount < 1) {
            throw new UnsupportedOperationException("Not ready to handle numberOfHyphensToConsiderKeeping = " + hyphenCount);
        }
        boolean twoHyphens = hyphenCount == 2;

        // A suffix that itself contains a hyphen (e.g. "-o-torium") spans both parts.
        if (twoHyphens && hyphenatedSuffixesLookup.contains(str.substring(firstHyphen, secondBreak))) {
            return secondBreak;
        }
        if (hyphenatedSuffixesLookup.contains(str.substring(firstHyphen, firstBreak))) {
            // secondBreak may be -1, hence the ordering check before taking the substring.
            if (secondBreak > firstBreak && hyphenatedSuffixesLookup.contains(str.substring(firstBreak, secondBreak))) {
                return secondBreak;
            }
            return firstBreak;
        }
        // Known hyphenated word followed by a known suffix.
        if (twoHyphens
                && hyphenatedWordsLookup.contains(str.substring(0, firstBreak))
                && hyphenatedSuffixesLookup.contains(str.substring(firstBreak, secondBreak))) {
            return secondBreak;
        }
        boolean startsWithPrefix = hyphenatedPrefixesLookup.contains(str.substring(0, firstHyphen + 1));
        if (startsWithPrefix) {
            if (!twoHyphens) {
                return firstBreak;
            }
            // Known prefix followed by a known hyphenated word.
            if (hyphenatedWordsLookup.contains(str.substring(firstHyphen + 1, secondBreak))) {
                return secondBreak;
            }
            return hyphenatedSuffixesLookup.contains(str.substring(firstBreak, secondBreak)) ? secondBreak : firstBreak;
        }
        // No prefix or suffix applies: keep the hyphen only for a known hyphenated word.
        return hyphenatedWordsLookup.contains(str.substring(0, firstBreak)) ? firstBreak : firstHyphen;
    }

    /**
     * Returns the length of the hyphenated suffix that starts at position {@code i}, if any.
     *
     * <p>The text is lower-cased with {@link Locale#ROOT} so that matching against the ASCII
     * suffix table does not depend on the JVM's default locale (with a Turkish default locale,
     * for example, 'I' would otherwise lower-case to a dotless i and "-ITIS" would not match).
     *
     * @param str the text to examine
     * @param i   index in {@code str} at which the candidate suffix (normally a hyphen) starts
     * @return the length of the matching entry of {@link #hyphenatedSuffixes}, or -1 if the
     *         text at that position is not a known suffix
     */
    public static int lenIfHyphenatedSuffix(String str, int i) {
        String lowered = str.toLowerCase(Locale.ROOT);
        String candidate = lowered.substring(i, TokenizerHelper.findNextNonAlphaNum(lowered, i + 1));
        if (lowered.startsWith("-o-", i)) {
            // For "-o-..." the scan has to start after the second hyphen to take in the rest.
            candidate = lowered.substring(i, TokenizerHelper.findNextNonAlphaNum(lowered, i + 3));
        }
        return hyphenatedSuffixesLookup.contains(candidate) ? candidate.length() : -1;
    }
}
