/*
 * Decompiled with CFR 0.152.
 */
package edu.utah.bmi.nlp.core;

import edu.utah.bmi.nlp.core.Span;
import edu.utah.bmi.nlp.core.WildCardChecker;
import java.util.ArrayList;

public class SimpleParser {
    // Character-class codes. tokenizeDecimalSmart classifies every character
    // into exactly these values, but (this being decompiled code) the methods
    // use the inlined integer literals rather than these names.
    // NOTE(review): parse, tokenize, tokenizeWParagraphs and tokenize2Spans use
    // a different local coding (0 = separator, 1 = letter, 2 = digit and, where
    // tracked, 3 = line break), so these constants do not apply to them.

    /** Any character that is not punctuation, a line break, a letter or a digit. */
    protected static final int whitespace = -1;
    /** A character accepted by WildCardChecker.isPunctuation (other than '.'). */
    protected static final int punctuation = 0;
    /** The '.' character, tracked separately so decimals can be recognised. */
    protected static final int dot = 1;
    /** A line-break character: '\n' or '\r'. */
    protected static final int returnc = 2;
    /** A character for which Character.isLetter is true. */
    protected static final int letter = 3;
    /** A character for which Character.isDigit is true. */
    protected static final int digit = 4;

    /**
     * Splits {@code sentence} into whitespace-delimited tokens.
     * Delegates to {@link #tokenizeOnWhitespaces(String, int)} with an offset of 0,
     * so span positions are indices into {@code sentence} itself.
     *
     * @param sentence the text to split
     * @return the tokens in order of appearance
     */
    public static ArrayList<Span> tokenizeOnWhitespaces(String sentence) {
        return SimpleParser.tokenizeOnWhitespaces(sentence, 0);
    }

    /**
     * Splits {@code sentence} into maximal runs of non-whitespace characters.
     * Whitespace is decided by {@link Character#isWhitespace(char)}.
     *
     * @param sentence the text to split
     * @param offset   value added to both the begin and the end of every span
     * @return one span per token, in order; begin is inclusive, end is exclusive,
     *         and the span text is the token exactly as it appears in {@code sentence}
     */
    public static ArrayList<Span> tokenizeOnWhitespaces(String sentence, int offset) {
        ArrayList<Span> spans = new ArrayList<Span>();
        final int length = sentence.length();
        // Index where the current token started, or -1 while between tokens.
        int start = -1;
        // Run one step past the last character so the end of the text closes
        // an open token exactly like a whitespace character would.
        for (int pos = 0; pos <= length; pos++) {
            boolean boundary = pos == length || Character.isWhitespace(sentence.charAt(pos));
            if (!boundary) {
                if (start < 0) {
                    start = pos;
                }
            } else if (start >= 0) {
                spans.add(new Span(start + offset, pos + offset, sentence.substring(start, pos)));
                start = -1;
            }
        }
        return spans;
    }

    /**
     * Tokenizes {@code text} into paragraphs of string tokens.
     *
     * <p>A token is a maximal run of letters or a maximal run of digits; a letter
     * run and a digit run that touch are separate tokens. Characters accepted by
     * {@code WildCardChecker.isPunctuation} end the current token and are emitted as
     * one-character tokens when {@code includePunctuation} is true. Any other
     * character only ends the current token.
     *
     * <p>A run of consecutive {@code '\n'} / {@code '\r'} characters closes the current
     * paragraph once (so {@code "\r\n"} is a single break). The paragraph that is open
     * when the text ends is always appended, even when it is empty.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation characters become tokens
     * @return the paragraphs, each a list of token strings
     */
    public static ArrayList<ArrayList<String>> parse(String text, boolean includePunctuation) {
        ArrayList<ArrayList<String>> paragraphs = new ArrayList<ArrayList<String>>();
        ArrayList<String> tokens = new ArrayList<String>();
        StringBuilder sb = new StringBuilder();
        // Class of the previous character:
        // 0 = separator, 1 = letter, 2 = digit, 3 = line break.
        // Starting at 0 (not -1) lets a leading letter or digit open a token
        // normally instead of emitting an empty token or being lost.
        int type = 0;
        for (int i = 0; i < text.length(); ++i) {
            char thisChar = text.charAt(i);
            boolean isPunct = WildCardChecker.isPunctuation(thisChar);
            int charType;
            if (isPunct) {
                charType = 0;
            } else if (thisChar == '\n' || thisChar == '\r') {
                charType = 3;
            } else if (Character.isDigit(thisChar)) {
                charType = 2;
            } else if (Character.isLetter(thisChar)) {
                charType = 1;
            } else {
                charType = 0;
            }

            // Close the pending word/number whenever the run ends. This also
            // covers a line break, so the last token of a paragraph stays in
            // that paragraph rather than leaking into the next one.
            if ((type == 1 || type == 2) && charType != type) {
                tokens.add(sb.toString());
                sb.setLength(0);
            }

            if (charType == 1 || charType == 2) {
                sb.append(thisChar);
            } else if (isPunct) {
                if (includePunctuation) {
                    tokens.add(Character.toString(thisChar));
                }
            } else if (charType == 3 && type != 3) {
                // Only the first character of a line-break run closes the paragraph.
                paragraphs.add(tokens);
                tokens = new ArrayList<String>();
            }
            type = charType;
        }
        if (type == 1 || type == 2) {
            tokens.add(sb.toString());
        }
        paragraphs.add(tokens);
        return paragraphs;
    }

    /**
     * Tokenizes {@code text} into paragraphs of spans.
     * Delegates to {@link #tokenizeWParagraphs(String, boolean, int)} with an offset of 0.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @return the paragraphs produced by the delegate
     */
    public static ArrayList<ArrayList<Span>> tokenizeWParagraphs(String text, boolean includePunctuation) {
        return SimpleParser.tokenizeWParagraphs(text, includePunctuation, 0);
    }

    /**
     * Tokenizes {@code text} into paragraphs of spans.
     *
     * <p>A token is a maximal run of letters or a maximal run of digits. Characters
     * accepted by {@code WildCardChecker.isPunctuation} end the current token and are
     * emitted as one-character spans when {@code includePunctuation} is true. Any other
     * character only ends the current token.
     *
     * <p>A paragraph is closed when a line-break character ({@code '\n'} or {@code '\r'})
     * directly follows another line-break character and the current paragraph already
     * holds at least one token; a single line break therefore does not split. The
     * paragraph that is open when the text ends is always appended, even when empty.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation characters become spans
     * @param offset             value added to the begin and end of every span
     * @return the paragraphs; span begin is inclusive and end is exclusive
     */
    public static ArrayList<ArrayList<Span>> tokenizeWParagraphs(String text, boolean includePunctuation, int offset) {
        ArrayList<ArrayList<Span>> paragraphs = new ArrayList<ArrayList<Span>>();
        ArrayList<Span> tokens = new ArrayList<Span>();
        // Class of the previous character:
        // 0 = separator, 1 = letter, 2 = digit, 3 = line break.
        int type = 0;
        StringBuilder tmp = new StringBuilder();
        int tokenBegin = 0;
        for (int i = 0; i < text.length(); ++i) {
            char thisChar = text.charAt(i);
            boolean isPunct = WildCardChecker.isPunctuation(thisChar);
            boolean isBreak = !isPunct && (thisChar == '\n' || thisChar == '\r');
            int charType;
            if (isPunct) {
                charType = 0;
            } else if (isBreak) {
                charType = 3;
            } else if (Character.isDigit(thisChar)) {
                charType = 2;
            } else if (Character.isLetter(thisChar)) {
                charType = 1;
            } else {
                charType = 0;
            }

            // Only a pending letter/digit run is a token. The line-break state
            // holds no text, so it must never be flushed as an (empty) span.
            if ((type == 1 || type == 2) && charType != type) {
                tokens.add(new Span(tokenBegin + offset, i + offset, tmp.toString()));
                tmp.setLength(0);
            }

            if (charType == 1 || charType == 2) {
                // A new run starts after any other class, including a line
                // break; digits and letters are treated alike here.
                if (charType != type) {
                    tokenBegin = i;
                }
                tmp.append(thisChar);
            } else if (isPunct) {
                if (includePunctuation) {
                    // The offset applies to punctuation spans like any other span.
                    tokens.add(new Span(i + offset, i + 1 + offset, String.valueOf(thisChar)));
                }
            } else if (isBreak && type == 3 && !tokens.isEmpty()) {
                paragraphs.add(tokens);
                tokens = new ArrayList<Span>();
            }
            type = charType;
        }
        if (type == 1 || type == 2) {
            // The trailing token carries the offset and its text like every other token.
            tokens.add(new Span(tokenBegin + offset, text.length() + offset, tmp.toString()));
        }
        paragraphs.add(tokens);
        return paragraphs;
    }

    /**
     * Tokenizes {@code text} into spans.
     * Delegates to {@link #tokenize2Spans(String, boolean, int, boolean)} with an
     * offset of 0 and {@code caseSensitive} set to false.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @return the spans produced by the delegate
     */
    public static ArrayList<Span> tokenize2Spans(String text, boolean includePunctuation) {
        return SimpleParser.tokenize2Spans(text, includePunctuation, 0, false);
    }

    /**
     * Tokenizes {@code text} into spans.
     * Delegates to {@link #tokenize2Spans(String, boolean, int, boolean)} with an
     * offset of 0.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @param caseSensitive      passed through to the delegate unchanged
     * @return the spans produced by the delegate
     */
    public static ArrayList<Span> tokenize2Spans(String text, boolean includePunctuation, boolean caseSensitive) {
        return SimpleParser.tokenize2Spans(text, includePunctuation, 0, caseSensitive);
    }

    /**
     * Tokenizes {@code text} into a flat list of spans.
     *
     * <p>A token is a maximal run of letters or a maximal run of digits; a letter
     * run and a digit run that touch are separate tokens. Characters accepted by
     * {@code WildCardChecker.isPunctuation} end the current token and are emitted as
     * one-character spans when {@code includePunctuation} is true. Every other
     * character (including line breaks) only ends the current token.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation characters become spans
     * @param offset             value added to the begin and end of every span
     * @param caseSensitive      when false, the text of letter/digit tokens is lower-cased
     * @return the spans in order; begin is inclusive and end is exclusive
     */
    public static ArrayList<Span> tokenize2Spans(String text, boolean includePunctuation, int offset, boolean caseSensitive) {
        ArrayList<Span> tokens = new ArrayList<Span>();
        // Class of the previous character: 0 = separator, 1 = letter, 2 = digit.
        int type = 0;
        int tokenBegin = 0;
        StringBuilder tmp = new StringBuilder();
        for (int i = 0; i < text.length(); ++i) {
            char thisChar = text.charAt(i);
            boolean isPunct = WildCardChecker.isPunctuation(thisChar);
            int charType;
            if (isPunct) {
                charType = 0;
            } else if (Character.isDigit(thisChar)) {
                charType = 2;
            } else if (Character.isLetter(thisChar)) {
                charType = 1;
            } else {
                // Line breaks, whitespace and anything else act as separators.
                charType = 0;
            }

            // A pending run ends as soon as the character class changes.
            if (type != 0 && charType != type) {
                String tokenText = tmp.toString();
                tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tokenText : tokenText.toLowerCase()));
                tmp.setLength(0);
            }

            if (charType != 0) {
                if (charType != type) {
                    tokenBegin = i;
                }
                tmp.append(thisChar);
            } else if (isPunct && includePunctuation) {
                // The offset applies to punctuation spans like any other span.
                tokens.add(new Span(i + offset, i + 1 + offset, String.valueOf(thisChar)));
            }
            type = charType;
        }
        if (type != 0) {
            // The trailing token gets the same offset and case handling as
            // every token emitted inside the loop.
            String tokenText = tmp.toString();
            tokens.add(new Span(tokenBegin + offset, text.length() + offset, caseSensitive ? tokenText : tokenText.toLowerCase()));
        }
        return tokens;
    }

    /**
     * Tokenizes {@code text} into a flat list of strings.
     *
     * <p>A token is a maximal run of letters or a maximal run of digits; a letter
     * run and a digit run that touch are separate tokens. Characters accepted by
     * {@code WildCardChecker.isPunctuation} end the current token and are emitted as
     * one-character tokens when {@code includePunctuation} is true. Every other
     * character only ends the current token.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation characters become tokens
     * @return the tokens in order of appearance
     */
    public static ArrayList<String> tokenize(String text, boolean includePunctuation) {
        ArrayList<String> result = new ArrayList<String>();
        StringBuilder pending = new StringBuilder();
        // Class of the run being collected: 0 = none, 1 = letters, 2 = digits.
        int runKind = 0;
        final int length = text.length();
        for (int pos = 0; pos < length; pos++) {
            char c = text.charAt(pos);
            boolean punct = WildCardChecker.isPunctuation(c);
            int kind = 0;
            if (!punct) {
                if (Character.isDigit(c)) {
                    kind = 2;
                } else if (Character.isLetter(c)) {
                    kind = 1;
                }
            }

            // Emit the collected run when the character class changes.
            if (runKind > 0 && kind != runKind) {
                result.add(pending.toString());
                pending.setLength(0);
            }

            if (kind > 0) {
                pending.append(c);
            } else if (punct && includePunctuation) {
                result.add(Character.toString(c));
            }
            runKind = kind;
        }
        if (runKind > 0) {
            result.add(pending.toString());
        }
        return result;
    }

    /**
     * Tokenizes {@code text} while keeping decimal numbers together.
     * Delegates to {@link #tokenizeDecimalSmart(String, boolean, int, boolean)} with an
     * offset of 0 and {@code caseSensitive} set to false.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @return the spans produced by the delegate
     */
    public static ArrayList<Span> tokenizeDecimalSmart(String text, boolean includePunctuation) {
        return SimpleParser.tokenizeDecimalSmart(text, includePunctuation, 0, false);
    }

    /**
     * Tokenizes {@code text} while keeping decimal numbers together.
     * Delegates to {@link #tokenizeDecimalSmart(String, boolean, int, boolean)} with an
     * offset of 0.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @param caseSensitive      passed through to the delegate unchanged
     * @return the spans produced by the delegate
     */
    public static ArrayList<Span> tokenizeDecimalSmart(String text, boolean includePunctuation, boolean caseSensitive) {
        return SimpleParser.tokenizeDecimalSmart(text, includePunctuation, 0, caseSensitive);
    }

    /**
     * Tokenizes {@code text} into spans, keeping decimal numbers such as
     * {@code 3.14} or {@code .5} in a single token.
     *
     * <p>Each character is classified as: -1 = other, 0 = punctuation (per
     * {@code WildCardChecker.isPunctuation}, excluding '.'), 1 = '.', 2 = line break,
     * 3 = letter, 4 = digit. A '.' is held back until the next character is seen:
     * it joins the number when a digit follows and the character before the dot was
     * a digit or an "other" character (or the dot opens the text); otherwise it is
     * a separate "." token, emitted only when {@code includePunctuation} is true.
     * Consecutive non-dot punctuation characters are collected into one token.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation (including stand-alone dots) becomes spans
     * @param offset             value added to the begin and end of every span
     * @param caseSensitive      when false, token text is lower-cased
     * @return the spans in order; begin is inclusive and end is exclusive
     */
    public static ArrayList<Span> tokenizeDecimalSmart(String text, boolean includePunctuation, int offset, boolean caseSensitive) {
        ArrayList<Span> tokens = new ArrayList<Span>();
        int type_2 = -1; // class of the character two positions back
        int type_1 = -1; // class of the previous character
        int type0 = -1;  // class of the current character
        int tokenBegin = 0;
        StringBuilder tmp = new StringBuilder();
        for (int i = 0; i < text.length(); ++i) {
            char thisChar = text.charAt(i);
            type0 = thisChar == '.' ? 1 : (WildCardChecker.isPunctuation(thisChar) ? 0 : (thisChar == '\n' || thisChar == '\r' ? 2 : (Character.isDigit(thisChar) ? 4 : (Character.isLetter(thisChar) ? 3 : -1))));
            switch (type0) {
                case 1: {
                    if (type_1 == -1) {
                        // A dot after "other" may open a decimal such as ".5".
                        tokenBegin = i;
                    } else if (type_1 == 1) {
                        // Run of dots: the earlier dot cannot be a decimal point,
                        // so close whatever precedes it and emit it now instead of
                        // silently dropping it.
                        if (tmp.length() > 0) {
                            tokens.add(new Span(tokenBegin + offset, i - 1 + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                        }
                        if (includePunctuation) {
                            tokens.add(new Span(i - 1 + offset, i + offset, "."));
                        }
                        tokenBegin = i;
                    }
                    // Unless this is the last character, hold the dot back until
                    // the next character shows whether it is a decimal point.
                    if (i != text.length() - 1) break;
                    if (tmp.length() > 0) {
                        tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                        tmp.setLength(0);
                    }
                    if (!includePunctuation) break;
                    tokens.add(new Span(i + offset, i + 1 + offset, "."));
                    break;
                }
                case 0: {
                    if (type_1 == 1) {
                        // Resolve the held-back dot BEFORE buffering the current
                        // character, and regardless of includePunctuation, so the
                        // pending token keeps its own text and is not merged with
                        // whatever follows.
                        if (tmp.length() > 0) {
                            tokens.add(new Span(tokenBegin + offset, i - 1 + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                        }
                        if (includePunctuation) {
                            tokens.add(new Span(i - 1 + offset, i + offset, "."));
                        }
                        tokenBegin = i;
                    } else if ((type_1 == 3 || type_1 == 4) && tmp.length() > 0) {
                        tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                        tmp.setLength(0);
                        tokenBegin = i;
                    }
                    if (!includePunctuation) break;
                    if (type_1 == -1 || type_1 == 2) {
                        tokenBegin = i;
                    }
                    // After another punctuation character (type_1 == 0) this
                    // extends the punctuation token already in the buffer.
                    tmp.append(thisChar);
                    break;
                }
                case -1: 
                case 2: {
                    switch (type_1) {
                        case 3: 
                        case 4: {
                            tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                            break;
                        }
                        case 0: {
                            if (!includePunctuation) break;
                            tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                            break;
                        }
                        case 1: {
                            // The held-back dot ends a token: it is not a decimal point.
                            if (tmp.length() > 0) {
                                tokens.add(new Span(tokenBegin + offset, i - 1 + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                                tmp.setLength(0);
                            }
                            if (!includePunctuation) break;
                            tokens.add(new Span(i - 1 + offset, i + offset, "."));
                        }
                    }
                    tokenBegin = i;
                    break;
                }
                case 4: {
                    switch (type_1) {
                        case -1: 
                        case 2: {
                            tokenBegin = i;
                            break;
                        }
                        case 0: {
                            if (includePunctuation) {
                                tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                                tmp.setLength(0);
                            }
                            tokenBegin = i;
                            break;
                        }
                        case 1: {
                            if (type_2 == 4 || type_2 == -1) {
                                // digit '.' digit, or a leading ".digit": decimal point.
                                tmp.append('.');
                                break;
                            }
                            if (tmp.length() > 0) {
                                tokens.add(new Span(tokenBegin + offset, i - 1 + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                                tmp.setLength(0);
                            }
                            if (includePunctuation) {
                                tokens.add(new Span(i - 1 + offset, i + offset, "."));
                            }
                            tokenBegin = i;
                            break;
                        }
                        case 3: {
                            tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                            tokenBegin = i;
                        }
                    }
                    tmp.append(thisChar);
                    break;
                }
                case 3: {
                    switch (type_1) {
                        case -1: 
                        case 2: {
                            tokenBegin = i;
                            break;
                        }
                        case 1: {
                            if (tmp.length() > 0) {
                                tokens.add(new Span(tokenBegin + offset, i - 1 + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                                tmp.setLength(0);
                            }
                            if (includePunctuation) {
                                tokens.add(new Span(i - 1 + offset, i + offset, "."));
                            }
                            tokenBegin = i;
                            break;
                        }
                        case 0: {
                            if (includePunctuation) {
                                tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                                tmp.setLength(0);
                            }
                            tokenBegin = i;
                            break;
                        }
                        case 4: {
                            tokens.add(new Span(tokenBegin + offset, i + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
                            tmp.setLength(0);
                            tokenBegin = i;
                            break;
                        }
                    }
                    tmp.append(thisChar);
                }
            }
            type_2 = type_1;
            type_1 = type0;
        }
        if (tmp.length() > 0) {
            tokens.add(new Span(tokenBegin + offset, text.length() + offset, caseSensitive ? tmp.toString() : tmp.toString().toLowerCase()));
        }
        return tokens;
    }

    /**
     * Tokenizes {@code text} into groups of spans while keeping decimal numbers together.
     * Delegates to {@link #tokenizeDecimalSmartWSentences(String, boolean, int, boolean)}
     * with an offset of 0 and {@code caseSensitive} set to false.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @return the span groups produced by the delegate
     */
    public static ArrayList<ArrayList<Span>> tokenizeDecimalSmartWSentences(String text, boolean includePunctuation) {
        return SimpleParser.tokenizeDecimalSmartWSentences(text, includePunctuation, 0, false);
    }

    /**
     * Tokenizes {@code text} into groups of spans while keeping decimal numbers together.
     * Delegates to {@link #tokenizeDecimalSmartWSentences(String, boolean, int, boolean)}
     * with an offset of 0.
     *
     * @param text               the text to tokenize
     * @param includePunctuation passed through to the delegate unchanged
     * @param caseSensitive      passed through to the delegate unchanged
     * @return the span groups produced by the delegate
     */
    public static ArrayList<ArrayList<Span>> tokenizeDecimalSmartWSentences(String text, boolean includePunctuation, boolean caseSensitive) {
        return SimpleParser.tokenizeDecimalSmartWSentences(text, includePunctuation, 0, caseSensitive);
    }

    /**
     * Tokenizes {@code text} into groups of spans, one group per line, keeping
     * decimal numbers such as {@code 3.14} in a single token.
     *
     * <p>A token is a maximal run of letters or a maximal run of digits. A '.' is
     * held back until the next character is seen: it joins the number when it sits
     * directly between two digits, otherwise it is a separate "." token. Other
     * characters accepted by {@code WildCardChecker.isPunctuation} are one-character
     * tokens. Punctuation tokens (including ".") are emitted only when
     * {@code includePunctuation} is true. Every line-break character
     * ({@code '\n'} or {@code '\r'}) closes the current group if it holds at least
     * one token; empty groups are never returned.
     *
     * @param text               the text to tokenize
     * @param includePunctuation whether punctuation becomes spans
     * @param offset             value added to the begin and end of every span
     * @param caseSensitive      when false, token text is lower-cased
     * @return the groups of spans; span begin is inclusive and end is exclusive
     */
    public static ArrayList<ArrayList<Span>> tokenizeDecimalSmartWSentences(String text, boolean includePunctuation, int offset, boolean caseSensitive) {
        // Character classes (same values the rest of the class uses for this coding).
        final int other = -1;
        final int punct = 0;
        final int period = 1;
        final int lineBreak = 2;
        final int alpha = 3;
        final int numeric = 4;

        ArrayList<ArrayList<Span>> sentences = new ArrayList<ArrayList<Span>>();
        ArrayList<Span> tokens = new ArrayList<Span>();
        int type0 = other; // class of the character two positions back
        int type1 = other; // class of the previous character
        int tokenBegin = 0;
        StringBuilder tmp = new StringBuilder();
        final int length = text.length();
        for (int i = 0; i < length; ++i) {
            char thisChar = text.charAt(i);
            int cls;
            if (thisChar == '.') {
                cls = period;
            } else if (WildCardChecker.isPunctuation(thisChar)) {
                cls = punct;
            } else if (thisChar == '\n' || thisChar == '\r') {
                cls = lineBreak;
            } else if (Character.isDigit(thisChar)) {
                cls = numeric;
            } else if (Character.isLetter(thisChar)) {
                cls = alpha;
            } else {
                cls = other;
            }

            if (cls == period) {
                // Hold the dot back until the next character is known. In a run
                // of dots the earlier dot cannot be a decimal point, so resolve
                // it right away.
                if (type1 == period) {
                    SimpleParser.flushSentenceToken(tokens, tmp, tokenBegin + offset, i + offset, true, includePunctuation, caseSensitive);
                    tokenBegin = i;
                }
            } else if (cls == numeric && type1 == period && type0 == numeric) {
                // digit '.' digit: the held-back dot is a decimal point and the
                // number token simply continues.
                tmp.append('.');
                tmp.append(thisChar);
            } else {
                boolean continuesRun = cls == type1 && (cls == alpha || cls == numeric);
                if (!continuesRun) {
                    // Close whatever is pending (word, number or punctuation
                    // character) together with a held-back dot, then start over.
                    SimpleParser.flushSentenceToken(tokens, tmp, tokenBegin + offset, i + offset, type1 == period, includePunctuation, caseSensitive);
                    tokenBegin = i;
                }
                if (cls == alpha || cls == numeric || (cls == punct && includePunctuation)) {
                    tmp.append(thisChar);
                } else if (cls == lineBreak && !tokens.isEmpty()) {
                    sentences.add(tokens);
                    tokens = new ArrayList<Span>();
                }
            }
            type0 = type1;
            type1 = cls;
        }
        // End of text behaves like one more separator.
        SimpleParser.flushSentenceToken(tokens, tmp, tokenBegin + offset, length + offset, type1 == period, includePunctuation, caseSensitive);
        if (!tokens.isEmpty()) {
            sentences.add(tokens);
        }
        return sentences;
    }

    /**
     * Emits the token buffered in {@code tmp}, if any, and clears the buffer; when a
     * dot was being held back, also emits it as a "." span if punctuation is wanted.
     *
     * @param tokens             list receiving the spans
     * @param tmp                buffer holding the pending token text; emptied on return
     * @param begin              absolute begin of the pending token
     * @param end                absolute position of the boundary (exclusive end)
     * @param dotPending         true when the character just before {@code end} is a
     *                           held-back '.', which is then excluded from the token
     * @param includePunctuation whether the held-back dot is emitted as a span
     * @param caseSensitive      when false, the token text is lower-cased
     */
    private static void flushSentenceToken(ArrayList<Span> tokens, StringBuilder tmp, int begin, int end, boolean dotPending, boolean includePunctuation, boolean caseSensitive) {
        int tokenEnd = dotPending ? end - 1 : end;
        // Skipping an empty buffer avoids zero-text spans.
        if (tmp.length() > 0) {
            String tokenText = tmp.toString();
            tokens.add(new Span(begin, tokenEnd, caseSensitive ? tokenText : tokenText.toLowerCase()));
            tmp.setLength(0);
        }
        if (dotPending && includePunctuation) {
            tokens.add(new Span(tokenEnd, end, "."));
        }
    }
}

