package org.whitesource.jninka;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:org/whitesource/jninka/SentenceSplitter.class */
/**
 * Processing stage that turns the stage input into a list of cleaned sentences.
 *
 * <p>{@link #process()} joins the input lines, strips comment decoration
 * ({@link #preProcessText(String)}), splits every resulting line at sentence
 * separators ({@link #splitText(String)}), normalises each piece
 * ({@link #cleanSentence(String)}) and publishes the result through
 * {@code setOutputInfo}.
 */
public class SentenceSplitter extends StageProcessor {
    /**
     * Matches, from the start of the text: (1) everything up to the first separator,
     * (2) the separator itself ({@code . ! ? :} or a newline) and (3), via lookahead,
     * the single character that follows the separator (empty at end of input).
     */
    private static final String SEPARATOR_BREAK_REGEX = "^([^\\.\\!\\?\\:\n]*)([\\.\\!\\?\\:\n])(?=(.?))";
    private static final Pattern SEPARATOR_BREAK_PATTERN = Pattern.compile(SEPARATOR_BREAK_REGEX, Pattern.MULTILINE);
    /** Captures (1) at most one leading character and (2) a trailing run of non-punctuation, non-space characters. */
    private static final Pattern LAST_WORD_ABBREVIATION_PATTERN = Pattern.compile("(.?)([^\\p{Punct}\\s]+)$");
    /** One newline-terminated line of the pre-processed text; group 1 is the line without its terminator. */
    private static final Pattern LINE_PATTERN = Pattern.compile("^([^\n]*)\n", Pattern.MULTILINE);
    /** Classpath location of the abbreviation list, one entry per line. */
    private static final String ABBREVIATIONS_RESOURCE = "/splitter.abv";
    private static final Logger logger = Logger.getLogger(SentenceSplitter.class.getCanonicalName());

    /** Lower-cased abbreviations; empty until {@link #loadAbbreviations()} succeeds. */
    private List<String> abbreviations = new ArrayList<String>();

    /**
     * Splits the stage input into sentences and stores them as the stage output.
     *
     * @return {@code true} on success; {@code false} if an exception was raised or if,
     *         for any line, the alphabetic character count of the split pieces differs
     *         from that of the line (the output is still set in the latter case)
     */
    @Override // org.whitesource.jninka.StageProcessor
    public boolean process() {
        boolean success = true;
        try {
            ArrayList<String> sentences = new ArrayList<String>();
            String text = preProcessText(JNinkaUtils.joinArrayList(getInputInfo(), "\n") + "\n");
            Matcher lines = LINE_PATTERN.matcher(text);
            while (lines.find()) {
                String line = JNinkaRegullarExpression.escapeForRegex(lines.group(1));
                int expectedCount = JNinkaUtils.alphabeticCount(line);
                List<String> pieces = splitText(line);
                int actualCount = 0;
                for (String piece : pieces) {
                    actualCount += JNinkaUtils.alphabeticCount(piece);
                    sentences.add(JNinkaRegullarExpression.unescapeAfterRegex(cleanSentence(piece)));
                }
                // Splitting only partitions the line, so a differing count means the
                // splitter dropped or duplicated text.
                if (expectedCount != actualCount) {
                    logger.severe("[" + line + "]");
                    for (String piece : pieces) {
                        logger.severe(cleanSentence(piece));
                    }
                    success = false;
                    logger.severe("Number of printable chars does not match!  [" + expectedCount + "][" + actualCount + "]");
                }
            }
            setOutputInfo(sentences);
        } catch (Exception e) {
            success = false;
            logger.log(Level.SEVERE, e.getMessage(), e);
        }
        return success;
    }

    /**
     * Replaces the abbreviation list with the lower-cased lines of the
     * {@code /splitter.abv} classpath resource.
     *
     * <p>A missing resource or a read error is logged at SEVERE level and is not
     * propagated; the list then holds whatever was read before the failure
     * (nothing, if the resource is missing).
     */
    public void loadAbbreviations() {
        this.abbreviations = new ArrayList<String>();
        InputStream stream = SentenceSplitter.class.getResourceAsStream(ABBREVIATIONS_RESOURCE);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            logger.severe("cannot open abbreviations file: resource " + ABBREVIATIONS_RESOURCE + " not found");
            return;
        }
        BufferedReader reader = null;
        try {
            // NOTE(review): decodes with the platform default charset; confirm the
            // resource is plain ASCII or pin an explicit charset.
            reader = new BufferedReader(new InputStreamReader(stream));
            String line;
            while ((line = reader.readLine()) != null) {
                // Locale.ROOT keeps the casing rules identical on every machine
                // (e.g. no dotless-i mapping under a Turkish default locale).
                this.abbreviations.add(line.toLowerCase(Locale.ROOT));
            }
        } catch (IOException e) {
            logger.log(Level.SEVERE, "cannot open abbreviations file: " + e.getMessage(), e);
        } finally {
            JNinkaUtils.close(reader, logger);
        }
    }

    /** Intentionally empty: no dictionary is loaded by this implementation. */
    public void loadDictionary() {
    }

    /**
     * Normalises one split piece by applying a fixed sequence of regex replacements.
     *
     * @param str the raw piece produced by {@link #splitText(String)}
     * @return the normalised sentence
     * @throws IllegalArgumentException if the normalised text is exactly a newline
     */
    private String cleanSentence(String str) {
        String cleaned = str;
        // Leading "o " (presumably a bullet marker - TODO confirm).
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "^o ", "");
        // Leading number followed by '-' or ')'.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "^\\s*[0-9]+\\s*[\\-\\)]", "");
        // Leading, then trailing, blanks and tabs.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "^[ \t]+", "");
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "[ \t]+$", "");
        // Leading dash, dot, whitespace or asterisk that is followed by spaces.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "^[ \t]*[\\-\\.\\s*] +", "");
        // Whitespace runs become a single space.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "\\s+", " ");
        // Quote characters and colons become placeholder tokens.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "['\"`]+", "<quotes>");
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, ":", "<colon>");
        // A trailing run of dots becomes a single dot.
        cleaned = JNinkaRegullarExpression.applyReplace(cleaned, "\\.+$", ".");
        if ("\n".equals(cleaned)) {
            throw new IllegalArgumentException("text cannot be \\n");
        }
        return cleaned;
    }

    /**
     * Splits one line of pre-processed text into sentence pieces.
     *
     * <p>The text is consumed separator by separator. A separator only ends a sentence
     * when it is followed by a space or a tab; {@code : ? !} then always end it, while
     * {@code .} ends it unless {@link #endsWithAbbreviation(String)} says the dot
     * belongs to an abbreviation. Text that does not end a sentence is carried over to
     * the next piece. The concatenation of the returned pieces equals the input.
     *
     * @param str the line to split
     * @return the pieces in order; the last one is whatever followed the final
     *         sentence end and may be empty
     * @throws IllegalStateException if a separator other than {@code . : ? !} is
     *         followed by a space or tab
     * @throws Exception declared because the regex helper methods are called here
     */
    private List<String> splitText(String str) throws Exception {
        List<String> sentences = new ArrayList<String>();
        String pending = "";
        String remaining = str;
        while (true) {
            // Re-match against what is left: the pattern is anchored with '^', so a
            // matcher bound to the original string could not advance past the first
            // separator of a newline-free line.
            Matcher matcher = SEPARATOR_BREAK_PATTERN.matcher(remaining);
            if (!matcher.find()) {
                break;
            }
            String before = JNinkaRegullarExpression.getGroupValue(matcher, 1);
            String separator = JNinkaRegullarExpression.getGroupValue(matcher, 2);
            String fragment = before + separator;
            String following = JNinkaRegullarExpression.getGroupValue(matcher, 3);
            // The match always consumes at least the separator, so this shrinks the
            // text on every pass and the loop terminates.
            remaining = remaining.substring(matcher.end());

            if (!" ".equals(following) && !"\t".equals(following)) {
                // Separator inside a token (e.g. "a.b"): not a sentence end.
                pending = pending + fragment;
                continue;
            }
            if (":".equals(separator) || "?".equals(separator) || "!".equals(separator)) {
                sentences.add(pending + fragment);
                pending = "";
                continue;
            }
            if (!".".equals(separator)) {
                logger.severe("We have not dealt with this case");
                throw new IllegalStateException();
            }
            if (endsWithAbbreviation(before)) {
                // Keep accumulating instead of emitting: emitting here as well would
                // duplicate the fragment and break the character-count check in process().
                pending = pending + fragment;
                continue;
            }
            sentences.add(pending + fragment);
            pending = "";
        }
        sentences.add(pending + remaining);
        return sentences;
    }

    /**
     * Decides whether the dot that follows {@code before} belongs to an abbreviation.
     *
     * <p>A single upper-case ASCII letter, or a single {@code e} or {@code i}, counts as
     * an abbreviation. A longer word counts when it is in the abbreviation list and is
     * preceded by nothing or by a space.
     *
     * @param before the text preceding the dot
     * @return {@code true} if the dot should not end the sentence
     * @throws Exception declared because the regex helper methods are called here
     */
    private boolean endsWithAbbreviation(String before) throws Exception {
        Matcher matcher = LAST_WORD_ABBREVIATION_PATTERN.matcher(before);
        // NOTE(review): matches() anchors the pattern to the whole of `before`, so a
        // last word is only recognised when `before` is a single token with at most one
        // leading character. The trailing '$' suggests a suffix search (find()) may have
        // been intended - confirm before changing.
        if (!matcher.matches()) {
            return false;
        }
        String preceding = JNinkaRegullarExpression.getGroupValue(matcher, 1);
        String lastWord = JNinkaRegullarExpression.getGroupValue(matcher, 2);
        if (lastWord.length() == 1) {
            char letter = lastWord.charAt(0);
            if (letter >= 'A' && letter <= 'Z') {
                return true;
            }
            if (logger.isLoggable(Level.FINER)) {
                logger.finer("last word an abbrev " + before + " lastword [" + lastWord + "] before [" + preceding + "]");
            }
            if (letter == 'e' || letter == 'i') {
                return true;
            }
            if (logger.isLoggable(Level.FINER)) {
                logger.finer("2 last word an abbrev " + before + " lastword [" + lastWord + "] before [" + preceding + "]");
            }
            return false;
        }
        // Same locale-independent casing as used when the list was loaded.
        String lowerCased = lastWord.toLowerCase(Locale.ROOT);
        return ("".equals(preceding) || " ".equals(preceding)) && this.abbreviations.contains(lowerCased);
    }

    /**
     * Strips comment and banner decoration from the raw text and re-flows it so that
     * each block of consecutive non-empty lines ends up on a single line.
     *
     * <p>The replacements run in the order written; later steps rely on earlier ones
     * (tabs are turned into spaces before single line breaks are turned into tabs, so a
     * tab in the result always marks a former line break).
     *
     * @param str the joined input text
     * @return the pre-processed text, always terminated by an extra newline
     */
    private String preProcessText(String str) {
        String text = str;
        // Banner rules: runs of 3+ '-', '=', ':' or '*' (dashes optionally framed by '+').
        text = JNinkaRegullarExpression.applyReplace(text, "\\+?\\-{3,1000}\\+?", " ", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "={3,1000}", " ", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, ":{3,1000}", " ", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "\\*{3,1000}", " ", Pattern.MULTILINE);
        // Runs of '|' and of backslashes.
        text = JNinkaRegullarExpression.applyReplace(text, "\\|+", " ", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "\\\\+", " ", Pattern.MULTILINE);
        // Block-comment opener at line start and closer at line end.
        text = JNinkaRegullarExpression.applyReplace(text, "^[ \t]*/\\*", "", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "\\*\\/[ \t]*$", "", Pattern.MULTILINE);
        // "//" unless preceded by ':' (presumably to keep URLs intact - TODO confirm).
        text = JNinkaRegullarExpression.applyReplace(text, "([^:])//", "$1", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "\r\n", "\n");
        // Comment markers and dashes at the start and end of each line.
        text = JNinkaRegullarExpression.applyReplace(text, "^[ \t]{0,3}[\\*\\#\\/\\;]+", "", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "^[ \t]{0,3}[\\-]+", "", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "[\\*\\#\\/]+[ \t]{0,3}$", "", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "[\\-]+[ \t]{0,3}$", "", Pattern.MULTILINE);
        // Second pass for markers exposed by the dash removal above.
        text = JNinkaRegullarExpression.applyReplace(text, "^[ \t]{0,3}[\\*\\#\\/\\;]+", "", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "[\\*\\#]+$", "", Pattern.MULTILINE);
        // Lines holding only blanks are replaced by a newline.
        text = JNinkaRegullarExpression.applyReplace(text, "^[ \t]+$", "\n", Pattern.MULTILINE);
        text = JNinkaRegullarExpression.applyReplace(text, "\t", " ");
        // Single line breaks become tabs; runs of line breaks collapse to one.
        text = JNinkaRegullarExpression.applyReplace(text, "\n(?!\n)", "\t");
        text = JNinkaRegullarExpression.applyReplace(text, "\n\n+", "\n");
        return text + "\n";
    }
}
