package com.entopix.maui.filters;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.Utils;
import weka.filters.Filter;

/* loaded from: input_file:WEB-INF/lib/maui-1.4.3.jar:com/entopix/maui/filters/MauiPhraseFilter.class */
/**
 * Filter that rewrites the values of selected string attributes as a list of
 * phrases.
 *
 * <p>Each selected string value is passed through {@link #tokenize(String)}:
 * the resulting text has one phrase per line, with the words of a phrase
 * separated by a single space (or a slash). All other attribute values are
 * copied through unchanged.
 *
 * <p>Command-line options:
 * <ul>
 *   <li>{@code -R <index1,index2-index4,...>} attributes to process</li>
 *   <li>{@code -V} invert the attribute selection</li>
 *   <li>{@code -P} disallow periods inside a token</li>
 * </ul>
 */
public class MauiPhraseFilter extends Filter implements OptionHandler {
    private static final Logger log = LoggerFactory.getLogger(MauiPhraseFilter.class);
    private static final long serialVersionUID = 1L;

    /** Number of slots in the array returned by {@link #getOptions()}: -V, -P, -R and its value. */
    private static final int OPTION_SLOTS = 4;

    /** Range of attribute indices whose string values are tokenized. */
    protected Range m_SelectCols = new Range();

    /** When true, a period between two letters/digits splits the token instead of joining it. */
    protected boolean m_DisallowInternalPeriods = false;

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "This filter splits the text contained by the selected string attributes into phrases.";
    }

    /**
     * Lists the command-line options understood by this filter.
     *
     * @return an enumeration of the three supported options (-R, -V, -P)
     */
    @Override // weka.core.OptionHandler
    public Enumeration<Option> listOptions() {
        ArrayList<Option> options = new ArrayList<Option>(3);
        options.add(new Option("\tSpecify list of attributes to process. First and last are valid\n\tindexes. (default none)", "R", 1, "-R <index1,index2-index4,...>"));
        options.add(new Option("\tInvert matching sense", "V", 0, "-V"));
        options.add(new Option("\tDisallow internal periods", "P", 0, "-P"));
        return Collections.enumeration(options);
    }

    /**
     * Returns the capabilities of this filter: the inherited capabilities with
     * all attribute types, missing values, nominal classes and "no class"
     * additionally enabled.
     *
     * @return the capabilities of this filter
     */
    @Override // weka.filters.Filter, weka.core.CapabilitiesHandler
    public Capabilities getCapabilities() {
        Capabilities capabilities = super.getCapabilities();
        capabilities.enableAllAttributes();
        capabilities.enable(Capabilities.Capability.MISSING_VALUES);
        capabilities.enable(Capabilities.Capability.NOMINAL_CLASS);
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        return capabilities;
    }

    /**
     * Parses the given option array and configures this filter accordingly.
     *
     * <p>The attribute range is only changed when a non-empty {@code -R} value
     * is supplied; the {@code -V} and {@code -P} flags are always applied
     * (absent means {@code false}). If an input format has already been set it
     * is re-applied after the options have changed.
     *
     * @param strArr the options to parse
     * @throws Exception if option parsing or re-applying the input format fails
     */
    @Override // weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        String attributeRange = Utils.getOption('R', strArr);
        if (attributeRange.length() != 0) {
            setAttributeIndices(attributeRange);
        }
        setInvertSelection(Utils.getFlag('V', strArr));
        setDisallowInternalPeriods(Utils.getFlag('P', strArr));
        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Returns the current settings of this filter as an option array.
     *
     * @return an array of exactly four strings; the active options come first
     *         and any unused trailing slots hold the empty string
     */
    @Override // weka.core.OptionHandler
    public String[] getOptions() {
        String[] options = new String[OPTION_SLOTS];
        int next = 0;
        if (getInvertSelection()) {
            options[next++] = "-V";
        }
        if (getDisallowInternalPeriods()) {
            options[next++] = "-P";
        }
        String indices = getAttributeIndices();
        if (!indices.isEmpty()) {
            options[next++] = "-R";
            options[next++] = indices;
        }
        // Pad the remaining slots so callers never see null entries.
        while (next < options.length) {
            options[next++] = "";
        }
        return options;
    }

    /**
     * Sets the format of the input instances. The output format is the same
     * structure, and the upper bound of the attribute selection is set to the
     * index of the last attribute.
     *
     * @param instances the structure of the incoming data
     * @return always {@code true}
     * @throws Exception if the superclass rejects the input format
     */
    @Override // weka.filters.Filter
    public boolean setInputFormat(Instances instances) throws Exception {
        super.setInputFormat(instances);
        setOutputFormat(instances);
        this.m_SelectCols.setUpper(instances.numAttributes() - 1);
        return true;
    }

    /**
     * Converts one instance and pushes the result onto the output queue. At the
     * start of a new batch the queue is reset first.
     *
     * @param instance the instance to filter
     * @return always {@code true}
     * @throws Exception if no input format has been defined, or if the
     *         conversion fails
     */
    @Override // weka.filters.Filter
    public boolean input(Instance instance) throws Exception {
        if (getInputFormat() == null) {
            throw new Exception("No input instance format defined");
        }
        if (this.m_NewBatch) {
            resetQueue();
            this.m_NewBatch = false;
        }
        convertInstance(instance);
        return true;
    }

    /**
     * Signals the end of the current batch and marks the next input as the
     * start of a new batch.
     *
     * @return {@code true} if there are instances pending output
     * @throws NullPointerException if no input format has been defined (note
     *         that {@link #input(Instance)} reports the same condition with a
     *         plain {@link Exception})
     * @throws Exception declared by the overridden method
     */
    @Override // weka.filters.Filter
    public boolean batchFinished() throws Exception {
        if (getInputFormat() == null) {
            throw new NullPointerException("No input instance format defined");
        }
        this.m_NewBatch = true;
        return numPendingOutput() != 0;
    }

    /**
     * Command-line entry point. Runs the filter in batch mode when the
     * {@code -b} flag is present and in single-file mode otherwise. Any
     * exception is logged rather than propagated.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        try {
            if (Utils.getFlag('b', strArr)) {
                Filter.batchFilterFile(new MauiPhraseFilter(), strArr);
            } else {
                Filter.filterFile(new MauiPhraseFilter(), strArr);
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Builds the filtered copy of an instance and pushes it onto the output
     * queue.
     *
     * <p>Non-string and missing values are copied as they are. String values
     * are re-registered with the corresponding output attribute: tokenized
     * first when the attribute is in the selected range, verbatim otherwise.
     *
     * @param instance the instance to convert
     * @throws Exception declared for the benefit of the calling filter methods
     */
    private void convertInstance(Instance instance) throws Exception {
        int attributeCount = instance.numAttributes();
        Instances outputFormat = getOutputFormat();
        double[] values = new double[attributeCount];
        for (int index = 0; index < attributeCount; index++) {
            if (!instance.attribute(index).isString() || instance.isMissing(index)) {
                values[index] = instance.value(index);
                continue;
            }
            String text = instance.stringValue(index);
            if (this.m_SelectCols.isInRange(index)) {
                text = tokenize(text);
            }
            values[index] = outputFormat.attribute(index).addStringValue(text);
        }
        Instance converted = new Instance(instance.weight(), values);
        converted.setDataset(outputFormat);
        push(converted);
    }

    /**
     * Splits a text into phrases.
     *
     * <p>The text is scanned for tokens made of letters and digits. The
     * characters {@code @ _ & / '} (and {@code .} unless internal periods are
     * disallowed) are kept inside a token when they sit directly between two
     * letters/digits. Only tokens containing at least one letter are emitted;
     * other tokens are dropped.
     *
     * <p>In the result, phrases are separated by {@code '\n'}. Within a phrase,
     * consecutive words are separated by a space, or by {@code '/'} when the
     * first word was directly followed by a slash. A hyphen directly after a
     * word is rendered as a space. A phrase ends when
     * <ul>
     *   <li>a word is directly followed by any other non-whitespace
     *       character,</li>
     *   <li>a dropped token or stray character is followed by a non-whitespace
     *       character,</li>
     *   <li>a dropped token containing a digit is followed by whitespace,
     *       or</li>
     *   <li>a second line break is seen since the last word (a single line
     *       break after a word is treated like a space).</li>
     * </ul>
     * The result may therefore end with a trailing {@code '\n'}.
     *
     * @param str the text to split; must not be {@code null}
     * @return the phrase list described above
     */
    public String tokenize(String str) {
        StringBuilder result = new StringBuilder();
        int length = str.length();
        int pos = 0;
        boolean phraseStart = true;   // next word opens a phrase, so needs no separator
        boolean seenNewLine = false;  // a line break was seen since the last word boundary
        boolean afterSlash = false;   // previous word was directly followed by '/'

        while (pos < length) {
            boolean hasLetter = false;
            boolean hasLetterOrDigit = false;
            int tokenStart = pos;

            // Consume one token: letters/digits plus joiners enclosed by them.
            while (pos < length) {
                char ch = str.charAt(pos);
                if (Character.isLetterOrDigit(ch)) {
                    hasLetterOrDigit = true;
                    if (Character.isLetter(ch)) {
                        hasLetter = true;
                    }
                } else if (!isInternalJoiner(ch) || !isBetweenLettersOrDigits(str, pos)) {
                    break;
                }
                pos++;
            }

            if (hasLetter) {
                if (!phraseStart) {
                    result.append(afterSlash ? '/' : ' ');
                }
                result.append(str, tokenStart, pos);
                if (pos == length) {
                    break;
                }
                phraseStart = false;
                seenNewLine = false;
                afterSlash = false;
                char terminator = str.charAt(pos);
                if (Character.isWhitespace(terminator)) {
                    seenNewLine = terminator == '\n';
                } else if (terminator == '/') {
                    afterSlash = true;
                } else if (terminator != '-') {
                    // Any other punctuation closes the phrase; a hyphen just
                    // becomes the default space separator.
                    phraseStart = true;
                    result.append('\n');
                }
                pos++;
            } else {
                if (pos == length) {
                    break;
                }
                char terminator = str.charAt(pos);
                boolean isNewLine = terminator == '\n';
                boolean endsPhrase;
                if (isNewLine) {
                    endsPhrase = seenNewLine || hasLetterOrDigit;
                } else if (Character.isWhitespace(terminator)) {
                    endsPhrase = hasLetterOrDigit;
                } else {
                    endsPhrase = true;
                }
                if (endsPhrase && !phraseStart) {
                    result.append('\n');
                    phraseStart = true;
                }
                if (isNewLine) {
                    seenNewLine = true;
                }
                pos++;
            }
        }
        return result.toString();
    }

    /**
     * Tells whether a non-alphanumeric character may appear inside a token.
     *
     * @param ch the character to test
     * @return {@code true} for {@code @ _ & / '} and, unless internal periods
     *         are disallowed, for {@code .}
     */
    private boolean isInternalJoiner(char ch) {
        if (ch == '.') {
            return !this.m_DisallowInternalPeriods;
        }
        return ch == '@' || ch == '_' || ch == '&' || ch == '/' || ch == '\'';
    }

    /**
     * Tells whether the character at {@code pos} has a letter or digit on both
     * sides.
     *
     * @param str the text being scanned
     * @param pos a valid index into {@code str}
     * @return {@code true} if both neighbours exist and are letters or digits
     */
    private static boolean isBetweenLettersOrDigits(String str, int pos) {
        return pos > 0
                && pos + 1 < str.length()
                && Character.isLetterOrDigit(str.charAt(pos - 1))
                && Character.isLetterOrDigit(str.charAt(pos + 1));
    }

    /**
     * Returns the tip text for the invert-selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "If set to false, the specified attributes will be processed; If set to true, specified attributes won't be processed.";
    }

    /**
     * Tells whether the attribute selection is inverted.
     *
     * @return {@code true} if the selection is inverted
     */
    public boolean getInvertSelection() {
        return this.m_SelectCols.getInvert();
    }

    /**
     * Sets whether the attribute selection is inverted.
     *
     * @param z {@code true} to invert the selection
     */
    public void setInvertSelection(boolean z) {
        this.m_SelectCols.setInvert(z);
    }

    /**
     * Returns the tip text for the disallow-internal-periods property.
     *
     * @return the tip text
     */
    public String disallowInternalPeriodsTipText() {
        return "If set to false, internal periods are allowed.";
    }

    /**
     * Tells whether periods inside tokens are disallowed.
     *
     * @return {@code true} if internal periods are disallowed
     */
    public boolean getDisallowInternalPeriods() {
        return this.m_DisallowInternalPeriods;
    }

    /**
     * Sets whether periods inside tokens are disallowed.
     *
     * @param z {@code true} to disallow internal periods
     */
    public void setDisallowInternalPeriods(boolean z) {
        this.m_DisallowInternalPeriods = z;
    }

    /**
     * Returns the tip text for the attribute-indices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on. This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Returns the current attribute selection as a range string.
     *
     * @return the range string held by the selection
     */
    public String getAttributeIndices() {
        return this.m_SelectCols.getRanges();
    }

    /**
     * Sets the attributes to process from a range string.
     *
     * @param str the range string, in the format described by
     *        {@link #attributeIndicesTipText()}
     */
    public void setAttributeIndices(String str) {
        this.m_SelectCols.setRanges(str);
    }

    /**
     * Sets the attributes to process from an array of indices, converted to a
     * range string with {@code Range.indicesToRangeList}.
     *
     * @param iArr the attribute indices to select
     */
    public void setAttributeIndicesArray(int[] iArr) {
        setAttributeIndices(Range.indicesToRangeList(iArr));
    }
}
