package org.tribuo.util.tokens.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.Arrays;
import org.tribuo.util.tokens.Token;
import org.tribuo.util.tokens.Tokenizer;

/* loaded from: input_file:org/tribuo/util/tokens/impl/SplitCharactersTokenizer.class */
/**
 * A {@link Tokenizer} that breaks a character sequence into tokens at whitespace and at
 * two configurable sets of characters.
 * <p>
 * Characters in {@code splitCharacters} (and any whitespace) always end a token and are
 * never part of one. Characters in {@code splitXDigitsCharacters} also end a token, except
 * when the character directly before and the character directly after are both digits,
 * in which case the character is kept inside the token (e.g. the {@code '.'} in "3.14").
 * <p>
 * Instances hold mutable scanning state (the current sequence, position and token buffer).
 */
public class SplitCharactersTokenizer implements Tokenizer {
    /** Default set of characters that always separate tokens. */
    public static final char[] DEFAULT_SPLIT_CHARACTERS = {'*', '(', ')', '&', '[', ']', '{', '}', '`', '\'', '|', ';', ':', '\\', '!', '-', '?'};
    /** Default set of characters that separate tokens unless a digit sits on both sides. */
    public static final char[] DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS = {'.', ',', '/'};

    /** Message used by every accessor that is called while no token is available. */
    private static final String NOT_READY_MESSAGE = "SplitCharactersTokenizer is not ready.";

    @Config(description = "The characters to split on.")
    private char[] splitCharacters;

    @Config(description = "The characters to split on unless we're in a number.")
    private char[] splitXDigitsCharacters;

    /** The sequence currently being tokenized; {@code null} until {@link #reset} is called. */
    private CharSequence cs;
    /** Index of the first character of the current token. */
    private int start;
    /** Index one past the last character of the current token. */
    private int end;
    /** Index of the next character to examine. */
    private int p;
    /** Buffer holding the text of the current token. */
    private StringBuilder token;
    /** True only while the accessors describe a token produced by the last {@link #advance}. */
    private boolean ready;

    /**
     * Creates a tokenizer using private copies of {@link #DEFAULT_SPLIT_CHARACTERS} and
     * {@link #DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS}.
     */
    public SplitCharactersTokenizer() {
        this(DEFAULT_SPLIT_CHARACTERS, DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS);
    }

    /**
     * Creates a tokenizer with the supplied split character sets.
     * <p>
     * Both arrays are copied, so later changes to the caller's arrays do not affect this
     * tokenizer. A {@code null} array is stored as {@code null} and matches no character.
     *
     * @param cArr  the characters that always split tokens
     * @param cArr2 the characters that split tokens unless surrounded by digits
     */
    public SplitCharactersTokenizer(char[] cArr, char[] cArr2) {
        this.splitCharacters = copyOrNull(cArr);
        this.splitXDigitsCharacters = copyOrNull(cArr2);
        this.token = new StringBuilder();
    }

    /**
     * Creates a tokenizer with two empty split character sets, so that only whitespace
     * separates tokens.
     *
     * @return a new tokenizer that splits on whitespace only
     */
    public static SplitCharactersTokenizer createWhitespaceTokenizer() {
        return new SplitCharactersTokenizer(new char[0], new char[0]);
    }

    /**
     * Builds the provenance object describing this tokenizer's configuration.
     *
     * @return a new {@link ConfiguredObjectProvenanceImpl} for this instance, tagged "Tokenizer"
     */
    /* renamed from: getProvenance, reason: merged with bridge method [inline-methods] */
    public ConfiguredObjectProvenance m9getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
    }

    /**
     * Points the tokenizer at a new character sequence and discards all scanning state.
     *
     * @param charSequence the sequence to tokenize next
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public void reset(CharSequence charSequence) {
        this.cs = charSequence;
        this.start = -1;
        this.end = -1;
        this.p = 0;
        this.token.setLength(0);
        this.ready = false;
    }

    /**
     * Scans forward to the next token.
     * <p>
     * Leading split characters are skipped, then characters are accumulated until the next
     * split character (which is consumed) or the end of the sequence.
     *
     * @return {@code true} if a non-empty token was found; {@code false} if the sequence is
     *         exhausted, in which case the accessors throw until the next successful advance
     * @throws IllegalStateException if {@link #reset} has not been called with a sequence
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public boolean advance() {
        if (this.cs == null) {
            throw new IllegalStateException("SplitCharactersTokenizer has not been reset.");
        }
        // Invalidate the previous token first so the accessors can never report a mix of an
        // emptied text buffer and the offsets of an earlier token.
        this.ready = false;
        this.token.setLength(0);
        while (this.p < this.cs.length()) {
            int index = this.p++;
            if (isTokenCharacter(index)) {
                if (this.token.length() == 0) {
                    this.start = index;
                }
                this.token.append(this.cs.charAt(index));
                this.end = index + 1;
            } else if (this.token.length() > 0) {
                // A split character after at least one token character ends the token.
                break;
            }
        }
        this.ready = this.token.length() > 0;
        return this.ready;
    }

    /**
     * Decides whether the character at {@code index} belongs inside a token.
     *
     * @param index a valid index into the current sequence
     * @return {@code false} for split characters, and for split-except-in-digits characters
     *         that are not flanked by a digit on each side; {@code true} otherwise
     */
    private boolean isTokenCharacter(int index) {
        char current = this.cs.charAt(index);
        if (isSplitCharacter(current)) {
            return false;
        }
        if (!isSplitXDigitCharacter(current)) {
            return true;
        }
        // Both neighbours must exist before they can be inspected.
        boolean hasBothNeighbours = index > 0 && index < this.cs.length() - 1;
        return hasBothNeighbours
                && Character.isDigit(this.cs.charAt(index - 1))
                && Character.isDigit(this.cs.charAt(index + 1));
    }

    /**
     * Returns the text of the current token.
     *
     * @return the token text
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public String getText() {
        requireReady();
        return this.token.toString();
    }

    /**
     * Returns the index of the first character of the current token.
     *
     * @return the start index within the sequence passed to {@link #reset}
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getStart() {
        requireReady();
        return this.start;
    }

    /**
     * Returns the index one past the last character of the current token.
     *
     * @return the exclusive end index within the sequence passed to {@link #reset}
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getEnd() {
        requireReady();
        return this.end;
    }

    /**
     * Returns the type of the current token, which for this tokenizer is always
     * {@link Token.TokenType#WORD}.
     *
     * @return {@code Token.TokenType.WORD}
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public Token.TokenType getType() {
        requireReady();
        return Token.TokenType.WORD;
    }

    /**
     * Creates an independent copy of this tokenizer with the same split character sets.
     * <p>
     * The copy gets its own token buffer and its own copies of both character arrays, has
     * no current sequence, and must be {@link #reset} before use.
     *
     * @return the copy
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    /* renamed from: clone, reason: merged with bridge method [inline-methods] */
    public SplitCharactersTokenizer m8clone() {
        try {
            SplitCharactersTokenizer copy = (SplitCharactersTokenizer) super.clone();
            copy.token = new StringBuilder();
            copy.splitCharacters = copyOrNull(this.splitCharacters);
            copy.splitXDigitsCharacters = copyOrNull(this.splitXDigitsCharacters);
            copy.ready = false;
            copy.cs = null;
            return copy;
        } catch (CloneNotSupportedException e) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new AssertionError("SplitCharactersTokenizer is Cloneable, but clone call failed", e);
        }
    }

    /**
     * Tests whether a character always separates tokens.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is in the split character set or is whitespace
     */
    public boolean isSplitCharacter(char c) {
        return isCharacter(c, this.splitCharacters) || Character.isWhitespace(c);
    }

    /**
     * Tests whether a character is in the set that separates tokens unless it is
     * surrounded by digits.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is in the split-except-in-digits set
     */
    public boolean isSplitXDigitCharacter(char c) {
        return isCharacter(c, this.splitXDigitsCharacters);
    }

    /**
     * Linear membership test of a character in an array.
     *
     * @param c    the character to look for
     * @param cArr the array to search; {@code null} is treated as containing nothing
     * @return {@code true} if {@code c} occurs in {@code cArr}
     */
    private boolean isCharacter(char c, char[] cArr) {
        if (cArr == null) {
            return false;
        }
        for (char candidate : cArr) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a copy of the characters that always split tokens.
     *
     * @return a fresh array; empty if no split characters are configured
     */
    public char[] getSplitCharacters() {
        return copyOrEmpty(this.splitCharacters);
    }

    /**
     * Returns a copy of the characters that split tokens unless surrounded by digits.
     *
     * @return a fresh array; empty if no such characters are configured
     */
    public char[] getSplitXDigitsCharacters() {
        return copyOrEmpty(this.splitXDigitsCharacters);
    }

    /** Throws if the last {@link #advance} did not produce a token. */
    private void requireReady() {
        if (!this.ready) {
            throw new IllegalStateException(NOT_READY_MESSAGE);
        }
    }

    /** Returns a copy of {@code source}, or {@code null} when {@code source} is {@code null}. */
    private static char[] copyOrNull(char[] source) {
        return source == null ? null : Arrays.copyOf(source, source.length);
    }

    /** Returns a copy of {@code source}, or an empty array when {@code source} is {@code null}. */
    private static char[] copyOrEmpty(char[] source) {
        return source == null ? new char[0] : Arrays.copyOf(source, source.length);
    }
}
