package org.tribuo.util.tokens.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.tribuo.util.tokens.Token;
import org.tribuo.util.tokens.Tokenizer;

/* loaded from: input_file:org/tribuo/util/tokens/impl/SplitPatternTokenizer.class */
/**
 * A {@link Tokenizer} that treats every match of a regular expression as a
 * separator and emits the text between consecutive separators as
 * {@link Token.TokenType#WORD} tokens.
 *
 * <p>Behaviour worth knowing:
 * <ul>
 *   <li>A separator match that begins at index 0 is skipped, so no empty
 *       leading token is produced.</li>
 *   <li>Separator matches that directly follow one another elsewhere in the
 *       input yield an empty token ({@code getStart() == getEnd()}).</li>
 *   <li>Text after the last separator is emitted only if it is non-empty.</li>
 * </ul>
 *
 * <p>Instances carry mutable iteration state (current character sequence,
 * matcher and offsets); call {@link #reset(CharSequence)} before the first
 * {@link #advance()}.
 */
public class SplitPatternTokenizer implements Tokenizer {
    /** Default separator: an optional '.' or ',' followed by one or more whitespace characters. */
    public static final String SIMPLE_DEFAULT_PATTERN = "[\\.,]?\\s+";

    @Config(description = "The regex to split with.")
    private String splitPatternRegex = SIMPLE_DEFAULT_PATTERN;

    /** Compiled form of {@link #splitPatternRegex}; rebuilt by {@link #postConfig()}. */
    private Pattern splitPattern;
    /** Character sequence being tokenized, or {@code null} until {@link #reset(CharSequence)} is called. */
    private CharSequence cs;
    /** Inclusive start offset of the current token. */
    private int start;
    /** Exclusive end offset of the current token. */
    private int end;
    /** Matcher of {@link #splitPattern} over {@link #cs}. */
    private Matcher matcher;
    /** End offset of the previous separator match, i.e. where the next token begins. */
    private int prevMatchEnd;
    /** Set once the matcher has run out of separator matches. */
    private boolean done;
    /** True while a current token is available to the accessor methods. */
    private boolean ready;

    /**
     * Creates a tokenizer that splits on {@link #SIMPLE_DEFAULT_PATTERN}.
     */
    public SplitPatternTokenizer() {
        postConfig();
    }

    /**
     * Creates a tokenizer that splits on the supplied regular expression.
     *
     * @param str the regular expression whose matches separate tokens
     */
    public SplitPatternTokenizer(String str) {
        this.splitPatternRegex = str;
        postConfig();
    }

    /**
     * Compiles the configured regular expression and discards any input that
     * was being tokenized, so {@link #reset(CharSequence)} must be called
     * before the next {@link #advance()}.
     */
    public void postConfig() {
        this.splitPattern = Pattern.compile(this.splitPatternRegex);
        this.ready = false;
        this.cs = null;
    }

    /* renamed from: getProvenance, reason: merged with bridge method [inline-methods] */
    /**
     * Builds the provenance record for this tokenizer.
     *
     * @return a new provenance object created from this instance with the host type "Tokenizer"
     */
    public ConfiguredObjectProvenance m13getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
    }

    /**
     * Returns the regular expression used to find separators.
     *
     * @return the split pattern as a string
     */
    public String getSplitPatternRegex() {
        return this.splitPatternRegex;
    }

    /**
     * Starts tokenizing a new character sequence from its beginning.
     *
     * @param charSequence the text to tokenize
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public void reset(CharSequence charSequence) {
        this.cs = charSequence;
        this.matcher = this.splitPattern.matcher(charSequence);
        this.start = -1;
        this.end = -1;
        this.prevMatchEnd = 0;
        this.done = false;
        this.ready = false;
    }

    /**
     * Moves to the next token.
     *
     * @return {@code true} if a token is now available through the accessor
     *         methods, {@code false} if the input is exhausted
     * @throws IllegalStateException if {@link #reset(CharSequence)} has not been called
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public boolean advance() {
        if (this.cs == null) {
            throw new IllegalStateException("SplitPatternTokenizer has not been reset.");
        }
        if (this.done) {
            // The last token has already been handed out; clear the flag so the
            // accessors throw instead of replaying that stale token.
            this.ready = false;
            return false;
        }
        while (this.matcher.find()) {
            if (this.matcher.start() == 0) {
                // A separator at the very start of the input has no token in
                // front of it: remember where it ends and look for the next one.
                this.prevMatchEnd = this.matcher.end();
                continue;
            }
            // The token is the text between the previous separator and this one.
            this.start = this.prevMatchEnd;
            this.end = this.matcher.start();
            this.prevMatchEnd = this.matcher.end();
            this.ready = true;
            return true;
        }
        // No separators left: whatever follows the last separator is the final
        // token, provided it is not empty.
        this.start = this.prevMatchEnd;
        this.end = this.cs.length();
        this.done = true;
        this.ready = this.start < this.end;
        return this.ready;
    }

    /**
     * Returns the text of the current token.
     *
     * @return the characters between {@link #getStart()} and {@link #getEnd()}
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public String getText() {
        if (this.ready) {
            return this.cs.subSequence(this.start, this.end).toString();
        }
        throw new IllegalStateException("SplitPatternTokenizer is not ready.");
    }

    /**
     * Returns the inclusive start offset of the current token.
     *
     * @return the start offset within the character sequence
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getStart() {
        if (this.ready) {
            return this.start;
        }
        throw new IllegalStateException("SplitPatternTokenizer is not ready.");
    }

    /**
     * Returns the exclusive end offset of the current token.
     *
     * @return the end offset within the character sequence
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    public int getEnd() {
        if (this.ready) {
            return this.end;
        }
        throw new IllegalStateException("SplitPatternTokenizer is not ready.");
    }

    /**
     * Returns the type of the current token, which is always
     * {@link Token.TokenType#WORD} for this tokenizer.
     *
     * @return {@link Token.TokenType#WORD}
     * @throws IllegalStateException if there is no current token
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    /* renamed from: clone, reason: merged with bridge method [inline-methods] */
    public Token.TokenType getType() {
        if (this.ready) {
            return Token.TokenType.WORD;
        }
        throw new IllegalStateException("SplitPatternTokenizer is not ready.");
    }

    /**
     * Creates a copy of this tokenizer with the same split pattern. The copy
     * has no input attached and must be {@link #reset(CharSequence) reset}
     * before use.
     *
     * @return the copied tokenizer
     */
    @Override // org.tribuo.util.tokens.Tokenizer
    /* renamed from: clone, reason: merged with bridge method [inline-methods] */
    public SplitPatternTokenizer m12clone() {
        try {
            SplitPatternTokenizer copy = (SplitPatternTokenizer) super.clone();
            // Recompile the pattern and drop the copied input reference so the
            // copy cannot be advanced until it has been reset.
            copy.postConfig();
            return copy;
        } catch (CloneNotSupportedException e) {
            // Keep the original exception as the cause so the failure is diagnosable.
            throw new AssertionError("SplitPatternTokenizer is Cloneable, but the clone call failed.", e);
        }
    }
}
