/*
 * Decompiled with CFR 0.152.
 */
package eu.stratosphere.sopremo.tokenizer;

import eu.stratosphere.sopremo.tokenizer.AbstractTokenizer;
import eu.stratosphere.sopremo.type.CachingArrayNode;
import eu.stratosphere.sopremo.type.TextNode;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javolution.text.TextFormat;

/**
 * Tokenizer that splits text at every match of a delimiter {@link Pattern}.
 * <p>
 * The delimiter defaults to {@link #WHITESPACE_PATTERN}. The text between two delimiter
 * matches becomes a token; empty stretches caused by leading, consecutive, or trailing
 * delimiters produce no token.
 */
public class RegexTokenizer
extends AbstractTokenizer {
    /** Default delimiter: one or more characters for which {@code Character.isWhitespace} holds. */
    public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\p{javaWhitespace}+");

    /** Delimiter pattern used to split the text; never {@code null}. */
    private Pattern pattern = WHITESPACE_PATTERN;

    /**
     * Creates a tokenizer that splits at {@link #WHITESPACE_PATTERN}.
     */
    public RegexTokenizer() {
    }

    /**
     * Creates a tokenizer that splits at the given delimiter pattern.
     *
     * @param pattern the delimiter pattern
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public RegexTokenizer(Pattern pattern) {
        // Validate here as well, so a null pattern fails fast instead of inside tokenizeInto.
        this.pattern = RegexTokenizer.requirePattern(pattern);
    }

    /**
     * Appends a textual description of the form {@code RegexTokenizer [pattern=...]}.
     * The pattern itself is rendered by the {@link TextFormat} looked up for {@link Pattern}.
     *
     * @param appendable the target to append to
     * @throws IOException if appending to {@code appendable} fails
     */
    public void appendAsString(Appendable appendable) throws IOException {
        appendable.append("RegexTokenizer [pattern=");
        TextFormat.getInstance(Pattern.class).format(this.pattern, appendable);
        appendable.append("]");
    }

    /**
     * Returns the delimiter pattern.
     *
     * @return the delimiter pattern
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Sets the delimiter pattern.
     *
     * @param pattern the new delimiter pattern
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public void setPattern(Pattern pattern) {
        this.pattern = RegexTokenizer.requirePattern(pattern);
    }

    /**
     * Clears {@code tokens} and fills it with the pieces of {@code text} that lie between
     * matches of the delimiter pattern.
     * <p>
     * If the pattern does not match at all, the complete text is added as a single token
     * (even when the text is empty). Otherwise only non-empty pieces are added, so leading,
     * consecutive, and trailing delimiters never yield empty tokens.
     *
     * @param text the text to split
     * @param tokens the node that receives the tokens; its previous content is discarded
     */
    @Override
    public void tokenizeInto(CharSequence text, CachingArrayNode<TextNode> tokens) {
        Matcher matcher = this.pattern.matcher(text);
        tokens.clear();
        if (!matcher.find()) {
            // No delimiter anywhere: the whole text is one token.
            this.addToken(tokens, text, 0, text.length());
            return;
        }
        int start = 0;
        do {
            int end = matcher.start();
            // Skip the empty piece before a leading delimiter or between adjacent delimiters.
            if (end > start) {
                this.addToken(tokens, text, start, end);
            }
            start = matcher.end();
        } while (matcher.find());
        // Add the remainder only if the text does not end with a delimiter; otherwise an
        // empty trailing token would be emitted, unlike for leading delimiters.
        if (start < text.length()) {
            this.addToken(tokens, text, start, text.length());
        }
    }

    /** Returns {@code pattern} unchanged, or throws if it is {@code null}. */
    private static Pattern requirePattern(Pattern pattern) {
        if (pattern == null) {
            throw new NullPointerException("pattern must not be null");
        }
        return pattern;
    }
}

