/*
 * Decompiled with CFR 0.152.
 */
package net.ontopia.topicmaps.classify;

import java.util.ArrayList;
import java.util.List;
import net.ontopia.topicmaps.classify.DelimiterTrimmerIF;
import net.ontopia.topicmaps.classify.Document;
import net.ontopia.topicmaps.classify.Region;
import net.ontopia.topicmaps.classify.TermDatabase;
import net.ontopia.topicmaps.classify.TermNormalizerIF;
import net.ontopia.topicmaps.classify.TextBlock;
import net.ontopia.topicmaps.classify.Token;
import net.ontopia.topicmaps.classify.TokenizerIF;

/**
 * Walks the region tree of a {@link Document} and converts the text of every
 * {@link TextBlock} into {@link Token}s obtained from a {@link TermDatabase}.
 *
 * <p>A tokenizer and a delimiter trimmer must be set before tokenizing; both
 * are dereferenced without a null check. Term normalizers are optional and
 * are applied in the order they were added.
 */
public class DocumentTokenizer {
    TermDatabase tdb;
    TokenizerIF tokenizer;
    DelimiterTrimmerIF delimiterTrimmer;
    List<TermNormalizerIF> termNormalizers = new ArrayList<TermNormalizerIF>();

    /**
     * Creates a tokenizer that registers its tokens with the given database.
     *
     * @param tdb the term database used to create variant and delimiter tokens
     */
    public DocumentTokenizer(TermDatabase tdb) {
        this.tdb = tdb;
    }

    /**
     * Returns the term database tokens are created from.
     *
     * @return the current term database
     */
    public TermDatabase getTermDatabase() {
        return this.tdb;
    }

    /**
     * Replaces the term database tokens are created from.
     *
     * @param tdb the new term database
     */
    public void setTermDatabase(TermDatabase tdb) {
        this.tdb = tdb;
    }

    /**
     * Sets the tokenizer used to split text block content into raw tokens.
     *
     * @param tokenizer the tokenizer to use
     */
    public void setTokenizer(TokenizerIF tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Sets the trimmer used to locate leading and trailing delimiters in a
     * raw token.
     *
     * @param trimmer the delimiter trimmer to use
     */
    public void setDelimiterTrimmer(DelimiterTrimmerIF trimmer) {
        this.delimiterTrimmer = trimmer;
    }

    /**
     * Appends a normalizer to the chain applied to every trimmed token.
     *
     * @param normalizer the normalizer to add
     */
    public void addTermNormalizer(TermNormalizerIF normalizer) {
        this.termNormalizers.add(normalizer);
    }

    /**
     * Tokenizes the whole document, starting at its root region, and then
     * flags the document as tokenized.
     *
     * @param doc the document to tokenize
     */
    public void tokenize(Document doc) {
        this.tokenize(doc.getRoot());
        doc.setTokenized(true);
    }

    /**
     * Tokenizes every child of the region: text blocks are tokenized
     * directly, any other child is cast to {@link Region} and processed
     * recursively.
     *
     * @param region the region whose children are tokenized
     */
    protected void tokenize(Region region) {
        for (Object child : region.getChildren()) {
            if (child instanceof TextBlock) {
                this.tokenize(region, (TextBlock)child);
            } else {
                this.tokenize((Region)child);
            }
        }
    }

    /**
     * Feeds the text of the block to the tokenizer and processes each raw
     * token it produces.
     *
     * @param parent the region containing the text block (not used here)
     * @param tb the text block whose text is tokenized and which receives
     *        the resulting tokens
     */
    protected void tokenize(Region parent, TextBlock tb) {
        this.tokenizer.setText(tb.getText());
        while (this.tokenizer.next()) {
            this.tokenize(tb, this.tokenizer.getToken());
        }
    }

    /**
     * Splits a raw token into an optional leading delimiter, a term and an
     * optional trailing delimiter, normalizes the term, and adds the
     * resulting tokens to the text block in that order.
     *
     * <p>If a normalizer returns {@code null}, the remaining normalizers are
     * skipped and a delimiter token is added in place of the term.
     *
     * @param tb the text block that receives the tokens
     * @param token the raw token; {@code null} is ignored
     */
    protected void tokenize(TextBlock tb, String token) {
        if (token == null) {
            return;
        }

        String delimiterBefore = null;
        String delimiterAfter = null;

        // six and eix are used below as the inclusive start and end index of
        // the term inside the raw token.
        // NOTE(review): presumably the first/last non-delimiter character --
        // confirm against the DelimiterTrimmerIF implementations.
        int six = this.delimiterTrimmer.trimStart(token);
        int eix = this.delimiterTrimmer.trimEnd(token);
        int lastIndex = token.length() - 1;

        if (six > 0 && eix >= six && eix < lastIndex) {
            // Delimiters on both sides. eix == six is a valid one-character
            // term; requiring eix > six here left the trailing delimiter
            // attached to such terms.
            delimiterBefore = token.substring(0, six);
            delimiterAfter = token.substring(eix + 1);
            token = token.substring(six, eix + 1);
        } else if (six > 0) {
            // Leading delimiter only.
            delimiterBefore = token.substring(0, six);
            token = token.substring(six);
        } else if (eix < lastIndex) {
            // Trailing delimiter only.
            delimiterAfter = token.substring(eix + 1);
            token = token.substring(0, eix + 1);
        }

        // Run the term through the normalizer chain; a null result stops it.
        String normalized = token;
        if (this.termNormalizers != null) {
            for (TermNormalizerIF normalizer : this.termNormalizers) {
                normalized = normalizer.normalize(normalized);
                if (normalized == null) {
                    break;
                }
            }
        }

        Token t;
        if (normalized == null) {
            // NOTE(review): normalized is null here, so the delimiter is
            // created with a null value. Possibly the trimmed token text was
            // intended; kept as-is pending confirmation.
            t = this.tdb.createDelimiter(normalized);
        } else {
            t = this.tdb.createVariant(normalized);
        }

        if (delimiterBefore != null) {
            tb.addToken(this.tdb.createDelimiter(delimiterBefore));
        }
        tb.addToken(t);
        if (delimiterAfter != null) {
            tb.addToken(this.tdb.createDelimiter(delimiterAfter));
        }
    }
}

