/*
 * Decompiled with CFR 0.152.
 */
package net.ontopia.topicmaps.classify;

import java.util.Collection;
import java.util.Collections;
import net.ontopia.topicmaps.classify.CharacterAnalyzer;
import net.ontopia.topicmaps.classify.ClassifiableContentIF;
import net.ontopia.topicmaps.classify.CompoundAnalyzer;
import net.ontopia.topicmaps.classify.DefaultTokenizer;
import net.ontopia.topicmaps.classify.DistanceAnalyzer;
import net.ontopia.topicmaps.classify.Document;
import net.ontopia.topicmaps.classify.DocumentClassifier;
import net.ontopia.topicmaps.classify.DocumentTokenizer;
import net.ontopia.topicmaps.classify.FormatModule;
import net.ontopia.topicmaps.classify.JunkNormalizer;
import net.ontopia.topicmaps.classify.Language;
import net.ontopia.topicmaps.classify.RegexpTermAnalyzer;
import net.ontopia.topicmaps.classify.RegionBooster;
import net.ontopia.topicmaps.classify.RelativeScore;
import net.ontopia.topicmaps.classify.SpecialCharNormalizer;
import net.ontopia.topicmaps.classify.TermAnalyzerIF;
import net.ontopia.topicmaps.classify.TermDatabase;
import net.ontopia.topicmaps.classify.TermStemmerIF;
import net.ontopia.topicmaps.classify.TopicMapAnalyzer;
import net.ontopia.topicmaps.classify.Variant;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapIF;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Runs the classification pipeline over classifiable content and collects
 * the resulting terms in a {@link TermDatabase}.
 *
 * <p>An instance may be created with or without a topic map. Without one,
 * no {@link TopicMapAnalyzer} is registered during {@link #classify} and the
 * topic-related accessors ({@link #getTopics}, {@link #getCandidateTypes},
 * {@link #getAssociationTypes}) return empty collections.
 */
public class TopicMapClassification {
    /** Term database shared by the tokenizer and the classifier of every classify() call. */
    TermDatabase tdb = new TermDatabase();
    /** Topic map analyzer; {@code null} when the instance was created without a topic map. */
    TopicMapAnalyzer ta;
    /** Optional caller-supplied term analyzer; {@code null} when none has been set. */
    TermAnalyzerIF customTermAnalyzer;

    /**
     * Creates a classification that is not backed by a topic map.
     */
    public TopicMapClassification() {
    }

    /**
     * Creates a classification backed by the given topic map.
     *
     * @param topicmap the topic map handed to the {@link TopicMapAnalyzer}
     */
    public TopicMapClassification(TopicMapIF topicmap) {
        this.ta = new TopicMapAnalyzer(topicmap);
    }

    /**
     * Sets an additional term analyzer to be registered by {@link #classify}.
     *
     * @param customTermAnalyzer the analyzer to register, or {@code null} for none
     */
    public void setCustomTermAnalyzer(TermAnalyzerIF customTermAnalyzer) {
        this.customTermAnalyzer = customTermAnalyzer;
    }

    /**
     * Reads the given content into a new {@link Document}, tokenizes it into
     * the term database and runs the document and term analyzers over it.
     *
     * @param cc the content to classify
     */
    public void classify(ClassifiableContentIF cc) {
        // Turn the classifiable content into a document structure.
        Document doc = new Document();
        new FormatModule().readContent(cc, doc);

        // Tokenize the document; the same SpecialCharNormalizer instance serves
        // both as delimiter trimmer and as term normalizer.
        DocumentTokenizer dt = new DocumentTokenizer(this.tdb);
        dt.setTokenizer(new DefaultTokenizer());
        SpecialCharNormalizer specialChars = new SpecialCharNormalizer();
        dt.setDelimiterTrimmer(specialChars);
        dt.addTermNormalizer(new JunkNormalizer());
        dt.addTermNormalizer(specialChars);
        dt.tokenize(doc);

        // The language supplies the stemmer, frequency analyzer and stop list.
        Language language = Language.detectLanguage(doc);
        DocumentClassifier dc = new DocumentClassifier(this.tdb);
        TermStemmerIF stemmer = language.getStemmer();
        dc.setTermStemmer(stemmer);

        // Document-level analyzers.
        dc.addDocumentAnalyzer(new DistanceAnalyzer());
        CompoundAnalyzer ca = new CompoundAnalyzer();
        ca.setTermStemmer(stemmer);
        dc.addDocumentAnalyzer(ca);

        // NOTE(review): this booster is configured but never registered with
        // the classifier, so the "title" boost has no visible effect here.
        // Left untouched because registering it would change scoring - confirm intent.
        RegionBooster rb = new RegionBooster();
        rb.addBoost("title", 1.15);

        // Term-level analyzers, in registration order.
        dc.addTermAnalyzer(CharacterAnalyzer.getInstance());
        dc.addTermAnalyzer(language.getFrequencyAnalyzer());
        dc.addTermAnalyzer(new RegexpTermAnalyzer());
        dc.addTermAnalyzer(language.getStopListAnalyzer());
        if (this.customTermAnalyzer != null) {
            dc.addTermAnalyzer(this.customTermAnalyzer);
        }
        dc.addTermAnalyzer(ca);
        // NOTE(review): the stop list and custom analyzers are registered a second
        // time after the compound analyzer - presumably so they also apply to
        // whatever the compound analyzer produces; verify before deduplicating.
        dc.addTermAnalyzer(language.getStopListAnalyzer());
        if (this.customTermAnalyzer != null) {
            dc.addTermAnalyzer(this.customTermAnalyzer);
        }
        // The topic map analyzer only exists when a topic map was supplied.
        if (this.ta != null) {
            dc.addTermAnalyzer(this.ta);
        }
        dc.addTermAnalyzer(new RelativeScore());

        dc.analyzeDocument(doc);
        dc.analyzeTerms();
    }

    /**
     * Returns the term database populated by {@link #classify}.
     *
     * @return the term database owned by this instance
     */
    public TermDatabase getTermDatabase() {
        return this.tdb;
    }

    /**
     * Returns the topics the topic map analyzer reports for the given variant.
     *
     * @param variant the variant to look up
     * @return the analyzer's topics for the variant, or an empty collection
     *         when this instance was created without a topic map
     */
    public Collection<TopicIF> getTopics(Variant variant) {
        if (this.ta == null) {
            // No topic map was supplied, so there is nothing to look up.
            return Collections.<TopicIF>emptyList();
        }
        return this.ta.getTopics(variant);
    }

    /**
     * Returns the candidate types reported by the topic map analyzer.
     *
     * @return the analyzer's candidate types, or an empty collection when
     *         this instance was created without a topic map
     */
    public Collection<TopicIF> getCandidateTypes() {
        if (this.ta == null) {
            return Collections.<TopicIF>emptyList();
        }
        return this.ta.getCandidateTypes();
    }

    /**
     * Returns the association types reported by the topic map analyzer.
     *
     * @return the analyzer's association types, or an empty collection when
     *         this instance was created without a topic map
     */
    public Collection<TopicMapAnalyzer.AssociationType> getAssociationTypes() {
        if (this.ta == null) {
            return Collections.<TopicMapAnalyzer.AssociationType>emptyList();
        }
        return this.ta.getAssociationTypes();
    }
}

