package com.xzixi.algorithm.simhash.analyzer.extractor;

import com.xzixi.algorithm.simhash.common.AbstractKeywordsExtractor;
import com.xzixi.algorithm.simhash.common.SimHashException;
import java.io.IOException;
import java.util.List;
import org.lionsoul.jcseg.extractor.KeywordsExtractor;
import org.lionsoul.jcseg.extractor.impl.TextRankKeywordsExtractor;
import org.lionsoul.jcseg.tokenizer.core.DictionaryFactory;
import org.lionsoul.jcseg.tokenizer.core.JcsegException;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;
import org.lionsoul.jcseg.tokenizer.core.SegmentFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xzixi/algorithm/simhash/analyzer/extractor/JcsegKeywordsExtractor.class */
/**
 * Keyword extractor backed by Jcseg's {@link TextRankKeywordsExtractor}.
 *
 * <p>The underlying extractor is built once in the constructor and reused by every
 * {@link #seg(String)} call.
 */
public class JcsegKeywordsExtractor extends AbstractKeywordsExtractor {
    private static final Logger log = LoggerFactory.getLogger(JcsegKeywordsExtractor.class);

    /**
     * Segmentation mode passed to {@link SegmentFactory#createJcseg}.
     * NOTE(review): presumably corresponds to Jcseg's "complex" mode constant; confirm against the Jcseg version in use.
     */
    private static final int SEGMENT_MODE = 2;

    /** Value passed to {@code setMaxIterateNum} on the TextRank extractor. */
    private static final int MAX_ITERATE_NUM = 100;

    /** Value passed to {@code setWindowSize} on the TextRank extractor. */
    private static final int WINDOW_SIZE = 5;

    /** Value passed to {@code setKeywordsNum} on the TextRank extractor. */
    private static final int KEYWORDS_NUM = 15;

    private final KeywordsExtractor extractor;

    /**
     * Creates the extractor with stop-word clearing enabled, and with CJK synonym
     * appending and unregistered-word keeping disabled.
     *
     * @throws SimHashException if Jcseg fails to create the segmenter; the original
     *         {@link JcsegException} is attached as the cause
     */
    public JcsegKeywordsExtractor() {
        // NOTE(review): the boolean argument presumably asks Jcseg to auto-load its default configuration; confirm.
        JcsegTaskConfig config = new JcsegTaskConfig(true);
        config.setClearStopwords(true);
        config.setAppendCJKSyn(false);
        config.setKeepUnregWords(false);
        try {
            TextRankKeywordsExtractor textRank = new TextRankKeywordsExtractor(
                    SegmentFactory.createJcseg(
                            SEGMENT_MODE,
                            new Object[]{config, DictionaryFactory.createSingletonDictionary(config)}));
            textRank.setMaxIterateNum(MAX_ITERATE_NUM);
            textRank.setWindowSize(WINDOW_SIZE);
            textRank.setKeywordsNum(KEYWORDS_NUM);
            this.extractor = textRank;
        } catch (JcsegException e) {
            // Log with the exception so the stack trace is recorded, matching seg()'s error handling.
            log.error("分词错误！", e);
            throw new SimHashException("分词错误！", e);
        }
    }

    /**
     * Extracts keywords from the given text using the configured TextRank extractor.
     *
     * @param str the text to extract keywords from
     * @return the keywords returned by the underlying extractor
     * @throws SimHashException if the underlying extractor throws an {@link IOException};
     *         the original exception is attached as the cause
     */
    protected List<String> seg(String str) {
        try {
            List<String> keywords = this.extractor.getKeywordsFromString(str);
            // Parameterized logging: the list is only stringified when INFO is enabled.
            log.info("分词结果：{}", keywords);
            return keywords;
        } catch (IOException e) {
            log.error("提取关键词发生错误！", e);
            // Preserve the cause so callers can see the underlying I/O failure.
            throw new SimHashException("提取关键词发生错误！", e);
        }
    }
}
