package com.xzixi.algorithm.simhash.common;

import com.xzixi.algorithm.simhash.common.util.CommonUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xzixi/algorithm/simhash/common/AbstractKeywordsExtractor.class */
/**
 * Skeleton {@link KeywordsExtractor} that weights terms by their occurrence count
 * multiplied by an IDF value.
 *
 * <p>Subclasses supply the segmentation step through {@link #seg(String)}. The stop-word
 * list and the IDF table are read once, in the constructor, from the locations returned by
 * {@code Config.stopWordPath()} and {@code Config.idfPath()}.
 */
public abstract class AbstractKeywordsExtractor implements KeywordsExtractor {
    private static final Logger log = LoggerFactory.getLogger(AbstractKeywordsExtractor.class);

    /** Terms removed from extraction results; one entry per line of the stop-word resource. */
    private final Set<String> stopWord = new HashSet<>();

    /** IDF value per term, as read from the IDF resource. */
    private final Map<String, Double> idfMap = new HashMap<>();

    /** Mean of the loaded IDF values; used for terms that are absent from {@link #idfMap}. */
    private double avgIdf;

    /**
     * Loads the stop-word list and the IDF table.
     *
     * @throws SimHashException if either resource cannot be opened or read
     */
    public AbstractKeywordsExtractor() {
        loadStopWord(Config.stopWordPath());
        loadIdf(Config.idfPath());
    }

    /**
     * Splits the given text into terms.
     *
     * @param str the text to segment
     * @return the terms in the text; repeated terms are counted once per occurrence by
     *         {@link #extract(String)}
     */
    protected abstract List<String> seg(String str);

    /**
     * Computes a weight for every non-stop-word term of the given text.
     *
     * <p>The weight of a term is its number of occurrences in the result of
     * {@link #seg(String)} multiplied by its IDF value (or by the average IDF when the term
     * is not in the IDF table). The resulting map is also logged at INFO level.
     *
     * @param str the text to analyse
     * @return a new mutable map from term to weight
     */
    @Override
    public Map<String, Double> extract(String str) {
        // First pass: count occurrences of each term.
        Map<String, Double> weights = new HashMap<>();
        for (String term : seg(str)) {
            Double count = weights.get(term);
            weights.put(term, count == null ? 1.0d : count + 1.0d);
        }

        // Second pass: drop stop words and scale the remaining counts by IDF.
        // Entry.setValue / Iterator.remove keep the iteration valid and avoid re-hashing the key.
        Iterator<Map.Entry<String, Double>> it = weights.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, Double> entry = it.next();
            String term = entry.getKey();
            if (stop(term)) {
                it.remove();
            } else {
                entry.setValue(entry.getValue() * idf(term));
            }
        }

        // Parameterized logging: the map is only rendered when INFO is enabled.
        log.info("关键词和权重：{}", weights);
        return weights;
    }

    /**
     * Returns the IDF value for a term, falling back to the average IDF for unknown terms.
     */
    private double idf(String str) {
        Double value = this.idfMap.get(str);
        return value != null ? value : this.avgIdf;
    }

    /**
     * Tells whether the term is in the stop-word list.
     */
    private boolean stop(String str) {
        return this.stopWord.contains(str);
    }

    /**
     * Reads the stop-word resource as UTF-8 and adds every line, unmodified, to
     * {@link #stopWord}.
     *
     * @param str location of the stop-word resource, resolved by {@code CommonUtils.getInputStream}
     * @throws SimHashException if the resource cannot be opened or an I/O error occurs
     */
    private void loadStopWord(String str) {
        InputStream inputStream = CommonUtils.getInputStream(str);
        if (inputStream == null) {
            String message = String.format("加载停用词词库错误！stopWordPath：%s。", str);
            log.error(message);
            throw new SimHashException(message);
        }
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                this.stopWord.add(line);
            }
        } catch (IOException e) {
            log.error("读取停用词词库错误！", e);
            throw new SimHashException("读取停用词词库错误！");
        }
    }

    /**
     * Reads the IDF resource as UTF-8, fills {@link #idfMap} and computes {@link #avgIdf}.
     *
     * <p>Each line is split on whitespace; only lines that yield exactly two tokens
     * ({@code term value}) are used, all others are skipped. When a term appears more than
     * once the last value wins and the running sum is corrected accordingly.
     *
     * @param str location of the IDF resource, resolved by {@code CommonUtils.getInputStream}
     * @throws SimHashException if the resource cannot be opened or an I/O error occurs
     * @throws NumberFormatException if the second token of a two-token line is not a number
     */
    private void loadIdf(String str) {
        InputStream inputStream = CommonUtils.getInputStream(str);
        if (inputStream == null) {
            String message = String.format("加载关键词权重词库错误！idfPath：%s。", str);
            log.error(message);
            throw new SimHashException(message);
        }
        double sum = 0.0d;
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("\\s+");
                if (parts.length != 2) {
                    continue;
                }
                double value = Double.parseDouble(parts[1]);
                Double previous = this.idfMap.put(parts[0], value);
                if (previous != null) {
                    // Duplicate term: take the replaced value back out of the sum.
                    sum -= previous;
                }
                sum += value;
            }
        } catch (IOException e) {
            log.error("读取关键词权重词库错误！", e);
            throw new SimHashException("读取关键词权重词库错误！");
        }
        // With no valid entries the division would be 0.0 / 0 = NaN, which would turn every
        // weight produced by extract() into NaN; use 0.0 as the average in that case.
        this.avgIdf = this.idfMap.isEmpty() ? 0.0d : sum / this.idfMap.size();
    }
}
