package eus.ixa.ixa.pipe.ml.pos;

import com.google.common.base.Joiner;
import eus.ixa.ixa.pipe.ml.utils.IOUtils;
import eus.ixa.ixa.pipe.ml.utils.Span;
import eus.ixa.ixa.pipe.ml.utils.StringUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.CharEncoding;

/* loaded from: input_file:eus/ixa/ixa/pipe/ml/pos/MultiWordMatcher.class */
/**
 * Finds multi-word expressions in a token sequence by dictionary lookup and
 * can merge the tokens of each match into a single {@code #}-joined token.
 *
 * <p>The dictionary is read from a stream of lines split on
 * {@link IOUtils#TAB_DELIMITER}. Only lines with exactly four fields are used:
 * the first field, lower-cased and with every {@code #} replaced by a space,
 * is the lookup key and the third field is the value attached to the
 * resulting {@link Span}. The second and fourth fields are ignored.
 *
 * <p>NOTE(review): the dictionary is held in a static field and is loaded only
 * by the first instance that is successfully constructed; streams passed to
 * later constructors are not read. Loading is not synchronized.
 */
public class MultiWordMatcher {
    private static final Pattern tabPattern = Pattern.compile(IOUtils.TAB_DELIMITER);
    private static final Pattern linePattern = Pattern.compile("#");
    private static Map<String, String> dictionary;
    /** Largest number of space-separated parts in any loaded dictionary key. */
    private static int maxTokenCount;

    /**
     * Creates a matcher, loading the shared dictionary from {@code inputStream}
     * if no dictionary has been loaded yet.
     *
     * @param inputStream the dictionary source, decoded as UTF-8; it is not
     *                    closed by this class
     * @throws IOException if reading the stream fails
     */
    public MultiWordMatcher(InputStream inputStream) throws IOException {
        if (dictionary == null) {
            loadDictionary(inputStream);
        }
    }

    /**
     * Reads the whole stream and publishes the resulting dictionary.
     *
     * <p>The entries are collected in a local map and only assigned to the
     * static field after the stream has been read completely, so a read
     * failure does not leave a partially filled dictionary behind.
     *
     * @param inputStream the dictionary source, decoded as UTF-8
     * @throws IOException if reading the stream fails
     */
    private static void loadDictionary(InputStream inputStream) throws IOException {
        Map<String, String> loaded = new HashMap<String, String>();
        int longestKey = 0;
        // The reader is intentionally left open: the underlying stream belongs to the caller.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, Charset.forName(CharEncoding.UTF_8)));
        String line;
        while ((line = reader.readLine()) != null) {
            String[] fields = tabPattern.split(line);
            if (fields.length == 4) {
                String key = linePattern.matcher(fields[0].toLowerCase()).replaceAll(" ");
                loaded.put(key, fields[2]);
                longestKey = Math.max(longestKey, countParts(key));
            } else {
                // Pattern.split drops trailing empty strings, so a line made only of
                // delimiters produces an empty array; guard the index.
                String firstField = fields.length > 0 ? fields[0] : "";
                System.err.println("WARNING: line starting with " + firstField + " is not well-formed; skipping!!");
            }
        }
        maxTokenCount = longestKey;
        dictionary = loaded;
    }

    /** Returns one more than the number of space characters in {@code key}. */
    private static int countParts(String key) {
        int parts = 1;
        for (int pos = key.indexOf(' '); pos >= 0; pos = key.indexOf(' ', pos + 1)) {
            parts++;
        }
        return parts;
    }

    /**
     * Replaces every multi-word match in {@code strArr} by a single token made
     * of the matched tokens joined with {@code #}.
     *
     * @param strArr the input tokens; the array itself is not modified
     * @return a new array in which each matched token run is collapsed into one token
     */
    public final String[] getTokensWithMultiWords(String[] strArr) {
        Span[] spans = multiWordsToSpans(strArr);
        ArrayList<String> merged = new ArrayList<String>(Arrays.asList(strArr));
        // Number of list positions already removed by earlier merges; span
        // offsets refer to the original array and must be shifted by it.
        int shift = 0;
        for (Span span : spans) {
            int start = span.getStart() - shift;
            int end = span.getEnd() - shift;
            String joined = Joiner.on("#").join(merged.subList(start, end));
            merged.subList(start, end).clear();
            merged.add(start, joined);
            shift += (end - start) - 1;
        }
        return merged.toArray(new String[merged.size()]);
    }

    /**
     * Scans {@code strArr} from left to right and, at each position, keeps the
     * longest token run whose space-joined, lower-cased form is a dictionary
     * key. After a match the scan continues behind the matched run.
     *
     * <p>NOTE(review): the search window is bounded by {@link #getMaxTokenCount()},
     * which assumes {@code StringUtils.getStringFromTokens} joins non-empty
     * tokens with single spaces - confirm against that helper.
     *
     * @param strArr the input tokens
     * @return the matches in order of appearance; each span covers
     *         {@code [start, end)} token indices and carries the dictionary value
     */
    public final Span[] multiWordsToSpans(String[] strArr) {
        ArrayList<Span> found = new ArrayList<Span>();
        int windowLimit = getMaxTokenCount();
        int from = 0;
        while (from < strArr.length) {
            Span longest = null;
            for (int to = from; to < strArr.length && (to - from) + 1 <= windowLimit; to++) {
                String[] window = Arrays.copyOfRange(strArr, from, to + 1);
                String value = dictionary.get(StringUtils.getStringFromTokens(window).toLowerCase());
                if (value != null) {
                    // Later (longer) windows overwrite earlier ones, so the longest match wins.
                    longest = new Span(from, to + 1, value);
                }
            }
            if (longest != null) {
                found.add(longest);
                from += longest.length() - 1;
            }
            from++;
        }
        return found.toArray(new Span[found.size()]);
    }

    /**
     * Returns the upper bound on the number of tokens a dictionary entry can
     * span: the largest count of space-separated parts among the loaded keys,
     * or {@code 0} when the dictionary is empty.
     *
     * @return the maximum window size used by {@link #multiWordsToSpans(String[])}
     */
    public int getMaxTokenCount() {
        return maxTokenCount;
    }
}
