package org.apache.uima.textmarker.textruler.learner.rapier;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.textmarker.textruler.core.TextRulerAnnotation;
import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
import org.apache.uima.textmarker.textruler.core.TextRulerExample;
import org.apache.uima.textmarker.textruler.core.TextRulerRule;
import org.apache.uima.textmarker.textruler.core.TextRulerRuleItem;
import org.apache.uima.textmarker.textruler.core.TextRulerRuleList;
import org.apache.uima.textmarker.textruler.core.TextRulerRulePattern;
import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
import org.apache.uima.textmarker.textruler.core.TextRulerWordConstraint;
import org.apache.uima.textmarker.textruler.extension.TextRulerLearner;
import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerDelegate;

/* loaded from: input_file:org/apache/uima/textmarker/textruler/learner/rapier/Rapier.class */
public class Rapier extends TextRulerBasicLearner {
    // ---- Keys looked up in the map passed to setParameters(Map) ----

    /** Parameter key; its value is cast to {@code Integer} and stored in {@code compressionFailMaxCount}. */
    public static final String COMPRESSION_FAIL_MAX_COUNT_KEY = "compressionFailMaxCount";
    /** Parameter key; its value is cast to {@code Integer} and stored in {@code ruleListSize}. */
    public static final String RULELIST_SIZE_KEY = "ruleListSize";
    /** Parameter key; its value is cast to {@code Integer} and stored in {@code pairCount}. */
    public static final String PAIR_COUNT_KEY = "pairCount";
    /** Parameter key; its value is cast to {@code Integer} and stored in {@code limNoImprovements}. */
    public static final String LIM_NO_IMPROVEMENTS_KEY = "limNoImprovements";
    /**
     * Parameter key; its value is cast to {@code Float} and stored in {@code noiseThreshold}.
     * NOTE(review): the identifier is misspelled ("THESHOLD") while the key string is not; the
     * name is public, so renaming it would break existing callers.
     */
    public static final String NOISE_THESHOLD_KEY = "noiseThreshold";
    /** Parameter key; its value is cast to {@code String} and stored in {@code posTagRootTypeName}. */
    public static final String POSTAG_ROOTTYPE_KEY = "posTagRootType";
    /** Parameter key; its value is cast to {@code Integer} and stored in {@code minCoveredPositives}. */
    public static final String MIN_COVERED_POSITIVES_KEY = "minCoveredPositives";
    /** Parameter key; its value is cast to {@code Boolean} and stored in {@code useAllGenSetsAtSpecialization}. */
    public static final String USE_ALL_GENSETS_AT_SPECIALIZATION_KEY = "useAllGenSetsAtSpecialization";

    // ---- Values equal to what the constructor assigns to the corresponding fields ----

    public static final int STANDARD_COMPRESSION_FAIL_MAX_COUNT = 3;
    public static final int STANDARD_RULELIST_SIZE = 50;
    public static final int STANDARD_PAIR_COUNT = 4;
    public static final int STANDARD_LIM_NO_IMPROVEMENTS = 3;
    /** NOTE(review): identifier is misspelled ("THREHSOLD"); it is public, so it is left unchanged. */
    public static final float STANDARD_NOISE_THREHSOLD = 0.9f;
    public static final String STANDARD_POSTAG_ROOTTYPE = "org.apache.uima.ml.ML.postag";
    public static final int STANDARD_MIN_COVERED_POSITIVES = 1;
    public static final boolean STANDARD_USE_ALL_GENSETS_AT_SPECIALIZATION = true;

    // ---- Tunable parameters (set in the constructor, overridable through setParameters) ----

    /** doRun() stops compressing a slot once this many compression rounds have been rejected. */
    private int compressionFailMaxCount;
    /** Passed to the RapierRulePriorityQueue constructor each time doRun() starts a new slot. */
    private int ruleListSize;
    /** Not read by any code visible in this file; presumably used by findNewRule() — TODO confirm. */
    private int pairCount;
    /** Not read by any code visible in this file; presumably used by findNewRule() — TODO confirm. */
    private int limNoImprovements;
    /** doRun() rejects a candidate rule whose noiseValue() is below this value. */
    private float noiseThreshold;
    /** Type name resolved against the CAS type system when adding tag constraints; null or empty disables that step. */
    private String posTagRootTypeName;
    /** doRun() rejects a candidate rule that covers fewer positive examples than this. */
    private int minCoveredPositives;
    /** When false, specializePreFiller() computes only the first of its three generalization sets. */
    private boolean useAllGenSetsAtSpecialization;

    // ---- Working state ----

    /** Covering statistics of already tested rules, keyed by rule string; cleared at the start of each slot. */
    private Map<String, TextRulerStatisticsCollector> cachedTestedRuleStatistics;
    /** Size of slotRules right after fillSlotRulesWithMostSpecificRules(); used for the status ratio. */
    private int initialRuleBaseSize;
    /** Positive examples of the slot currently being processed. */
    private List<TextRulerExample> examples;
    /** Rule base of the slot currently being processed; replaced whenever doRun() moves to the next slot. */
    private TextRulerRuleList slotRules;
    /** Priority queue recreated per slot in doRun(). */
    private RapierRulePriorityQueue ruleList;
    /** Name of the slot currently being processed by doRun(). */
    private String currentSlotName;

    /**
     * Creates a learner whose tunable parameters all start at their {@code STANDARD_*} values
     * and whose rule-statistics cache is empty.
     *
     * <p>All arguments are forwarded unchanged to the superclass constructor; this class does
     * not interpret them itself.
     */
    public Rapier(String str, String str2, String str3, String[] strArr, Set<String> set, TextRulerLearnerDelegate textRulerLearnerDelegate) {
        super(str, str2, str3, strArr, set, textRulerLearnerDelegate);

        // Tunables: the named constants carry exactly the values that were previously spelled out.
        compressionFailMaxCount = STANDARD_COMPRESSION_FAIL_MAX_COUNT;
        ruleListSize = STANDARD_RULELIST_SIZE;
        pairCount = STANDARD_PAIR_COUNT;
        limNoImprovements = STANDARD_LIM_NO_IMPROVEMENTS;
        noiseThreshold = STANDARD_NOISE_THREHSOLD;
        posTagRootTypeName = STANDARD_POSTAG_ROOTTYPE;
        minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES;
        useAllGenSetsAtSpecialization = STANDARD_USE_ALL_GENSETS_AT_SPECIALIZATION;

        cachedTestedRuleStatistics = new HashMap<String, TextRulerStatisticsCollector>();
    }

    /**
     * Learns rules for every configured slot name, one slot after the other.
     *
     * <p>For each slot the positive examples are collected, the rule base is seeded with one
     * most-specific rule per example, and {@link #findNewRule()} is then called repeatedly.
     * An accepted rule replaces the rules it empirically subsumes; a rejected one increases the
     * fail counter. The slot is finished once the counter reaches {@code compressionFailMaxCount}.
     * The method returns early, without the final "Done" status, whenever {@code shouldAbort()}
     * reports true.
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    protected void doRun() {
        for (String slotName : this.slotNames) {
            int failedRounds = 0;
            currentSlotName = slotName;
            cachedTestedRuleStatistics.clear();
            exampleDocuments.createExamplesForTarget(new TextRulerTarget(currentSlotName, this));
            examples = exampleDocuments.getAllPositiveExamples();
            if (shouldAbort()) {
                return;
            }

            slotRules = new TextRulerRuleList();
            ruleList = new RapierRulePriorityQueue(ruleListSize);
            TextRulerToolkit.log("--- RAPIER START for Slot " + currentSlotName);
            sendStatusUpdateToDelegate("Creating initial rule base...", TextRulerLearner.TextRulerLearnerState.ML_INITIALIZING, false);
            fillSlotRulesWithMostSpecificRules();
            updateCompressionStatusString();

            while (failedRounds < compressionFailMaxCount) {
                TextRulerToolkit.log("***** NEW COMPRESSION ROUND; FailCount = " + failedRounds);
                if (shouldAbort()) {
                    return;
                }
                RapierRule candidate = findNewRule();
                if (isRejectedCandidate(candidate)) {
                    failedRounds++;
                } else {
                    addRuleAndRemoveEmpiricallySubsumedRules(candidate);
                }
            }
        }

        sendStatusUpdateToDelegate("Done", TextRulerLearner.TextRulerLearnerState.ML_DONE, true);
        cachedTestedRuleStatistics.clear();
        TextRulerToolkit.log("--- RAPIER END");
    }

    /**
     * Tells whether a compression round produced nothing usable. The checks run in the same
     * order, with the same comparison operators, as the single condition they replace.
     */
    private boolean isRejectedCandidate(RapierRule candidate) {
        if (candidate == null) {
            return true;
        }
        if (candidate.getCoveringStatistics().getCoveredPositivesCount() < minCoveredPositives) {
            return true;
        }
        // Deliberately phrased as "below threshold" rather than "not at least threshold":
        // the two differ for NaN, and the first form is the one being preserved.
        if (candidate.noiseValue() < noiseThreshold) {
            return true;
        }
        return slotRules.contains(candidate);
    }

    /**
     * Reports the current compression progress to the delegate: the current rule count, the
     * initial rule count and their ratio as a rounded percentage.
     *
     * <p>The ratio is computed in floating point. Dividing the two {@code int} sizes directly
     * truncates to 0 for every partial ratio, and throws {@link ArithmeticException} when the
     * initial rule base is empty (e.g. a slot without positive examples). An empty initial rule
     * base is reported as 0 %.
     */
    private void updateCompressionStatusString() {
        int currentSize = this.slotRules.size();
        int initialSize = this.initialRuleBaseSize;
        long percent = initialSize == 0 ? 0L : Math.round((currentSize * 100.0d) / initialSize);
        sendStatusUpdateToDelegate("Compressing... (Rules = " + currentSize + "/" + initialSize + "  = " + percent + " % ratio)", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, true);
    }

    /**
     * Adds a tag constraint to {@code rapierRuleItem} when an annotation of the configured POS
     * tag root type spans exactly the same text as {@code annotationFS}.
     *
     * <p>Nothing is added when no POS tag root type name is configured, when that type is not
     * present in the document's type system, or when the first annotation found within the
     * bounds does not match the begin and end offsets exactly.
     *
     * @param rapierRuleItem   the rule item to extend
     * @param annotationFS     the annotation the rule item was built from
     * @param textRulerExample the example whose document CAS is searched
     */
    private void addAvailablePosTagConstraintToItem(RapierRuleItem rapierRuleItem, AnnotationFS annotationFS, TextRulerExample textRulerExample) {
        if (this.posTagRootTypeName == null || this.posTagRootTypeName.length() <= 0) {
            return;
        }
        CAS documentCAS = textRulerExample.getDocumentCAS();
        TypeSystem typeSystem = documentCAS.getTypeSystem();
        Type posTagRootType = typeSystem.getType(this.posTagRootTypeName);
        // The value that can legitimately be missing is the looked-up type, not the type system
        // (which has already been dereferenced on the line above).
        if (posTagRootType == null) {
            return;
        }
        List<AnnotationFS> candidates = TextRulerToolkit.getAnnotationsWithinBounds(documentCAS, annotationFS.getBegin(), annotationFS.getEnd(), null, posTagRootType);
        if (candidates.size() > 0) {
            AnnotationFS posTag = candidates.get(0);
            boolean sameSpan = posTag.getBegin() == annotationFS.getBegin() && posTag.getEnd() == annotationFS.getEnd();
            if (sameSpan) {
                rapierRuleItem.addTagConstraint(posTag.getType().getShortName());
            }
        }
    }

    /**
     * Replaces the content of {@code slotRules} with one rule per positive example and records
     * the resulting size in {@code initialRuleBaseSize}.
     *
     * <p>Each rule gets one item per {@code TM_ANY_TYPE_NAME} annotation: the annotations before
     * the example go into the pre-filler (added from the last list element to the first), the
     * ones within the example's bounds into the filler, and the ones after it into the
     * post-filler. The rule's covering statistics contain exactly its own example.
     */
    private void fillSlotRulesWithMostSpecificRules() {
        this.slotRules.clear();
        for (TextRulerExample example : this.examples) {
            RapierRule seedRule = new RapierRule(this, example.getTarget());
            TextRulerAnnotation slot = example.getAnnotation();
            Type anyType = example.getDocumentCAS().getTypeSystem().getType(TextRulerToolkit.TM_ANY_TYPE_NAME);

            List<AnnotationFS> before = TextRulerToolkit.getAnnotationsBeforePosition(example.getDocumentCAS(), slot.getBegin(), -1, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), anyType);
            List<AnnotationFS> after = TextRulerToolkit.getAnnotationsAfterPosition(example.getDocumentCAS(), slot.getEnd(), -1, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), anyType);
            List<AnnotationFS> inside = TextRulerToolkit.getAnnotationsWithinBounds(example.getDocumentCAS(), slot.getBegin(), slot.getEnd(), TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), anyType);

            // Pre-filler items are added walking the "before" list backwards.
            int index = before.size() - 1;
            while (index >= 0) {
                seedRule.addPreFillerItem(createMostSpecificItem(before.get(index), example));
                index--;
            }
            for (AnnotationFS token : inside) {
                seedRule.addFillerItem(createMostSpecificItem(token, example));
            }
            for (AnnotationFS token : after) {
                seedRule.addPostFillerItem(createMostSpecificItem(token, example));
            }

            TextRulerStatisticsCollector ownExampleOnly = new TextRulerStatisticsCollector();
            ownExampleOnly.addCoveredPositive(example);
            seedRule.setCoveringStatistics(ownExampleOnly);
            this.slotRules.add(seedRule);
        }
        this.initialRuleBaseSize = this.slotRules.size();
    }

    /**
     * Builds a rule item carrying a word constraint for {@code token} plus, when available,
     * the tag constraint added by {@code addAvailablePosTagConstraintToItem}.
     */
    private RapierRuleItem createMostSpecificItem(AnnotationFS token, TextRulerExample example) {
        RapierRuleItem item = new RapierRuleItem();
        item.addWordConstraint(new TextRulerWordConstraint(new TextRulerAnnotation(token, example.getDocument())));
        addAvailablePosTagConstraintToItem(item, token, example);
        return item;
    }

    /**
     * Adds {@code rapierRule} to the slot's rule base and drops every existing rule whose
     * covered positive examples are all covered by the new rule as well. Afterwards the
     * compression status is reported to the delegate.
     *
     * <p>Does nothing when the rule base already contains {@code rapierRule}.
     *
     * @param rapierRule the rule to add; its covering statistics must already be set
     */
    protected void addRuleAndRemoveEmpiricallySubsumedRules(RapierRule rapierRule) {
        if (!this.slotRules.contains(rapierRule)) {
            List<TextRulerRule> subsumed = new ArrayList<TextRulerRule>();
            Set<TextRulerExample> coveredByNewRule = rapierRule.getCoveringStatistics().getCoveredPositiveExamples();

            // Collect first, remove afterwards: the rule base must not change while iterating it.
            for (TextRulerRule existing : this.slotRules) {
                Set<TextRulerExample> coveredByExisting = existing.getCoveringStatistics().getCoveredPositiveExamples();
                if (coveredByNewRule.containsAll(coveredByExisting)) {
                    subsumed.add(existing);
                }
            }
            for (TextRulerRule obsolete : subsumed) {
                this.slotRules.remove(obsolete);
            }

            this.slotRules.add(rapierRule);
            updateCompressionStatusString();
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:105:0x0420, code lost:
    
        return r6.ruleList.peek();
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder left by the decompiler: the real body of this method could not be
     * reconstructed, so in this source form every call throws
     * {@link UnsupportedOperationException}.
     *
     * <p>{@code doRun()} calls this method once per compression round and treats a
     * {@code null} result as a failed round. According to the decompiler warning above, the
     * lost code ended by returning {@code ruleList.peek()}; nothing else about the original
     * logic can be read from this file.
     *
     * @return never returns normally in this source form
     * @throws UnsupportedOperationException always
     */
    protected org.apache.uima.textmarker.textruler.learner.rapier.RapierRule findNewRule() {
        /*
            Method dump skipped, instructions count: 1057
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.apache.uima.textmarker.textruler.learner.rapier.Rapier.findNewRule():org.apache.uima.textmarker.textruler.learner.rapier.RapierRule");
    }

    /**
     * Creates one new rule for every generalization of the two rules' filler patterns.
     *
     * <p>Each new rule uses the target of {@code rapierRule}, contains copies of the items of
     * one generalized filler pattern, has no pre- or post-filler items, keeps copies of both
     * input rules as its parents with all parent pre-/post-filler counters set to 0, and is
     * flagged as needing compilation.
     *
     * @param rapierRule  first rule of the pair
     * @param rapierRule2 second rule of the pair
     * @return the generated rules, one per generalized filler pattern
     */
    private List<RapierRule> getFillerGeneralizationsForRulePair(RapierRule rapierRule, RapierRule rapierRule2) {
        TextRulerToolkit.log("------------------------------------------------------------------------------------------");
        TextRulerToolkit.log("getFillerGeneralizationsForRulePair:");
        TextRulerToolkit.log("Rule1: " + rapierRule.getRuleString());
        TextRulerToolkit.log("Rule2: " + rapierRule2.getRuleString());

        List<RapierRule> generalizedRules = new ArrayList<RapierRule>();
        List<TextRulerRulePattern> fillerPatterns = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(rapierRule.getFillerPattern(), rapierRule2.getFillerPattern());
        for (TextRulerRulePattern fillerPattern : fillerPatterns) {
            RapierRule child = new RapierRule(this, rapierRule.getTarget());
            for (TextRulerRuleItem fillerItem : fillerPattern) {
                child.addFillerItem(fillerItem.copy());
            }

            child.setParent1(rapierRule.copy());
            child.setParent1PreFiller_n(0);
            child.setParent1PostFiller_n(0);

            child.setParent2(rapierRule2.copy());
            child.setParent2PreFiller_n(0);
            child.setParent2PostFiller_n(0);

            generalizedRules.add(child);
            child.setNeedsCompile(true);
        }

        TextRulerToolkit.log("   getGeneralizationsForRulePair result list size = " + generalizedRules.size());
        return generalizedRules;
    }

    /**
     * Produces specializations of {@code rapierRule} by extending its pre-filler with
     * generalizations of windows taken from the pre-filler patterns of its two parents.
     *
     * <p>For a parent whose pre-filler pattern has length {@code L} and whose recorded
     * pre-filler count on {@code rapierRule} is {@code p}, a window ends at index
     * {@code L - p - 1} and starts at {@code L - i} or {@code L - i + 1}:
     * <ol>
     *   <li>parent 1 from {@code L1 - i}, parent 2 from {@code L2 - i + 1} (always computed);</li>
     *   <li>parent 1 from {@code L1 - i + 1}, parent 2 from {@code L2 - i};</li>
     *   <li>both parents from {@code L - i}.</li>
     * </ol>
     * Sets 2 and 3 are only computed when {@code useAllGenSetsAtSpecialization} is true. A
     * window pair that is empty on both sides is not generalized. The distinct generalized
     * patterns are each appended to a copy of {@code rapierRule} (last pattern item first), and
     * both parent pre-filler counters of the copy are set to {@code i}.
     *
     * <p>NOTE(review): a window whose start index is negative stays empty instead of being
     * clamped to index 0 — confirm that this is intended.
     *
     * @param rapierRule the rule to specialize; both parents must be set
     * @param i          how far the windows reach back from the end of the parent pre-fillers
     * @return one specialized copy of {@code rapierRule} per distinct generalized pattern
     */
    public List<RapierRule> specializePreFiller(RapierRule rapierRule, int i) {
        RapierRule parent1 = rapierRule.getParent1();
        RapierRule parent2 = rapierRule.getParent2();
        int parent1PreFiller_n = rapierRule.getParent1PreFiller_n();
        int parent2PreFiller_n = rapierRule.getParent2PreFiller_n();
        TextRulerRulePattern parentPattern1 = parent1.getPreFillerPattern();
        TextRulerRulePattern parentPattern2 = parent2.getPreFillerPattern();
        int lastIndex1 = (parentPattern1.size() - parent1PreFiller_n) - 1;
        int lastIndex2 = (parentPattern2.size() - parent2PreFiller_n) - 1;

        TextRulerRulePattern window1 = new TextRulerRulePattern();
        TextRulerRulePattern window2 = new TextRulerRulePattern();

        // Every generalization list is declared up front and defaults to null ("not computed").
        List<TextRulerRulePattern> generalizationsA = null;
        List<TextRulerRulePattern> generalizationsB = null;
        List<TextRulerRulePattern> generalizationsC = null;

        refillPreFillerWindow(window1, parentPattern1, parentPattern1.size() - i, lastIndex1);
        refillPreFillerWindow(window2, parentPattern2, (parentPattern2.size() - i) + 1, lastIndex2);
        if (window1.size() + window2.size() > 0) {
            generalizationsA = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
        }

        if (this.useAllGenSetsAtSpecialization) {
            refillPreFillerWindow(window1, parentPattern1, (parentPattern1.size() - i) + 1, lastIndex1);
            refillPreFillerWindow(window2, parentPattern2, parentPattern2.size() - i, lastIndex2);
            if (window1.size() + window2.size() > 0) {
                generalizationsB = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
            }

            refillPreFillerWindow(window1, parentPattern1, parentPattern1.size() - i, lastIndex1);
            refillPreFillerWindow(window2, parentPattern2, parentPattern2.size() - i, lastIndex2);
            if (window1.size() + window2.size() > 0) {
                generalizationsC = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
            }
        }

        // A set removes patterns that more than one window combination produced.
        Set<TextRulerRulePattern> distinctPatterns = new HashSet<TextRulerRulePattern>();
        if (generalizationsA != null) {
            distinctPatterns.addAll(generalizationsA);
        }
        if (generalizationsB != null) {
            distinctPatterns.addAll(generalizationsB);
        }
        if (generalizationsC != null) {
            distinctPatterns.addAll(generalizationsC);
        }

        List<RapierRule> specializedRules = new ArrayList<RapierRule>();
        for (TextRulerRulePattern pattern : distinctPatterns) {
            RapierRule specialized = rapierRule.copy();
            for (int index = pattern.size() - 1; index >= 0; index--) {
                specialized.addPreFillerItem(pattern.get(index));
            }
            specialized.setParent1PreFiller_n(i);
            specialized.setParent2PreFiller_n(i);
            specializedRules.add(specialized);
        }
        return specializedRules;
    }

    /**
     * Empties {@code window} and fills it with {@code source[first..last]} (both inclusive).
     * Nothing is added when {@code first} is negative or greater than {@code last}.
     */
    private static void refillPreFillerWindow(TextRulerRulePattern window, TextRulerRulePattern source, int first, int last) {
        window.clear();
        for (int index = first; index >= 0 && index <= last; index++) {
            window.add(source.get(index));
        }
    }

    /**
     * Produces specializations of {@code rapierRule} by extending its post-filler with
     * generalizations of windows taken from the post-filler patterns of its two parents.
     *
     * <p>A window of a parent starts at that parent's post-filler count recorded on
     * {@code rapierRule} and ends before index {@code i} or {@code i - 1} (and never beyond the
     * pattern's end). Three window pairs are generalized: ({@code i}, {@code i - 1}),
     * ({@code i - 1}, {@code i}) and ({@code i}, {@code i}); a pair that is empty on both sides
     * is skipped. The distinct generalized patterns are each appended, in pattern order, to a
     * copy of {@code rapierRule}, and both parent post-filler counters of the copy are set to
     * {@code i}.
     *
     * <p>An {@code i} of 0 is only logged as an error; processing continues.
     *
     * <p>NOTE(review): unlike {@code specializePreFiller}, this method does not consult
     * {@code useAllGenSetsAtSpecialization} and always computes all three sets — confirm
     * whether that is intended.
     *
     * @param rapierRule the rule to specialize; both parents must be set
     * @param i          exclusive upper index bound of the widest window
     * @return one specialized copy of {@code rapierRule} per distinct generalized pattern
     */
    public List<RapierRule> specializePostFiller(RapierRule rapierRule, int i) {
        if (i == 0) {
            TextRulerToolkit.log("ERROR ! N SHOULD NOT BE 0!");
        }
        RapierRule firstParent = rapierRule.getParent1();
        RapierRule secondParent = rapierRule.getParent2();
        int start1 = rapierRule.getParent1PostFiller_n();
        int start2 = rapierRule.getParent2PostFiller_n();
        TextRulerRulePattern source1 = firstParent.getPostFillerPattern();
        TextRulerRulePattern source2 = secondParent.getPostFillerPattern();

        // The same two pattern objects are emptied and refilled for each window pair.
        TextRulerRulePattern window1 = new TextRulerRulePattern();
        TextRulerRulePattern window2 = new TextRulerRulePattern();

        List<TextRulerRulePattern> wideNarrow = null;
        refillPostFillerWindow(window1, source1, start1, i);
        refillPostFillerWindow(window2, source2, start2, i - 1);
        if (window1.size() + window2.size() > 0) {
            wideNarrow = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
        }

        List<TextRulerRulePattern> narrowWide = null;
        refillPostFillerWindow(window1, source1, start1, i - 1);
        refillPostFillerWindow(window2, source2, start2, i);
        if (window1.size() + window2.size() > 0) {
            narrowWide = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
        }

        List<TextRulerRulePattern> wideWide = null;
        refillPostFillerWindow(window1, source1, start1, i);
        refillPostFillerWindow(window2, source2, start2, i);
        if (window1.size() + window2.size() > 0) {
            wideWide = RapierGeneralizationHelper.getGeneralizationsForRuleItemPatterns(window1, window2);
        }

        Set<TextRulerRulePattern> distinctPatterns = new HashSet<TextRulerRulePattern>();
        if (wideNarrow != null) {
            distinctPatterns.addAll(wideNarrow);
        }
        if (narrowWide != null) {
            distinctPatterns.addAll(narrowWide);
        }
        if (wideWide != null) {
            distinctPatterns.addAll(wideWide);
        }

        List<RapierRule> specializedRules = new ArrayList<RapierRule>();
        for (TextRulerRulePattern pattern : distinctPatterns) {
            RapierRule specialized = rapierRule.copy();
            for (TextRulerRuleItem item : pattern) {
                specialized.addPostFillerItem(item);
            }
            specialized.setParent1PostFiller_n(i);
            specialized.setParent2PostFiller_n(i);
            specializedRules.add(specialized);
        }
        return specializedRules;
    }

    /**
     * Empties {@code window} and fills it with the items of {@code source} from index
     * {@code from} up to, but excluding, the smaller of {@code toExclusive} and the source size.
     */
    private static void refillPostFillerWindow(TextRulerRulePattern window, TextRulerRulePattern source, int from, int toExclusive) {
        window.clear();
        for (int index = from; index < source.size() && index < toExclusive; index++) {
            window.add(source.get(index));
        }
    }

    /**
     * Always answers {@code false}.
     * NOTE(review): judging by the name, the base class uses this to decide whether negative
     * covered instances are collected while testing rules — confirm against
     * {@code TextRulerBasicLearner}.
     *
     * @return {@code false}
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    public boolean collectNegativeCoveredInstancesWhenTesting() {
        return false;
    }

    /**
     * Returns the current rule base rendered as a TM file string (header from
     * {@code getTMFileHeaderString()}, second argument 1000), or a fixed placeholder text while
     * no rule base exists yet.
     *
     * @return the rendered rules or {@code "No results available yet!"}
     */
    @Override // org.apache.uima.textmarker.textruler.extension.TextRulerLearner
    public String getResultString() {
        if (this.slotRules == null) {
            return "No results available yet!";
        }
        return this.slotRules.getTMFileString(getTMFileHeaderString(), 1000);
    }

    /**
     * Overrides the tunable parameters for which {@code map} contains an entry; parameters
     * whose key is absent keep their current value.
     *
     * <p>Values are cast to the expected wrapper type ({@code Integer}, {@code Float},
     * {@code Boolean} or {@code String}); a value of another type therefore raises
     * {@link ClassCastException}, and a {@code null} value for a numeric or boolean parameter
     * raises {@link NullPointerException} on unboxing.
     *
     * @param map parameter values keyed by the {@code *_KEY} constants of this class
     */
    @Override // org.apache.uima.textmarker.textruler.extension.TextRulerLearner
    public void setParameters(Map<String, Object> map) {
        if (map.containsKey(COMPRESSION_FAIL_MAX_COUNT_KEY)) {
            compressionFailMaxCount = (Integer) map.get(COMPRESSION_FAIL_MAX_COUNT_KEY);
        }
        if (map.containsKey(RULELIST_SIZE_KEY)) {
            ruleListSize = (Integer) map.get(RULELIST_SIZE_KEY);
        }
        if (map.containsKey(PAIR_COUNT_KEY)) {
            pairCount = (Integer) map.get(PAIR_COUNT_KEY);
        }
        if (map.containsKey(LIM_NO_IMPROVEMENTS_KEY)) {
            limNoImprovements = (Integer) map.get(LIM_NO_IMPROVEMENTS_KEY);
        }
        if (map.containsKey(NOISE_THESHOLD_KEY)) {
            noiseThreshold = (Float) map.get(NOISE_THESHOLD_KEY);
        }
        if (map.containsKey(POSTAG_ROOTTYPE_KEY)) {
            posTagRootTypeName = (String) map.get(POSTAG_ROOTTYPE_KEY);
        }
        if (map.containsKey(MIN_COVERED_POSITIVES_KEY)) {
            minCoveredPositives = (Integer) map.get(MIN_COVERED_POSITIVES_KEY);
        }
        if (map.containsKey(USE_ALL_GENSETS_AT_SPECIALIZATION_KEY)) {
            useAllGenSetsAtSpecialization = (Boolean) map.get(USE_ALL_GENSETS_AT_SPECIALIZATION_KEY);
        }
    }

    /**
     * Makes sure every rule in {@code list} has covering statistics, testing only the rules
     * whose rule string is not in the statistics cache.
     *
     * <p>Cached rules receive a copy of the cached statistics. The remaining rules are tested
     * on the example documents in one call; unless an abort was requested meanwhile, copies of
     * their statistics are then cached. Before caching, existing entries are evicted (in the
     * cache's own iteration order) until cache size plus the number of new entries no longer
     * exceeds 10000, or the cache is empty.
     *
     * @param list the rules that need covering statistics
     */
    protected void testRulesIfNotCached(List<RapierRule> list) {
        List<TextRulerRule> untested = new ArrayList<TextRulerRule>();
        for (RapierRule rule : list) {
            String cacheKey = rule.getRuleString();
            if (!this.cachedTestedRuleStatistics.containsKey(cacheKey)) {
                untested.add(rule);
                continue;
            }
            rule.setCoveringStatistics(this.cachedTestedRuleStatistics.get(cacheKey).copy());
            TextRulerToolkit.log("CACHE HIT; size=" + this.cachedTestedRuleStatistics.size());
        }
        if (untested.size() <= 0) {
            return;
        }

        testRulesOnDocumentSet(untested, this.exampleDocuments);
        if (shouldAbort()) {
            return;
        }

        // Make room first: drop the entries the key iteration yields first, one at a time.
        Iterator<String> evictable = this.cachedTestedRuleStatistics.keySet().iterator();
        while (this.cachedTestedRuleStatistics.size() + untested.size() > 10000 && evictable.hasNext()) {
            evictable.next();
            evictable.remove();
        }
        for (TextRulerRule tested : untested) {
            this.cachedTestedRuleStatistics.put(tested.getRuleString(), tested.getCoveringStatistics().copy());
        }
    }
}
