package org.apache.uima.textmarker.textruler.learner.lp2;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.textmarker.textruler.TextRulerPlugin;
import org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner;
import org.apache.uima.textmarker.textruler.core.TextRulerExample;
import org.apache.uima.textmarker.textruler.core.TextRulerExampleDocument;
import org.apache.uima.textmarker.textruler.core.TextRulerRule;
import org.apache.uima.textmarker.textruler.core.TextRulerRuleList;
import org.apache.uima.textmarker.textruler.core.TextRulerShiftExample;
import org.apache.uima.textmarker.textruler.core.TextRulerStatisticsCollector;
import org.apache.uima.textmarker.textruler.core.TextRulerTarget;
import org.apache.uima.textmarker.textruler.core.TextRulerToolkit;
import org.apache.uima.textmarker.textruler.extension.TextRulerLearner;
import org.apache.uima.textmarker.textruler.extension.TextRulerLearnerDelegate;
import org.apache.uima.util.FileUtils;

/* loaded from: input_file:org/apache/uima/textmarker/textruler/learner/lp2/BasicLP2.class */
public abstract class BasicLP2 extends TextRulerBasicLearner {
    // Keys looked up by setParameters(Map) to override the learner settings below.
    public static final String WINDOW_SIZE_KEY = "windowSize";
    public static final String CURRENT_BEST_RULES_SIZE_KEY = "currentBestRulesSize";
    public static final String CURRENT_CONTEXTUAL_RULES_SIZE_KEY = "currentContextualRulesSize";
    public static final String MIN_COVERED_POSITIVES_PER_RULE_KEY = "minCoveredPositivesPerRule";
    public static final String MAX_ERROR_THRESHOLD_KEY = "maxErrorThreshold";
    // Standard values; the constructor initialises the corresponding fields to these same values.
    public static final int STANDARD_WINDOW_SIZE = 2;
    public static final int STANDARD_MAX_CURRENT_BEST_RULES_COUNT = 4;
    public static final int STANDARD_MAX_CONTEXTUAL_RULES_COUNT = 4;
    public static final int STANDARD_MIN_COVERED_POSITIVES_PER_RULE = 1;
    public static final float STANDARD_MAX_ERROR_THRESHOLD = 0.1f;
    // Not referenced inside this class; presumably used by subclasses or helpers - TODO confirm.
    public static final String CORRECTION_ANNOTATION_NAME = "lp2shift";
    private static final int STANDARD_SHIFT_SIZE = 2;
    // Capacities handed to the per-example rule queues created in learnTaggingRules.
    protected int maxCurrentBestRulesCount;
    protected int maxCurrentContextualRulesCount;
    // Configurable via setParameters; not read in this class (presumably consumed by subclasses).
    protected int windowSize;
    // Passed as the maximum shift distance of the correction target in runForSlotName.
    protected int shiftSize;
    // Configurable via setParameters; not read in this class (presumably consumed by subclasses).
    protected int minCoveredPositives;
    protected float maxErrorThreshold;
    // Positive examples of the target that is currently being learned.
    protected List<TextRulerExample> examples;
    // Positive examples covered by rules already accepted into bestRulesPool.
    protected Set<TextRulerExample> coveredExamples;
    // Size of the slot's token count histogram minus one; drives the ANY[0, n] gap in getResultString.
    protected int slotMaximumTokenCount;
    // Rule queues that are re-created for every seed example in learnTaggingRules.
    protected LP2CurrentBestRulesQueue currentBestRules;
    protected LP2CurrentBestRulesQueue currentContextualRules;
    // Rules accumulated over all seed examples of the current learnTaggingRules call.
    protected TextRulerRuleList bestRulesPool;
    protected TextRulerRuleList contextRulesPool;
    // Rule text captured by runForSlotName after each boundary phase; read by getResultString.
    protected String leftBoundaryBestRulesString;
    protected String rightBoundaryBestRulesString;
    protected String leftBoundaryContextualRulesString;
    protected String rightBoundaryContextualRulesString;

    /**
     * Creates the learner and initialises every LP2 setting to its standard value.
     * All constructor arguments are forwarded unchanged to the superclass constructor.
     */
    public BasicLP2(String str, String str2, String str3, String[] strArr, Set<String> set, TextRulerLearnerDelegate textRulerLearnerDelegate) {
        super(str, str2, str3, strArr, set, textRulerLearnerDelegate);

        // Tunable settings start at their documented standard values.
        this.windowSize = STANDARD_WINDOW_SIZE;
        this.shiftSize = STANDARD_SHIFT_SIZE;
        this.maxCurrentBestRulesCount = STANDARD_MAX_CURRENT_BEST_RULES_COUNT;
        this.maxCurrentContextualRulesCount = STANDARD_MAX_CONTEXTUAL_RULES_COUNT;
        this.minCoveredPositives = STANDARD_MIN_COVERED_POSITIVES_PER_RULE;
        this.maxErrorThreshold = STANDARD_MAX_ERROR_THRESHOLD;

        // Nothing has been learned yet.
        this.slotMaximumTokenCount = 0;
        this.leftBoundaryBestRulesString = null;
        this.leftBoundaryContextualRulesString = null;
        this.rightBoundaryBestRulesString = null;
        this.rightBoundaryContextualRulesString = null;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Learns tagging rules for the given target.
     *
     * <p>Examples are (re)created for the target, then every positive example that is not yet
     * covered by an accepted best rule is handed to {@link #induceRulesFromExample}. After each
     * induction round the contents of the two per-example queues are merged into the best-rules
     * pool and the contextual-rules pool.
     *
     * @param textRulerTarget the target to learn rules for; its {@code type} selects the status message
     * @param textRulerRuleList if non-null, receives all rules of the contextual-rules pool
     * @return the best-rules pool, or {@code null} if an abort was requested right after the
     *         examples were created; both pool fields are reset to {@code null} before returning
     */
    public TextRulerRuleList learnTaggingRules(TextRulerTarget textRulerTarget, TextRulerRuleList textRulerRuleList) {
        final String statusMessage;
        if (textRulerTarget.type == TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY) {
            statusMessage = "Creating Left-Boundary Examples...";
        } else if (textRulerTarget.type == TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY) {
            statusMessage = "Creating Right-Boundary Examples...";
        } else if (textRulerTarget.type == TextRulerTarget.MLTargetType.SINGLE_LEFT_CORRECTION) {
            statusMessage = "Creating Left Correction Examples...";
        } else {
            statusMessage = "Creating Right Correction Examples...";
        }
        sendStatusUpdateToDelegate(statusMessage, TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);

        this.exampleDocuments.clearCurrentExamples();
        this.exampleDocuments.createExamplesForTarget(textRulerTarget);
        this.examples = this.exampleDocuments.getAllPositiveExamples();
        if (shouldAbort()) {
            return null;
        }

        this.bestRulesPool = new TextRulerRuleList();
        this.contextRulesPool = new TextRulerRuleList();
        this.coveredExamples = new HashSet<TextRulerExample>();

        int roundNumber = 0;
        for (TextRulerExample seedExample : this.examples) {
            // Examples already covered by an accepted best rule need no rule of their own.
            if (this.coveredExamples.contains(seedExample)) {
                continue;
            }
            if (shouldAbort()) {
                break;
            }
            roundNumber++;
            this.currentBestRules = new LP2CurrentBestRulesQueue(this.maxCurrentBestRulesCount);
            this.currentContextualRules = new LP2CurrentBestRulesQueue(this.maxCurrentContextualRulesCount);
            induceRulesFromExample(seedExample, roundNumber);

            for (Iterator<LP2Rule> best = this.currentBestRules.iterator(); best.hasNext();) {
                addToFinalBestRulesPool(best.next());
            }
            for (Iterator<LP2Rule> contextual = this.currentContextualRules.iterator(); contextual.hasNext();) {
                addToFinalContextRulesPool(contextual.next());
            }
            sendStatusUpdateToDelegate("New Rules added.", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, true);
        }

        final TextRulerRuleList learnedBestRules = this.bestRulesPool;
        if (textRulerRuleList != null) {
            for (Iterator<TextRulerRule> pooled = this.contextRulesPool.iterator(); pooled.hasNext();) {
                textRulerRuleList.add(pooled.next());
            }
        }
        // The pools are only valid while learning is in progress.
        this.bestRulesPool = null;
        this.contextRulesPool = null;
        return learnedBestRules;
    }

    /**
     * Loads a CAS through the superclass and then adds the boundary annotations for all slots.
     *
     * @return the CAS returned by the superclass, after boundary preparation
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner, org.apache.uima.textmarker.textruler.core.CasCacheLoader
    public CAS loadCAS(String str, CAS cas) {
        final CAS loaded = super.loadCAS(str, cas);
        prepareCASWithBoundaries(loaded);
        return loaded;
    }

    /**
     * Creates the boundary annotations in the given CAS, once for every configured slot name,
     * using the learner's filter set.
     *
     * @param cas the CAS to annotate
     */
    public void prepareCASWithBoundaries(CAS cas) {
        final String[] slots = this.slotNames;
        for (int slotIndex = 0; slotIndex < slots.length; slotIndex++) {
            TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slots[slotIndex], this.filterSet);
        }
    }

    /**
     * Applies {@link #prepareCASWithBoundaries(CAS)} to every CAS currently held in the
     * example documents' cache.
     */
    public void prepareCachedCASesWithBoundaries() {
        for (Iterator<CAS> cached = this.exampleDocuments.getCachedCASes().iterator(); cached.hasNext();) {
            prepareCASWithBoundaries(cached.next());
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the superclass clean-up and then drops all references to learning state
     * (examples, covered examples, rule queues and rule pools).
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    public void cleanUp() {
        super.cleanUp();

        // Example bookkeeping.
        this.coveredExamples = null;
        this.examples = null;

        // Per-example queues.
        this.currentContextualRules = null;
        this.currentBestRules = null;

        // Accumulated pools.
        this.contextRulesPool = null;
        this.bestRulesPool = null;
    }

    /**
     * Entry point of the learner: prepares all cached CASes with boundary annotations,
     * runs the learning for each slot name in order and finally reports completion.
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    protected void doRun() {
        TextRulerToolkit.logIfDebug("--- LP2 START");
        prepareCachedCASesWithBoundaries();
        for (String slotName : this.slotNames) {
            runForSlotName(slotName);
        }
        sendStatusUpdateToDelegate("Done", TextRulerLearner.TextRulerLearnerState.ML_DONE, true);
        TextRulerToolkit.logIfDebug("--- LP2 END");
    }

    /**
     * Runs the complete LP2 learning sequence for one slot.
     *
     * <p>Steps: build the slot's token count histogram (its size minus one becomes
     * {@code slotMaximumTokenCount}), learn left-boundary rules, learn right-boundary rules,
     * write a fixed rules header file to the temp directory and finally run the learning for
     * the left-correction target. The rule texts of the two boundary phases are stored in the
     * corresponding {@code *RulesString} fields for {@link #getResultString()}.
     *
     * <p>The method returns early whenever an abort has been requested between two phases.
     *
     * @param str the slot name to learn rules for
     */
    protected void runForSlotName(String str) {
        sendStatusUpdateToDelegate("Creating slot length histogram...", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, false);
        List<Integer> histogram = this.exampleDocuments.getTokenCountHistogrammForSlotName(str, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet));
        if (shouldAbort()) {
            return;
        }
        this.slotMaximumTokenCount = histogram.size() - 1;

        // The same list collects the contextual rules of both boundary phases, one after the other.
        TextRulerRuleList contextualRules = new TextRulerRuleList();

        TextRulerRuleList leftBoundaryRules = learnTaggingRules(new TextRulerTarget(str, TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY, this), contextualRules);
        if (leftBoundaryRules != null) {
            this.leftBoundaryBestRulesString = leftBoundaryRules.getRulesString("");
            this.leftBoundaryContextualRulesString = contextualRules.getRulesString("\t");
            leftBoundaryRules.clear();
        }
        if (shouldAbort()) {
            return;
        }

        contextualRules.clear();
        TextRulerRuleList rightBoundaryRules = learnTaggingRules(new TextRulerTarget(str, TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY, this), contextualRules);
        if (rightBoundaryRules != null) {
            this.rightBoundaryBestRulesString = rightBoundaryRules.getRulesString("");
            this.rightBoundaryContextualRulesString = contextualRules.getRulesString("\t");
        }
        // Mirror the check after the left-boundary phase: an aborted run must neither write the
        // rules file nor start the correction phase.
        if (shouldAbort()) {
            return;
        }

        try {
            FileUtils.saveString2File("PACKAGE org.apache.uima.ml;\n\nDocument{->FILTERTYPE(SPACE, BREAK, NBSP, MARKUP)};\n", new File(tempDirectory() + "rules.tm"));
        } catch (IOException e) {
            TextRulerPlugin.error(e);
        }

        TextRulerTarget leftCorrectionTarget = new TextRulerTarget(str, TextRulerTarget.MLTargetType.SINGLE_LEFT_CORRECTION, this);
        leftCorrectionTarget.setMaxShiftDistance(this.shiftSize);
        // NOTE(review): the rules returned for the correction target are not stored anywhere.
        learnTaggingRules(leftCorrectionTarget, null);

        sendStatusUpdateToDelegate("SLOT Done", TextRulerLearner.TextRulerLearnerState.ML_RUNNING, true);
        TextRulerToolkit.logIfDebug("--- LP2 END FOR SLOT:" + str);
    }

    /**
     * Induces rules from one seed example; implemented by concrete LP2 variants.
     *
     * <p>{@code learnTaggingRules} calls this once for every positive example that is not yet
     * covered, right after it has created fresh {@code currentBestRules} and
     * {@code currentContextualRules} queues. When this method returns, the contents of both
     * queues are merged into the final rule pools.
     *
     * @param textRulerExample the uncovered positive example to induce rules from
     * @param i running number of the induction round, starting at 1
     */
    protected abstract void induceRulesFromExample(TextRulerExample textRulerExample, int i);

    /**
     * Adds the rule to the contextual-rules pool unless the pool already contains it.
     *
     * @param lP2Rule the contextual rule to add
     */
    protected void addToFinalContextRulesPool(LP2Rule lP2Rule) {
        final boolean alreadyPooled = this.contextRulesPool.contains(lP2Rule);
        if (!alreadyPooled) {
            this.contextRulesPool.add(lP2Rule);
        }
    }

    /**
     * Adds the rule to the best-rules pool unless the pool already contains it. For a newly
     * added rule, all positive examples it covers are recorded in {@code coveredExamples}.
     *
     * @param lP2Rule the best rule to add
     */
    protected void addToFinalBestRulesPool(LP2Rule lP2Rule) {
        final boolean alreadyPooled = this.bestRulesPool.contains(lP2Rule);
        if (!alreadyPooled) {
            this.bestRulesPool.add(lP2Rule);
            // Examples covered by an accepted best rule are skipped as seeds later on.
            this.coveredExamples.addAll(lP2Rule.getCoveringStatistics().getCoveredPositiveExamples());
        }
    }

    /**
     * Assembles the rule script for the current learning state.
     *
     * <p>The script consists of the file header, the left and right boundary rules, a
     * {@code contextualRules} block (only when left-boundary rules have been stored), the
     * slot-building rules for the first slot name and the final clean-up rules. While a phase is
     * still running, the live rule pools are used in place of the not-yet-stored rule strings.
     *
     * @return the complete rule script
     */
    @Override // org.apache.uima.textmarker.textruler.extension.TextRulerLearner
    public String getResultString() {
        final StringBuilder script = new StringBuilder();
        script.append(getTMFileHeaderString()).append("// LEFT BOUNDARY RULES:\n");

        if (this.leftBoundaryBestRulesString != null) {
            // Left phase finished: emit stored left rules, then stored or in-progress right rules.
            script.append(this.leftBoundaryBestRulesString);
            script.append("\n// RIGHT BOUNDARY RULES:\n");
            if (this.rightBoundaryBestRulesString != null) {
                script.append(this.rightBoundaryBestRulesString);
            } else if (this.bestRulesPool != null) {
                script.append(this.bestRulesPool.getRulesString(""));
            }

            script.append("\nBLOCK(contextualRules) Document{}\n{\n\tDocument{->ASSIGN(redoContextualRules, false)}; // reset flag\n");
            script.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
            script.append(this.leftBoundaryContextualRulesString);
            script.append("\n\t// RIGHT BOUNDARY CONTEXTUAL RULES:\n");
            if (this.rightBoundaryBestRulesString != null) {
                script.append(this.rightBoundaryContextualRulesString);
            } else if (this.contextRulesPool != null) {
                script.append(this.contextRulesPool.getRulesString("\t"));
            }
            script.append("\n\tDocument{IF(redoContextualRules)->CALL(thisFile.contextualRules)};\n}\n");
        } else if (this.bestRulesPool != null) {
            // Left phase still running: show what is in the pools so far.
            script.append(this.bestRulesPool.getRulesString(""));
            script.append("\n\t// LEFT BOUNDARY CONTEXTUAL RULES:\n");
            if (this.contextRulesPool != null) {
                script.append(this.contextRulesPool.getRulesString(""));
            }
        }

        final String leftBoundary = TextRulerToolkit.getTypeShortName(new TextRulerTarget(this.slotNames[0], TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY, this).getSingleSlotTypeName());
        final String rightBoundary = TextRulerToolkit.getTypeShortName(new TextRulerTarget(this.slotNames[0], TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY, this).getSingleSlotTypeName());
        final String slot = TextRulerToolkit.getTypeShortName(this.slotNames[0]);
        final int maxGap = (this.slotMaximumTokenCount * 3) - 2;

        script.append("\n//slot-building rules:\n");
        // Left and right boundary on the same annotation.
        script.append(leftBoundary).append("{IS(").append(rightBoundary).append(")->UNMARK(").append(leftBoundary)
                .append("), UNMARK(").append(rightBoundary).append("), MARKONCE(").append(slot).append(")};\n");
        // Left boundary followed by a right boundary, optionally with a bounded gap in between.
        script.append(leftBoundary).append("{->UNMARK(").append(leftBoundary).append(")} ");
        if (maxGap > 0) {
            script.append("ANY[0, ").append(maxGap).append("]? ");
            script.append(rightBoundary).append("{->UNMARK(").append(rightBoundary).append("), MARKONCE(").append(slot).append(", 1, 3)};\n");
        } else {
            script.append(rightBoundary).append("{->UNMARK(").append(rightBoundary).append("), MARKONCE(").append(slot).append(", 1, 2)};\n");
        }

        script.append("\n//cleaning up:\n");
        script.append(leftBoundary).append("{->UNMARK(").append(leftBoundary).append(")};\n");
        script.append(rightBoundary).append("{->UNMARK(").append(rightBoundary).append(")};\n");
        return script.toString();
    }

    /**
     * Applies the learner settings contained in the given map. Keys that are absent leave the
     * corresponding setting untouched. Integer settings are cast to {@link Integer}, the error
     * threshold to {@link Float}.
     *
     * @param map parameter values keyed by the {@code *_KEY} constants of this class
     */
    @Override // org.apache.uima.textmarker.textruler.extension.TextRulerLearner
    public void setParameters(Map<String, Object> map) {
        if (map.containsKey(WINDOW_SIZE_KEY)) {
            this.windowSize = (Integer) map.get(WINDOW_SIZE_KEY);
        }
        if (map.containsKey(CURRENT_BEST_RULES_SIZE_KEY)) {
            this.maxCurrentBestRulesCount = (Integer) map.get(CURRENT_BEST_RULES_SIZE_KEY);
        }
        if (map.containsKey(CURRENT_CONTEXTUAL_RULES_SIZE_KEY)) {
            this.maxCurrentContextualRulesCount = (Integer) map.get(CURRENT_CONTEXTUAL_RULES_SIZE_KEY);
        }
        if (map.containsKey(MIN_COVERED_POSITIVES_PER_RULE_KEY)) {
            this.minCoveredPositives = (Integer) map.get(MIN_COVERED_POSITIVES_PER_RULE_KEY);
        }
        if (map.containsKey(MAX_ERROR_THRESHOLD_KEY)) {
            this.maxErrorThreshold = (Float) map.get(MAX_ERROR_THRESHOLD_KEY);
        }
    }

    /**
     * Returns the directory path used for correction documents of the given target: the temp
     * directory followed by {@code leftCorrectionDocs} for a left-boundary target and
     * {@code rightCorrectionDocs} otherwise.
     */
    protected String correctionRulesInputDirectory(TextRulerTarget textRulerTarget) {
        final String subDirectory;
        if (textRulerTarget.isLeftBoundary()) {
            subDirectory = "leftCorrectionDocs";
        } else {
            subDirectory = "rightCorrectionDocs";
        }
        return tempDirectory() + subDirectory;
    }

    /**
     * Runs the rules file against every example document and writes each resulting test CAS as
     * an XMI file into the correction documents directory of the target.
     *
     * <p>For every covered negative example the nearest positive example is searched among the
     * annotations before and after it; when one is found, a {@link TextRulerShiftExample} is
     * created for the pair.
     *
     * @param textRulerTarget the target whose examples are created and tested
     * @param i passed to {@code getAnnotationsBeforePosition} / {@code getAnnotationsAfterPosition}
     *          (presumably the maximum number of neighbouring annotations to inspect - TODO confirm)
     * @return {@code true} on success, {@code false} if any exception occurred (it is reported
     *         through {@code TextRulerPlugin.error})
     */
    protected boolean testTaggingRulesAndCreateCorrectionRulesExamples(TextRulerTarget textRulerTarget, int i) {
        try {
            File outputDirectory = new File(correctionRulesInputDirectory(textRulerTarget));
            if (!outputDirectory.exists()) {
                outputDirectory.mkdir();
            }
            this.exampleDocuments.clearCurrentExamples();
            this.exampleDocuments.createExamplesForTarget(textRulerTarget);
            this.examples = this.exampleDocuments.getAllPositiveExamples();
            TextRulerExampleDocument[] documents = this.exampleDocuments.getSortedDocumentsInCacheOptimizedOrder();
            Type anyType = documents[0].getCAS().getTypeSystem().getType(TextRulerToolkit.TM_ANY_TYPE_NAME);
            // NOTE(review): the rules are read from a hard-coded absolute path; this looks like
            // leftover test scaffolding and should be confirmed before relying on this method.
            FileUtils.saveString2File(FileUtils.file2String(new File("/testinput/testrules/rules.tm")), new File(getTempRulesFileName()));
            CAS testCAS = getTestCAS();
            for (TextRulerExampleDocument document : documents) {
                TextRulerStatisticsCollector statistics = new TextRulerStatisticsCollector();
                document.resetAndFillTestCAS(testCAS, textRulerTarget);
                CAS documentCas = document.getCAS();
                this.ae.process(testCAS);
                compareOriginalDocumentWithTestCAS(document, testCAS, textRulerTarget, statistics, true);
                List<TextRulerExample> correctTags = document.getPositiveExamples();
                List<TextRulerExample> wrongTags = new ArrayList<TextRulerExample>(statistics.getCoveredNegativeExamples());
                // NOTE(review): the shift examples are collected but not used after the loop.
                List<TextRulerShiftExample> shiftExamples = new ArrayList<TextRulerShiftExample>();
                for (TextRulerExample wrongTag : wrongTags) {
                    List<AnnotationFS> before = TextRulerToolkit.getAnnotationsBeforePosition(documentCas, wrongTag.getAnnotation().getBegin(), i, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), anyType);
                    List<AnnotationFS> after = TextRulerToolkit.getAnnotationsAfterPosition(documentCas, wrongTag.getAnnotation().getEnd(), i, TextRulerToolkit.getFilterSetWithSlotNames(this.slotNames, this.filterSet), anyType);

                    // Walk backwards from the wrong tag until a positive example is hit.
                    int stepsBefore = 0;
                    TextRulerExample matchBefore = null;
                    for (int index = before.size() - 1; index >= 0; index--) {
                        stepsBefore++;
                        matchBefore = TextRulerToolkit.exampleListContainsAnnotation(correctTags, TextRulerToolkit.convertToTargetAnnotation(before.get(index), document, textRulerTarget, documentCas.getTypeSystem()));
                        if (matchBefore != null) {
                            break;
                        }
                    }

                    // Walk forwards from the wrong tag until a positive example is hit.
                    int stepsAfter = 0;
                    TextRulerExample matchAfter = null;
                    Iterator<AnnotationFS> afterIterator = after.iterator();
                    while (afterIterator.hasNext()) {
                        stepsAfter++;
                        matchAfter = TextRulerToolkit.exampleListContainsAnnotation(correctTags, TextRulerToolkit.convertToTargetAnnotation(afterIterator.next(), document, textRulerTarget, documentCas.getTypeSystem()));
                        if (matchAfter != null) {
                            break;
                        }
                    }

                    // Prefer the strictly closer match; otherwise a left-boundary target prefers
                    // the match after the wrong tag, and every other case falls back to the
                    // match before it (which may be null).
                    TextRulerExample nearest;
                    if (stepsAfter < stepsBefore && matchAfter != null) {
                        nearest = matchAfter;
                    } else if (stepsAfter > stepsBefore && matchBefore != null) {
                        nearest = matchBefore;
                    } else if (textRulerTarget.type == TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY && matchAfter != null) {
                        nearest = matchAfter;
                    } else {
                        nearest = matchBefore;
                    }

                    if (nearest != null) {
                        TextRulerToolkit.log("FOUND BAD EXAMPLE FOR SHIFTING !!");
                        shiftExamples.add(new TextRulerShiftExample(document, wrongTag.getAnnotation(), nearest.getAnnotation(), true, textRulerTarget));
                    }
                }
                // Write into the output directory. The previous code joined the parts with
                // File.pathSeparator (':' or ';'), which is the PATH-list separator, not a
                // directory separator, so nothing ever landed inside the directory. Only the
                // file-name component is used so that a CAS file name carrying directories
                // cannot point outside (or into missing sub-directories of) the output directory.
                File xmiFile = new File(outputDirectory, new File(document.getCasFileName()).getName());
                TextRulerToolkit.writeCAStoXMIFile(testCAS, xmiFile.getPath());
            }
            testCAS.reset();
            return true;
        } catch (Exception e) {
            TextRulerPlugin.error(e);
            return false;
        }
    }

    /**
     * Returns the superclass file header extended by the declaration of the
     * {@code redoContextualRules} boolean variable that the generated rule script refers to.
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    public String getTMFileHeaderString() {
        final String inheritedHeader = super.getTMFileHeaderString();
        return inheritedHeader + "BOOLEAN redoContextualRules;\n\n";
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Verifies that, in addition to the types checked by the superclass, the left and right
     * boundary types of the first slot exist in the test CAS type system. Missing types are
     * reported to the delegate in a single error status message.
     *
     * @return {@code true} if the superclass check passed and both boundary types exist
     */
    @Override // org.apache.uima.textmarker.textruler.core.TextRulerBasicLearner
    public boolean checkForMandatoryTypes() {
        if (!super.checkForMandatoryTypes()) {
            return false;
        }
        final TypeSystem typeSystem = getTestCAS().getTypeSystem();
        final String[] requiredTypeNames = {
            new TextRulerTarget(this.slotNames[0], TextRulerTarget.MLTargetType.SINGLE_LEFT_BOUNDARY, this).getSingleSlotTypeName(),
            new TextRulerTarget(this.slotNames[0], TextRulerTarget.MLTargetType.SINGLE_RIGHT_BOUNDARY, this).getSingleSlotTypeName()
        };

        boolean allPresent = true;
        final StringBuilder missingNames = new StringBuilder();
        for (String requiredTypeName : requiredTypeNames) {
            if (typeSystem.getType(requiredTypeName) != null) {
                continue;
            }
            // Comma-separate the names of all missing types.
            if (!allPresent) {
                missingNames.append(", ");
            }
            missingNames.append(requiredTypeName);
            allPresent = false;
        }

        if (!allPresent) {
            sendStatusUpdateToDelegate("Error: Some Slot- or Helper-Types were not found in TypeSystem: " + missingNames, TextRulerLearner.TextRulerLearnerState.ML_ERROR, false);
        }
        return allPresent;
    }
}
