package org.corpus_tools.salt.common.tokenizer.tests;

import com.neovisionaries.i18n.LanguageCode;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;
import org.corpus_tools.salt.SaltFactory;
import org.corpus_tools.salt.common.SDocumentGraph;
import org.corpus_tools.salt.common.STextualDS;
import org.corpus_tools.salt.common.STextualRelation;
import org.corpus_tools.salt.common.tokenizer.Clitics;
import org.corpus_tools.salt.common.tokenizer.Tokenizer;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/* loaded from: input_file:org/corpus_tools/salt/common/tokenizer/tests/TokenizerTest.class */
/**
 * JUnit 4 tests for {@link Tokenizer}.
 *
 * <p>Each tokenization test builds a list of expected {@link Token}s (text plus start/end
 * offsets), tokenizes a text inside a fresh {@link SDocumentGraph} and then compares the graph's
 * {@link STextualRelation}s, in iteration order, against the expected tokens.
 */
public class TokenizerTest {
    /**
     * German sample text used by {@link #testCase1()} and {@link #testCase2()}; the expected
     * tokens for it are produced by {@link #createCase1()}.
     *
     * <p>NOTE(review): this literal (and the matching expected tokens) contains U+FFFD replacement
     * characters where German umlauts/sharp s were presumably lost to an encoding error. They are
     * kept unchanged because text and expected offsets are consistent with each other - confirm
     * against the original sources before repairing them.
     */
    private static final String CASE_1_TEXT = "Die Ölpest im Golf von Mexiko sei eine \"f�rchterliche Trag�die, f�r die ich als Verantwortlicher bei BP immer eine gro�e Verantwortung f�hlen werde\", erkl�rte der scheidende Konzernchef Hayward. BP werde sich durch den Vorfall ver�ndern und solle unter neuer F�hrung in diese Phase starten, begr�ndete er seinen R�ckzug zum 1. Oktober.";

    /** Tokenizer under test; replaced with a new instance before every test by {@link #setUp()}. */
    private Tokenizer fixture;

    /** Expected token: the covered text and its start/end offsets in the tokenized text. */
    public static class Token {
        String text;
        Integer startPos;
        Integer endPos;

        /**
         * @param str the text the token is expected to cover
         * @param i expected start offset
         * @param i2 expected end offset (used as the exclusive bound of {@code substring})
         */
        public Token(String str, int i, int i2) {
            this.text = str;
            this.startPos = Integer.valueOf(i);
            this.endPos = Integer.valueOf(i2);
        }
    }

    /** Sets the tokenizer under test. */
    public void setFixture(Tokenizer tokenizer) {
        this.fixture = tokenizer;
    }

    /** Returns the tokenizer under test. */
    public Tokenizer getFixture() {
        return this.fixture;
    }

    /** Creates a fresh {@link Tokenizer} for every test. */
    @Before
    public void setUp() {
        setFixture(new Tokenizer());
    }

    /**
     * Creates a new document graph holding {@code text}, hands the graph to the fixture and
     * tokenizes the text.
     *
     * @param text the primary text to tokenize
     * @param language the language passed to the tokenizer, or {@code null} to call the
     *     single-argument {@code tokenize} overload, which receives no language
     * @return the document graph that was tokenized
     */
    private SDocumentGraph tokenizeInNewGraph(String text, LanguageCode language) {
        SDocumentGraph documentGraph = SaltFactory.createSDocumentGraph();
        STextualDS textualDS = documentGraph.createTextualDS(text);
        getFixture().setsDocumentGraph(documentGraph);
        if (language == null) {
            getFixture().tokenize(textualDS);
        } else {
            getFixture().tokenize(textualDS, language);
        }
        return documentGraph;
    }

    /**
     * Checks every textual relation of {@code documentGraph}, in iteration order, against the
     * expected token at the same index: source and target must be non-null, start and end must
     * match, and the covered substring of the target's text must equal the expected text.
     *
     * <p>Only as many expected tokens as there are relations get compared; this method does not
     * itself require the two counts to be equal.
     *
     * @param expectedTokens expected tokens in document order
     * @param documentGraph graph whose textual relations are verified
     */
    private static void assertRelationsMatch(List<Token> expectedTokens, SDocumentGraph documentGraph) {
        int index = 0;
        for (STextualRelation relation : documentGraph.getTextualRelations()) {
            // Strictly-less-than: index == size would already be out of bounds for get(index).
            Assert.assertTrue("more textual relations than the " + expectedTokens.size() + " expected tokens", index < expectedTokens.size());
            Token expected = expectedTokens.get(index);
            Assert.assertNotNull(expected);
            Assert.assertNotNull(relation.getSource());
            Assert.assertNotNull(relation.getTarget());
            Assert.assertEquals(expected.startPos, relation.getStart());
            Assert.assertEquals(expected.endPos, relation.getEnd());
            int start = ((Integer) relation.getStart()).intValue();
            int end = ((Integer) relation.getEnd()).intValue();
            Assert.assertEquals(expected.text, relation.getTarget().getText().substring(start, end));
            index++;
        }
    }

    /** Checks the mapping of English language names to {@link LanguageCode}s. */
    @Test
    public void testLANGUAGE_DESCRIPTION() {
        Assert.assertEquals(LanguageCode.de, Tokenizer.mapISOLanguageCode("german"));
        Assert.assertEquals(LanguageCode.en, Tokenizer.mapISOLanguageCode("english"));
        Assert.assertEquals(LanguageCode.fr, Tokenizer.mapISOLanguageCode("french"));
        Assert.assertEquals(LanguageCode.it, Tokenizer.mapISOLanguageCode("italian"));
    }

    /** Checks that {@code Tokenizer.checkLanguage} returns en/de/fr/it for one sample text each. */
    @Test
    public void testDetectLanguage() {
        Assert.assertEquals(LanguageCode.en, Tokenizer.checkLanguage("The Java Text Categorizing Library (JTCL) is a pure java 1.5 implementation of libTextCat which in turn is \"a library that was primarily developed for language guessing, a task on which it is known to perform with near-perfect accuracy\"."));
        Assert.assertEquals(LanguageCode.de, Tokenizer.checkLanguage("Die Java-Text Kategorisierung Library (JTCL) ist eine reine Java 1.5 Implementierung von libtextcat die wiederum \"eine Bibliothek, die vor allem für die Sprache zu raten, eine Aufgabe, auf denen bekannt ist, mit nahezu perfekter Genauigkeit durchzuführen ist, wurde entwickelt.\""));
        Assert.assertEquals(LanguageCode.fr, Tokenizer.checkLanguage("La bibliothèque Java Catégorisation Texte (JTCL) est un pur Java 1.5 mise en œuvre de libtextcat qui à son tour \"une bibliothèque qui a été développé pour la langue deviner Primar, une tâche sur laquelle il est connu de réaliser avec une précision quasi-parfaite.\""));
        Assert.assertEquals(LanguageCode.it, Tokenizer.checkLanguage("Text Library Java Categorizzare (JTCL) è un puro Java 1.5 realizzazione di libtextcat che a sua volta è \"una libreria che è stato sviluppato per la lingua indovinare Primar, un compito su cui è conosciuto per eseguire con precisione quasi perfetta.\""));
    }

    /**
     * Builds the expected tokens for the German sample text tokenized in {@link #testCase1()}
     * and {@link #testCase2()}.
     *
     * @return a new, mutable list of expected tokens in document order
     */
    public List<Token> createCase1() {
        List<Token> tokens = new ArrayList<Token>();
        tokens.add(new Token("Die", 0, 3));
        tokens.add(new Token("Ölpest", 4, 10));
        tokens.add(new Token("im", 11, 13));
        tokens.add(new Token("Golf", 14, 18));
        tokens.add(new Token("von", 19, 22));
        tokens.add(new Token("Mexiko", 23, 29));
        tokens.add(new Token("sei", 30, 33));
        tokens.add(new Token("eine", 34, 38));
        tokens.add(new Token("\"", 39, 40));
        tokens.add(new Token("f�rchterliche", 40, 53));
        tokens.add(new Token("Trag�die", 54, 62));
        tokens.add(new Token(",", 62, 63));
        tokens.add(new Token("f�r", 64, 67));
        tokens.add(new Token("die", 68, 71));
        tokens.add(new Token("ich", 72, 75));
        tokens.add(new Token("als", 76, 79));
        tokens.add(new Token("Verantwortlicher", 80, 96));
        tokens.add(new Token("bei", 97, 100));
        tokens.add(new Token("BP", 101, 103));
        tokens.add(new Token("immer", 104, 109));
        tokens.add(new Token("eine", 110, 114));
        tokens.add(new Token("gro�e", 115, 120));
        tokens.add(new Token("Verantwortung", 121, 134));
        tokens.add(new Token("f�hlen", 135, 141));
        tokens.add(new Token("werde", 142, 147));
        tokens.add(new Token("\"", 147, 148));
        tokens.add(new Token(",", 148, 149));
        tokens.add(new Token("erkl�rte", 150, 158));
        tokens.add(new Token("der", 159, 162));
        tokens.add(new Token("scheidende", 163, 173));
        tokens.add(new Token("Konzernchef", 174, 185));
        tokens.add(new Token("Hayward", 186, 193));
        tokens.add(new Token(".", 193, 194));
        tokens.add(new Token("BP", 195, 197));
        tokens.add(new Token("werde", 198, 203));
        tokens.add(new Token("sich", 204, 208));
        tokens.add(new Token("durch", 209, 214));
        tokens.add(new Token("den", 215, 218));
        tokens.add(new Token("Vorfall", 219, 226));
        tokens.add(new Token("ver�ndern", 227, 236));
        tokens.add(new Token("und", 237, 240));
        tokens.add(new Token("solle", 241, 246));
        tokens.add(new Token("unter", 247, 252));
        tokens.add(new Token("neuer", 253, 258));
        tokens.add(new Token("F�hrung", 259, 266));
        tokens.add(new Token("in", 267, 269));
        tokens.add(new Token("diese", 270, 275));
        tokens.add(new Token("Phase", 276, 281));
        tokens.add(new Token("starten", 282, 289));
        tokens.add(new Token(",", 289, 290));
        tokens.add(new Token("begr�ndete", 291, 301));
        tokens.add(new Token("er", 302, 304));
        tokens.add(new Token("seinen", 305, 311));
        tokens.add(new Token("R�ckzug", 312, 319));
        tokens.add(new Token("zum", 320, 323));
        tokens.add(new Token("1.", 324, 326));
        tokens.add(new Token("Oktober", 327, 334));
        tokens.add(new Token(".", 334, 335));
        return tokens;
    }

    /** Tokenizes the German sample text without passing a language to the tokenizer. */
    @Test
    public void testCase1() {
        List<Token> expected = createCase1();
        SDocumentGraph documentGraph = tokenizeInNewGraph(CASE_1_TEXT, null);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /** Tokenizes the German sample text with the language explicitly set to German. */
    @Test
    public void testCase2() {
        List<Token> expected = createCase1();
        SDocumentGraph documentGraph = tokenizeInNewGraph(CASE_1_TEXT, LanguageCode.de);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /** Tokenizes the two-word German text "Die Ölpest". */
    @Test
    public void testCase4() {
        List<Token> expected = new ArrayList<Token>();
        expected.add(new Token("Die", 0, 3));
        expected.add(new Token("Ölpest", 4, 10));
        SDocumentGraph documentGraph = tokenizeInNewGraph("Die Ölpest", LanguageCode.de);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /** Tokenizes a single word followed by two blanks; exactly one token is expected. */
    @Test
    public void testCase5() {
        List<Token> expected = new ArrayList<Token>();
        expected.add(new Token("Feigenblatt", 0, 11));
        SDocumentGraph documentGraph = tokenizeInNewGraph("Feigenblatt  ", LanguageCode.de);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /** Same input and expectations as {@link #testCase4()}. */
    @Test
    public void testCase6() {
        List<Token> expected = new ArrayList<Token>();
        expected.add(new Token("Die", 0, 3));
        expected.add(new Token("Ölpest", 4, 10));
        SDocumentGraph documentGraph = tokenizeInNewGraph("Die Ölpest", LanguageCode.de);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /** Tokenizes an English sentence containing the abbreviation "O.K." and the clitic "'s". */
    @Test
    public void testCase7() {
        List<Token> expected = new ArrayList<Token>();
        expected.add(new Token("O.K.", 0, 4));
        expected.add(new Token(",", 4, 5));
        expected.add(new Token("so", 6, 8));
        expected.add(new Token("the", 9, 12));
        expected.add(new Token("answer", 13, 19));
        expected.add(new Token("'s", 19, 21));
        expected.add(new Token("obvious", 22, 29));
        expected.add(new Token(".", 29, 30));
        SDocumentGraph documentGraph = tokenizeInNewGraph("O.K., so the answer's obvious.", LanguageCode.en);
        Assert.assertEquals(expected.size(), documentGraph.getTokens().size());
        assertRelationsMatch(expected, documentGraph);
    }

    /**
     * Tokenizes a French and an Italian text containing clitics, first with the language passed
     * explicitly and then without a language argument. The same fixture is reused for all four
     * runs, each on a new document graph.
     *
     * <p>NOTE(review): the number of produced tokens is not asserted here, only the relations
     * that exist are compared.
     */
    @Test
    public void testDefaultClitics() {
        final String frenchText = "ou ceux-là mêmes qu'il s'affirmaient";
        final String italianText = "Riuscire all'università";

        List<Token> italianTokens = new ArrayList<Token>();
        italianTokens.add(new Token("Riuscire", 0, 8));
        italianTokens.add(new Token("all'", 9, 13));
        italianTokens.add(new Token("università", 13, 23));

        List<Token> frenchTokens = new ArrayList<Token>();
        frenchTokens.add(new Token("ou", 0, 2));
        frenchTokens.add(new Token("ceux", 3, 7));
        frenchTokens.add(new Token("-là", 7, 10));
        frenchTokens.add(new Token("mêmes", 11, 16));
        frenchTokens.add(new Token("qu'", 17, 20));
        frenchTokens.add(new Token("il", 20, 22));
        frenchTokens.add(new Token("s'", 23, 25));
        frenchTokens.add(new Token("affirmaient", 25, 36));

        // Language given explicitly.
        assertRelationsMatch(frenchTokens, tokenizeInNewGraph(frenchText, LanguageCode.fr));
        assertRelationsMatch(italianTokens, tokenizeInNewGraph(italianText, LanguageCode.it));

        // No language argument.
        assertRelationsMatch(frenchTokens, tokenizeInNewGraph(frenchText, null));
        assertRelationsMatch(italianTokens, tokenizeInNewGraph(italianText, null));
    }

    /**
     * Registers custom clitics for {@link LanguageCode#aa} and tokenizes an artificial text with
     * that language.
     *
     * <p>Unlike the other tests, the document graph is not handed to the fixture via
     * {@code setsDocumentGraph} before tokenizing, and the token count is not asserted.
     */
    @Test
    public void testFantasyClitics() {
        List<Token> expected = new ArrayList<Token>();
        expected.add(new Token("S", 0, 1));
        expected.add(new Token("Z-S-z", 2, 7));
        expected.add(new Token("X-", 8, 10));
        expected.add(new Token("S", 10, 11));
        expected.add(new Token("-x", 11, 13));
        expected.add(new Token("Y^", 14, 16));
        expected.add(new Token("S", 16, 17));
        expected.add(new Token("^y", 17, 19));
        expected.add(new Token("Z.", 20, 22));
        expected.add(new Token("S", 22, 23));
        expected.add(new Token(".z", 23, 25));
        expected.add(new Token("x^S.Y", 26, 31));
        SDocumentGraph documentGraph = SaltFactory.createSDocumentGraph();
        STextualDS textualDS = documentGraph.createTextualDS("S Z-S-z X-S-x Y^S^y Z.S.z x^S.Y");
        getFixture().addClitics(LanguageCode.aa, new Clitics("([Xx]-|[Yy]\\^|[Zz]\\.)", "(-[Xx]|\\^[Yy]|\\.[Zz])"));
        getFixture().tokenize(textualDS, LanguageCode.aa);
        assertRelationsMatch(expected, documentGraph);
    }
}
