package edu.umd.cloud9.integration.collection.clue;

import com.google.common.base.Joiner;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMapping;
import edu.umd.cloud9.collection.clue.ClueWarcDocnoMappingBuilder;
import edu.umd.cloud9.collection.clue.ClueWarcForwardIndex;
import edu.umd.cloud9.collection.clue.ClueWarcForwardIndexBuilder;
import edu.umd.cloud9.collection.clue.CountClueWarcRecords;
import edu.umd.cloud9.integration.IntegrationUtils;
import edu.umd.hooka.Vocab;
import java.util.Random;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:edu/umd/cloud9/integration/collection/clue/IntegrationTest.class */
/**
 * Integration test for the ClueWeb09 tooling: builds a docno mapping, counts
 * records in both the repacked and the raw collection, and builds a forward
 * index, checking a few known values after each step.
 *
 * <p>The individual steps are run in a fixed order from {@link #runTests()}
 * because the later steps read the docno mapping file produced by the first.
 */
public class IntegrationTest {
    private static final Random random = new Random();
    private static final Path collectionPathRepacked = new Path("/shared/collections/ClueWeb09/collection.compressed.block/en.01");
    private static final Path collectionPathRaw = new Path("/shared/collections/ClueWeb09/collection.raw/");
    // Random suffix keeps the temporary output names of separate runs apart.
    private static final String tmpPrefix = "tmp-" + IntegrationTest.class.getCanonicalName() + "-" + random.nextInt(10000);
    private static final String mappingFile = tmpPrefix + "-mapping.dat";

    /**
     * Runs every step in dependency order: the docno mapping must exist before
     * the record counters and the forward index can use it.
     *
     * @throws Exception if any step fails or an assertion does not hold
     */
    @Test
    public void runTests() throws Exception {
        testDocnoMapping();
        testDemoCountDocsRepacked();
        testDemoCountDocsRaw();
        testForwardIndex();
    }

    /**
     * Builds the docno mapping for the repacked collection and verifies the
     * docid/docno translation in both directions for two known documents.
     *
     * @throws Exception if the job cannot be run or the mapping cannot be loaded
     */
    private void testDocnoMapping() throws Exception {
        FileSystem fileSystem = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        Assert.assertTrue(fileSystem.exists(collectionPathRepacked));
        IntegrationUtils.exec(Joiner.on(" ").join(new String[]{"hadoop jar", IntegrationUtils.getJar("dist", "cloud9"), ClueWarcDocnoMappingBuilder.class.getCanonicalName(), "-libjars=" + IntegrationUtils.getJar("lib", "guava"), "-collection=" + collectionPathRepacked, "-docnoMapping=" + mappingFile}));
        ClueWarcDocnoMapping clueWarcDocnoMapping = new ClueWarcDocnoMapping();
        clueWarcDocnoMapping.loadMapping(new Path(mappingFile), fileSystem);
        Assert.assertEquals("clueweb09-en0000-00-00000", clueWarcDocnoMapping.getDocid(1));
        // Plain docno literal, mirroring the reverse lookup asserted below; the
        // previous reference to Vocab.MAX_VOCAB_INDEX was an unrelated constant
        // (presumably of the same value -- confirm against edu.umd.hooka.Vocab).
        Assert.assertEquals("clueweb09-en0000-29-13313", clueWarcDocnoMapping.getDocid(1000000));
        Assert.assertEquals(1L, clueWarcDocnoMapping.getDocno("clueweb09-en0000-00-00000"));
        Assert.assertEquals(1000000L, clueWarcDocnoMapping.getDocno("clueweb09-en0000-29-13313"));
    }

    /**
     * Counts the records of the repacked collection and compares the number
     * written to the count output file with the expected total.
     *
     * @throws Exception if the job cannot be run or its output cannot be read
     */
    private void testDemoCountDocsRepacked() throws Exception {
        FileSystem fileSystem = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        Assert.assertTrue(fileSystem.exists(collectionPathRepacked));
        String str = tmpPrefix + "-records.txt";
        IntegrationUtils.exec(Joiner.on(" ").join(new String[]{"hadoop jar", IntegrationUtils.getJar("dist", "cloud9"), CountClueWarcRecords.class.getCanonicalName(), "-libjars=" + IntegrationUtils.getJar("lib", "guava"), "-repacked", "-path=" + collectionPathRepacked, "-docnoMapping=" + mappingFile, "-countOutput=" + str}));
        Assert.assertEquals(50220423L, readFirstLineAsInt(fileSystem, str));
    }

    /**
     * Counts the records of segment 1 of the raw collection and compares the
     * number written to the count output file with the expected total.
     *
     * <p>NOTE(review): this step writes to the same "-records.txt" path as the
     * repacked count; whether the job overwrites an existing file is not
     * visible from here -- confirm, otherwise a stale result could be read.
     *
     * @throws Exception if the job cannot be run or its output cannot be read
     */
    private void testDemoCountDocsRaw() throws Exception {
        FileSystem fileSystem = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        Assert.assertTrue(fileSystem.exists(collectionPathRaw));
        String str = tmpPrefix + "-records.txt";
        IntegrationUtils.exec(Joiner.on(" ").join(new String[]{"hadoop jar", IntegrationUtils.getJar("dist", "cloud9"), CountClueWarcRecords.class.getCanonicalName(), "-libjars=" + IntegrationUtils.getJar("lib", "guava"), "-original", "-segment=1", "-path=" + collectionPathRaw, "-docnoMapping=" + mappingFile, "-countOutput=" + str}));
        Assert.assertEquals(50220423L, readFirstLineAsInt(fileSystem, str));
    }

    /**
     * Builds the forward index for the repacked collection, then checks that
     * documents can be fetched by docno and by docid and that the first and
     * last docnos match the expected range.
     *
     * @throws Exception if the job cannot be run or the index cannot be loaded
     */
    private void testForwardIndex() throws Exception {
        FileSystem fileSystem = FileSystem.get(IntegrationUtils.getBespinConfiguration());
        Assert.assertTrue(fileSystem.exists(collectionPathRepacked));
        String str = tmpPrefix + "-findex.dat";
        IntegrationUtils.exec(Joiner.on(" ").join(new String[]{"hadoop jar", IntegrationUtils.getJar("dist", "cloud9"), ClueWarcForwardIndexBuilder.class.getCanonicalName(), "-libjars=" + IntegrationUtils.getJar("lib", "guava"), "-collection=" + collectionPathRepacked, "-index=" + str}));
        ClueWarcForwardIndex clueWarcForwardIndex = new ClueWarcForwardIndex();
        clueWarcForwardIndex.loadIndex(new Path(str), new Path(mappingFile), fileSystem);
        Assert.assertTrue(clueWarcForwardIndex.getDocument(14069750).getContent().contains("Vizergy: How Design and SEO work together"));
        Assert.assertTrue(clueWarcForwardIndex.getDocument("clueweb09-en0008-76-19728").getContent().contains("Jostens - Homeschool Yearbooks"));
        Assert.assertEquals(1L, clueWarcForwardIndex.getFirstDocno());
        Assert.assertEquals(50220423L, clueWarcForwardIndex.getLastDocno());
    }

    /**
     * Reads the first line of a file and parses it as a decimal integer.
     *
     * @param fileSystem file system used to open the file
     * @param file path of the file to read
     * @return the integer value of the first line
     * @throws Exception if the file cannot be opened or read, or if the first
     *     line is not a valid integer
     */
    private static int readFirstLineAsInt(FileSystem fileSystem, String file) throws Exception {
        // The buffer must be kept in a local: readLine fills it in place.
        Text firstLine = new Text();
        LineReader lineReader = new LineReader(fileSystem.open(new Path(file)));
        try {
            lineReader.readLine(firstLine);
        } finally {
            // Release the underlying stream even when the read fails.
            lineReader.close();
        }
        return Integer.parseInt(firstLine.toString());
    }

    /**
     * Exposes this JUnit 4 test class through the JUnit 3 suite API.
     *
     * @return an adapter wrapping this class
     */
    public static junit.framework.Test suite() {
        return new JUnit4TestAdapter(IntegrationTest.class);
    }
}
