/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.clustering.minhash;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.clustering.minhash.HashFactory;
import org.apache.mahout.clustering.minhash.HashFunction;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MinHashMapper
extends Mapper<Text, VectorWritable, Text, Writable> {
    private static final Logger log = LoggerFactory.getLogger(MinHashMapper.class);
    private HashFunction[] hashFunction;
    private int numHashFunctions;
    private int keyGroups;
    private int minVectorSize;
    private boolean debugOutput;
    private int[] minHashValues;
    private byte[] bytesToHash;

    protected void setup(Mapper.Context context) throws IOException, InterruptedException {
        HashFactory.HashType hashType;
        super.setup(context);
        Configuration conf = context.getConfiguration();
        this.numHashFunctions = conf.getInt("numHashFunctions", 10);
        this.minHashValues = new int[this.numHashFunctions];
        this.bytesToHash = new byte[4];
        this.keyGroups = conf.getInt("keyGroups", 1);
        this.minVectorSize = conf.getInt("minVectorSize", 5);
        String htype = conf.get("hashType", "linear");
        this.debugOutput = conf.getBoolean("debugOutput", false);
        try {
            hashType = HashFactory.HashType.valueOf(htype);
        }
        catch (IllegalArgumentException iae) {
            log.warn("No valid hash type found in configuration for {}, assuming type: {}", (Object)htype, (Object)HashFactory.HashType.LINEAR);
            hashType = HashFactory.HashType.LINEAR;
        }
        this.hashFunction = HashFactory.createHashFunctions(hashType, this.numHashFunctions);
    }

    public void map(Text item, VectorWritable features, Mapper.Context context) throws IOException, InterruptedException {
        int i;
        Vector featureVector = features.get();
        if (featureVector.size() < this.minVectorSize) {
            return;
        }
        for (i = 0; i < this.numHashFunctions; ++i) {
            this.minHashValues[i] = Integer.MAX_VALUE;
        }
        for (i = 0; i < this.numHashFunctions; ++i) {
            for (Vector.Element ele : featureVector) {
                int value = (int)ele.get();
                this.bytesToHash[0] = (byte)(value >> 24);
                this.bytesToHash[1] = (byte)(value >> 16);
                this.bytesToHash[2] = (byte)(value >> 8);
                this.bytesToHash[3] = (byte)value;
                int hashIndex = this.hashFunction[i].hash(this.bytesToHash);
                if (this.minHashValues[i] <= hashIndex) continue;
                this.minHashValues[i] = hashIndex;
            }
        }
        for (i = 0; i < this.numHashFunctions; ++i) {
            StringBuilder clusterIdBuilder = new StringBuilder();
            for (int j = 0; j < this.keyGroups; ++j) {
                clusterIdBuilder.append(this.minHashValues[(i + j) % this.numHashFunctions]).append('-');
            }
            String clusterId = clusterIdBuilder.toString();
            clusterId = clusterId.substring(0, clusterId.lastIndexOf(45));
            Text cluster = new Text(clusterId);
            VectorWritable point = this.debugOutput ? new VectorWritable(featureVector.clone()) : new Text(item.toString());
            context.write((Object)cluster, (Object)point);
        }
    }
}

