package org.apache.hadoop.mapred.lib;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;

/* JADX WARN: Classes with same name are omitted:
  input_file:classes/org/apache/hadoop/mapred/lib/InputSampler.class
 */
@InterfaceStability.Stable
@InterfaceAudience.Public
/* loaded from: input_file:hadoop-mapreduce-client-core-2.6.3.jar:org/apache/hadoop/mapred/lib/InputSampler.class */
public class InputSampler<K, V> extends org.apache.hadoop.mapreduce.lib.partition.InputSampler<K, V> {
    private static final Log LOG = LogFactory.getLog(InputSampler.class);

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/mapred/lib/InputSampler$IntervalSampler.class
     */
    /* loaded from: input_file:hadoop-mapreduce-client-core-2.6.3.jar:org/apache/hadoop/mapred/lib/InputSampler$IntervalSampler.class */
    public static class IntervalSampler<K, V> extends InputSampler.IntervalSampler<K, V> implements Sampler<K, V> {
        public IntervalSampler(double d) {
            this(d, Integer.MAX_VALUE);
        }

        public IntervalSampler(double d, int i) {
            super(d, i);
        }

        @Override // org.apache.hadoop.mapred.lib.InputSampler.Sampler
        public K[] getSample(InputFormat<K, V> inputFormat, JobConf jobConf) throws IOException {
            InputSplit[] splits = inputFormat.getSplits(jobConf, jobConf.getNumMapTasks());
            ArrayList arrayList = new ArrayList();
            int min = Math.min(this.maxSplitsSampled, splits.length);
            int length = splits.length / min;
            long j = 0;
            long j2 = 0;
            for (int i = 0; i < min; i++) {
                RecordReader<K, V> recordReader = inputFormat.getRecordReader(splits[i * length], jobConf, Reporter.NULL);
                K createKey = recordReader.createKey();
                V createValue = recordReader.createValue();
                while (recordReader.next(createKey, createValue)) {
                    j++;
                    if (j2 / j < this.freq) {
                        j2++;
                        arrayList.add(createKey);
                        createKey = recordReader.createKey();
                    }
                }
                recordReader.close();
            }
            return (K[]) arrayList.toArray();
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/mapred/lib/InputSampler$RandomSampler.class
     */
    /* loaded from: input_file:hadoop-mapreduce-client-core-2.6.3.jar:org/apache/hadoop/mapred/lib/InputSampler$RandomSampler.class */
    public static class RandomSampler<K, V> extends InputSampler.RandomSampler<K, V> implements Sampler<K, V> {
        public RandomSampler(double d, int i) {
            this(d, i, Integer.MAX_VALUE);
        }

        public RandomSampler(double d, int i, int i2) {
            super(d, i, i2);
        }

        @Override // org.apache.hadoop.mapred.lib.InputSampler.Sampler
        public K[] getSample(InputFormat<K, V> inputFormat, JobConf jobConf) throws IOException {
            InputSplit[] splits = inputFormat.getSplits(jobConf, jobConf.getNumMapTasks());
            ArrayList arrayList = new ArrayList(this.numSamples);
            int min = Math.min(this.maxSplitsSampled, splits.length);
            Random random = new Random();
            long nextLong = random.nextLong();
            random.setSeed(nextLong);
            InputSampler.LOG.debug("seed: " + nextLong);
            for (int i = 0; i < splits.length; i++) {
                InputSplit inputSplit = splits[i];
                int nextInt = random.nextInt(splits.length);
                splits[i] = splits[nextInt];
                splits[nextInt] = inputSplit;
            }
            int i2 = 0;
            while (true) {
                if (i2 < min || (i2 < splits.length && arrayList.size() < this.numSamples)) {
                    RecordReader<K, V> recordReader = inputFormat.getRecordReader(splits[i2], jobConf, Reporter.NULL);
                    K createKey = recordReader.createKey();
                    V createValue = recordReader.createValue();
                    while (recordReader.next(createKey, createValue)) {
                        if (random.nextDouble() <= this.freq) {
                            if (arrayList.size() < this.numSamples) {
                                arrayList.add(createKey);
                            } else {
                                int nextInt2 = random.nextInt(this.numSamples);
                                if (nextInt2 != this.numSamples) {
                                    arrayList.set(nextInt2, createKey);
                                }
                                this.freq *= (this.numSamples - 1) / this.numSamples;
                            }
                            createKey = recordReader.createKey();
                        }
                    }
                    recordReader.close();
                    i2++;
                }
            }
            return (K[]) arrayList.toArray();
        }
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/mapred/lib/InputSampler$Sampler.class
     */
    /* loaded from: input_file:hadoop-mapreduce-client-core-2.6.3.jar:org/apache/hadoop/mapred/lib/InputSampler$Sampler.class */
    public interface Sampler<K, V> extends InputSampler.Sampler<K, V> {
        K[] getSample(InputFormat<K, V> inputFormat, JobConf jobConf) throws IOException;
    }

    /* JADX WARN: Classes with same name are omitted:
      input_file:classes/org/apache/hadoop/mapred/lib/InputSampler$SplitSampler.class
     */
    /* loaded from: input_file:hadoop-mapreduce-client-core-2.6.3.jar:org/apache/hadoop/mapred/lib/InputSampler$SplitSampler.class */
    public static class SplitSampler<K, V> extends InputSampler.SplitSampler<K, V> implements Sampler<K, V> {
        public SplitSampler(int i) {
            this(i, Integer.MAX_VALUE);
        }

        public SplitSampler(int i, int i2) {
            super(i, i2);
        }

        @Override // org.apache.hadoop.mapred.lib.InputSampler.Sampler
        public K[] getSample(InputFormat<K, V> inputFormat, JobConf jobConf) throws IOException {
            InputSplit[] splits = inputFormat.getSplits(jobConf, jobConf.getNumMapTasks());
            ArrayList arrayList = new ArrayList(this.numSamples);
            int min = Math.min(this.maxSplitsSampled, splits.length);
            int length = splits.length / min;
            int i = this.numSamples / min;
            long j = 0;
            for (int i2 = 0; i2 < min; i2++) {
                RecordReader<K, V> recordReader = inputFormat.getRecordReader(splits[i2 * length], jobConf, Reporter.NULL);
                K createKey = recordReader.createKey();
                V createValue = recordReader.createValue();
                while (recordReader.next(createKey, createValue)) {
                    arrayList.add(createKey);
                    createKey = recordReader.createKey();
                    j++;
                    if ((i2 + 1) * i <= j) {
                        break;
                    }
                }
                recordReader.close();
            }
            return (K[]) arrayList.toArray();
        }
    }

    /**
     * Creates an instance for the given job configuration by passing it
     * straight to the superclass constructor.
     *
     * @param jobConf job configuration forwarded to the superclass
     */
    public InputSampler(JobConf jobConf) {
        super(jobConf);
    }

    /**
     * Wraps the given job configuration in a {@link Job} and hands it, together
     * with the sampler, to the {@code writePartitionFile} overload that accepts
     * a {@code Job} (not declared in this file; presumably inherited from the
     * superclass).
     *
     * @param jobConf job configuration to wrap
     * @param sampler sampler passed through unchanged
     * @param <K>     key type handled by the sampler
     * @param <V>     value type handled by the sampler
     * @throws IOException            propagated from the {@code Job} constructor or the delegate
     * @throws ClassNotFoundException propagated from the delegate
     * @throws InterruptedException   propagated from the delegate
     */
    public static <K, V> void writePartitionFile(JobConf jobConf, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException {
        // Widen explicitly so the Configuration-taking Job constructor is selected.
        Configuration configuration = jobConf;
        Job wrappedJob = new Job(configuration);
        writePartitionFile(wrappedJob, sampler);
    }
}
