/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.df.data;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.DataConverter;
import org.apache.mahout.df.data.Dataset;
import org.apache.mahout.df.data.DescriptorException;
import org.apache.mahout.df.data.DescriptorUtils;
import org.apache.mahout.df.data.Instance;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers that turn textual data (one instance per line, attributes
 * separated by commas and/or spaces) into {@link Data} and {@link Dataset}
 * objects.
 *
 * <p>This class is a non-instantiable utility holder.
 */
public final class DataLoader {
    private static final Logger log = LoggerFactory.getLogger(DataLoader.class);

    private DataLoader() {
    }

    /**
     * Parses one line of text into an {@link Instance}, registering any
     * previously unseen non-numerical value in {@code values}.
     *
     * @param id     identifier given to the created instance
     * @param attrs  attribute descriptions, one per token of {@code string}
     * @param values per-attribute lists of the distinct non-numerical values
     *               seen so far; entries are created and extended in place
     * @param string the line to parse; tokens are separated by ',' and/or ' '
     * @return the parsed instance, or {@code null} if a non-ignored attribute
     *         has the value {@code "?"}
     * @throws IllegalArgumentException if the number of tokens differs from
     *                                  {@code attrs.length}
     * @throws IllegalStateException    if no label value was found on the line
     */
    private static Instance parseString(int id, Dataset.Attribute[] attrs, List<String>[] values, String string) {
        StringTokenizer tokenizer = new StringTokenizer(string, ", ");
        Preconditions.checkArgument(tokenizer.countTokens() == attrs.length, "Wrong number of attributes in the string");

        // First pass: collect the tokens and bail out on a missing value BEFORE
        // touching 'values', so a rejected line never registers new values.
        String[] tokens = new String[attrs.length];
        for (int attr = 0; attr < attrs.length; ++attr) {
            String token = tokenizer.nextToken();
            if (attrs[attr].isIgnored()) {
                continue;
            }
            if ("?".equals(token)) {
                return null;
            }
            tokens[attr] = token;
        }

        int nbattrs = Dataset.countAttributes(attrs);
        DenseVector vector = new DenseVector(nbattrs);
        int aId = 0; // next free position in 'vector'
        int label = -1;

        // Second pass: fill the vector and resolve the label.
        for (int attr = 0; attr < attrs.length; ++attr) {
            if (attrs[attr].isIgnored()) {
                continue;
            }
            String token = tokens[attr];
            if (attrs[attr].isNumerical()) {
                vector.set(aId++, Double.parseDouble(token));
                continue;
            }

            // Non-numerical attribute: its code is the index of the value in
            // the per-attribute list, appended on first sight.
            if (values[attr] == null) {
                values[attr] = new ArrayList<String>();
            }
            int index = values[attr].indexOf(token);
            if (index == -1) {
                index = values[attr].size();
                values[attr].add(token);
            }

            if (attrs[attr].isCategorical()) {
                vector.set(aId++, (double) index);
            } else {
                // Neither ignored, numerical nor categorical: treated as the label.
                label = index;
            }
        }

        if (label == -1) {
            throw new IllegalStateException("Label not found!");
        }
        return new Instance(id, vector, label);
    }

    /**
     * Converts one line and appends the result to {@code instances}. Empty
     * lines and lines for which the converter returns {@code null} are logged
     * at WARN level and skipped. The id passed to the converter is the current
     * size of {@code instances}.
     */
    private static void convertAndAdd(DataConverter converter, List<Instance> instances, String line) {
        if (line.isEmpty()) {
            log.warn("{}: empty string", instances.size());
            return;
        }
        Instance instance = converter.convert(instances.size(), line);
        if (instance == null) {
            log.warn("{}: missing values", instances.size());
            return;
        }
        instances.add(instance);
    }

    /**
     * Loads the data stored in a file, one instance per line.
     *
     * @param dataset description of the data
     * @param fs      file system used to open {@code fpath}
     * @param fpath   path of the data file
     * @return the loaded data; empty lines and lines the converter rejects
     *         (returns {@code null} for) are skipped
     * @throws IOException if the file cannot be opened
     */
    public static Data loadData(Dataset dataset, FileSystem fs, Path fpath) throws IOException {
        FSDataInputStream input = fs.open(fpath);
        Scanner scanner = new Scanner((InputStream) input);
        List<Instance> instances = new ArrayList<Instance>();
        try {
            DataConverter converter = new DataConverter(dataset);
            while (scanner.hasNextLine()) {
                convertAndAdd(converter, instances, scanner.nextLine());
            }
        } finally {
            // Closing the scanner also closes the underlying stream; doing it
            // in 'finally' avoids leaking the stream when conversion throws.
            scanner.close();
        }
        return new Data(dataset, instances);
    }

    /**
     * Loads the data from an array of strings, one instance per element.
     *
     * @param dataset description of the data
     * @param data    the lines to convert
     * @return the loaded data; empty lines and lines the converter rejects
     *         (returns {@code null} for) are skipped
     */
    public static Data loadData(Dataset dataset, String[] data) {
        List<Instance> instances = new ArrayList<Instance>();
        DataConverter converter = new DataConverter(dataset);
        for (String line : data) {
            convertAndAdd(converter, instances, line);
        }
        return new Data(dataset, instances);
    }

    /**
     * Builds a {@link Dataset} by scanning every line of a file.
     *
     * @param descriptor attribute descriptor, parsed by
     *                   {@link DescriptorUtils#parseDescriptor}
     * @param fs         file system used to open {@code path}
     * @param path       path of the data file
     * @return a dataset built from the parsed attributes, the non-numerical
     *         values collected from the file and the number of lines that
     *         were successfully parsed
     * @throws DescriptorException declared for errors raised while parsing
     *                             {@code descriptor}
     * @throws IOException         if the file cannot be opened
     */
    public static Dataset generateDataset(String descriptor, FileSystem fs, Path path) throws DescriptorException, IOException {
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
        FSDataInputStream input = fs.open(path);
        Scanner scanner = new Scanner((InputStream) input);
        // Generic array creation is not allowed; every element is only ever
        // assigned a List<String> by parseString, so the conversion is safe.
        @SuppressWarnings("unchecked")
        List<String>[] values = new List[attrs.length];
        int id = 0;
        try {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (line.isEmpty()) {
                    continue;
                }
                // Only lines that yield an instance consume an id.
                if (DataLoader.parseString(id, attrs, values, line) != null) {
                    ++id;
                }
            }
        } finally {
            // Release the stream even when a malformed line makes parsing throw.
            scanner.close();
        }
        return new Dataset(attrs, values, id);
    }

    /**
     * Builds a {@link Dataset} by scanning every element of an array of lines.
     *
     * @param descriptor attribute descriptor, parsed by
     *                   {@link DescriptorUtils#parseDescriptor}
     * @param data       the lines to scan
     * @return a dataset built from the parsed attributes, the non-numerical
     *         values collected from {@code data} and the number of lines that
     *         were successfully parsed
     * @throws DescriptorException declared for errors raised while parsing
     *                             {@code descriptor}
     */
    public static Dataset generateDataset(String descriptor, String[] data) throws DescriptorException {
        Dataset.Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
        // Generic array creation is not allowed; every element is only ever
        // assigned a List<String> by parseString, so the conversion is safe.
        @SuppressWarnings("unchecked")
        List<String>[] values = new List[attrs.length];
        int id = 0;
        for (String aData : data) {
            if (aData.isEmpty()) {
                continue;
            }
            // Only lines that yield an instance consume an id.
            if (DataLoader.parseString(id, attrs, values, aData) != null) {
                ++id;
            }
        }
        return new Dataset(attrs, values, id);
    }
}

