/*
 * Decompiled with CFR 0.152.
 */
package io.cdap.directives.nlp;

import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.directives.nlp.internal.PorterStemmer;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
import io.cdap.wrangler.api.DirectiveParseException;
import io.cdap.wrangler.api.ExecutorContext;
import io.cdap.wrangler.api.Row;
import io.cdap.wrangler.api.annotations.Categories;
import io.cdap.wrangler.api.lineage.Lineage;
import io.cdap.wrangler.api.lineage.Many;
import io.cdap.wrangler.api.lineage.Mutation;
import io.cdap.wrangler.api.parser.ColumnName;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Directive that runs a {@link PorterStemmer} over the words of one column and stores
 * the result in a sibling column named {@code <column>_porter}.
 *
 * <p>Accepted column values are a {@code String} (split into words on runs of
 * non-word characters), a {@code String[]}, or a {@code List} of strings. Rows that
 * do not contain the source column receive an empty list in the output column.
 */
@Plugin(type="directive")
@Name(value="stemming")
@Categories(categories={"nlp"})
@Description(value="Apply Porter Stemming on the column value.")
public class Stemming implements Directive, Lineage {
    public static final String NAME = "stemming";

    /** Word delimiter for String inputs; compiled once instead of once per row. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /** Name of the column whose value is stemmed. */
    private String column;
    /** Stemmer instance created in {@link #initialize(Arguments)}. */
    private PorterStemmer stemmer;
    /** Name of the output column: the source column name suffixed with {@code _porter}. */
    private String porterCol;

    /**
     * Declares the directive's usage: a single column-name argument called {@code column}.
     *
     * @return the usage definition for this directive
     */
    public UsageDefinition define() {
        UsageDefinition.Builder builder = UsageDefinition.builder((String)NAME);
        builder.define("column", TokenType.COLUMN_NAME);
        return builder.build();
    }

    /**
     * Reads the {@code column} argument, creates the stemmer and derives the output column name.
     *
     * @param args parsed directive arguments
     * @throws DirectiveParseException declared by the directive contract; not thrown by this body
     */
    public void initialize(Arguments args) throws DirectiveParseException {
        this.column = ((ColumnName)args.value("column")).value();
        this.stemmer = new PorterStemmer();
        this.porterCol = String.format("%s_porter", this.column);
    }

    /** No resources to release. */
    public void destroy() {
    }

    /**
     * Adds the output column to every row, holding the stemmer's result for the source column.
     *
     * @param rows rows to process; each row is modified in place
     * @param context execution context (unused)
     * @return the same {@code rows} list that was passed in
     * @throws DirectiveExecutionException if the column value is null, is of an unsupported
     *         type, contains a non-String list element, or the stemmer fails with an I/O error
     */
    public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
        for (Row row : rows) {
            int idx = row.find(this.column);
            if (idx == -1) {
                // Source column absent from this row: still emit the output column, with no stems.
                row.add(this.porterCol, new ArrayList<String>());
                continue;
            }

            Object object = row.getValue(idx);
            if (object == null) {
                throw new DirectiveExecutionException(NAME, String.format("Column '%s' has null value. It should be a non-null 'String', 'Array of String' or 'List of String'.", this.column));
            }

            List<String> words = this.toWords(object);
            try {
                row.add(this.porterCol, this.stemmer.process(words));
            }
            catch (IOException e) {
                throw new DirectiveExecutionException(NAME, String.format("Unable to apply porter stemmer on column '%s'. %s", this.column, e.getMessage()), (Throwable)e);
            }
        }
        return rows;
    }

    /**
     * Normalises a non-null column value into the list of words handed to the stemmer.
     *
     * @param value non-null column value
     * @return the words to stem
     * @throws DirectiveExecutionException if {@code value} is not a String, String[] or List,
     *         or if a List contains a non-null element that is not a String
     */
    private List<String> toWords(Object value) throws DirectiveExecutionException {
        if (value instanceof String[]) {
            return Arrays.asList((String[])value);
        }
        if (value instanceof String) {
            return Arrays.asList(NON_WORD.split((String)value));
        }
        if (value instanceof List) {
            List<?> items = (List<?>)value;
            List<String> words = new ArrayList<String>(items.size());
            for (Object item : items) {
                // Reject foreign element types here so the caller gets a directive error
                // instead of a ClassCastException; null elements are passed through unchanged.
                if (item != null && !(item instanceof String)) {
                    throw new DirectiveExecutionException(NAME, String.format("Invalid element type '%s' in column '%s'. It should be of type 'String', 'Array of String' or 'List of String'.", item.getClass().getSimpleName(), this.column));
                }
                words.add((String)item);
            }
            return words;
        }
        // Arguments are ordered to match the placeholders: type first, then column name.
        throw new DirectiveExecutionException(NAME, String.format("Invalid type '%s' of column '%s'. It should be of type 'String', 'Array of String' or 'List of String'.", value.getClass().getSimpleName(), this.column));
    }

    /**
     * Describes the lineage of this directive: the source column relates to both itself
     * and the generated output column.
     *
     * @return the mutation describing the column relation
     */
    public Mutation lineage() {
        return Mutation.builder()
            .readable("Reduced derived words using Porter technique from column '%s'", new Object[]{this.column})
            .relation(this.column, Many.of((String[])new String[]{this.column, this.porterCol}))
            .build();
    }
}

