package morfologik.tools;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import morfologik.dependencies.org.apache.commons.cli.CommandLine;
import morfologik.dependencies.org.apache.commons.cli.Options;
import morfologik.dependencies.org.apache.commons.lang.StringEscapeUtils;
import morfologik.fsa.MatchResult;
import morfologik.stemming.EncoderType;

/* loaded from: input_file:morfologik/tools/MorphEncodingTool.class */
/**
 * Command-line tool that reads newline-delimited, tab-separated byte records
 * (two or three columns per line), passes the columns through a
 * {@link SequenceAssembler} and writes each encoded record followed by a
 * single {@code '\n'} byte.
 *
 * <p>Input is taken from the file given by the input-file option or from
 * {@code System.in}; output goes to the file given by the output-file option
 * or to {@code System.out}.
 */
class MorphEncodingTool extends Tool {
    /** Charset used only to render offending input lines in diagnostics. */
    private static final Charset US_ASCII = Charset.forName("US-ASCII");

    /** Separator used when the separator option is not present on the command line. */
    private static final char DEFAULT_SEPARATOR = '+';

    /** When {@code true}, no warning is printed for lines that have only two columns. */
    private boolean noWarn = false;

    /** Assembles the output byte sequence for one input record. */
    private SequenceAssembler encoder;

    /** Byte that must not occur in the first two columns of any input line. */
    private byte separatorByte;

    /** Separator character; used for the encoder and in error messages. */
    private char separator;

    MorphEncodingTool() {
    }

    /**
     * Configures the tool from the parsed command line and runs the conversion.
     *
     * @param commandLine parsed command-line options
     * @throws IllegalArgumentException if the encoder name is not an
     *         {@link EncoderType} constant, or if the separator is not exactly
     *         one character in the range 0..255
     */
    @Override // morfologik.tools.Tool
    protected void go(CommandLine commandLine) {
        this.noWarn = commandLine.hasOption(SharedOptions.noWarnIfTwoFields.getOpt());

        EncoderType encoderType = EncoderType.SUFFIX;
        String encoderOpt = SharedOptions.encoder.getOpt();
        if (commandLine.hasOption(encoderOpt)) {
            String encoderName = commandLine.getOptionValue(encoderOpt);
            try {
                // Locale.ROOT: enum constant names are ASCII; a locale-sensitive
                // upper-casing (e.g. Turkish dotted I) would break the lookup.
                encoderType = EncoderType.valueOf(encoderName.toUpperCase(Locale.ROOT));
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException("Invalid encoder: " + encoderName + ", allowed values: " + Arrays.toString(EncoderType.values()), e);
            }
        }

        // Initialise BOTH representations of the separator. Leaving separatorByte
        // at its zero default would make the validation in encodeLine() look for
        // NUL bytes instead of the default separator.
        this.separator = DEFAULT_SEPARATOR;
        this.separatorByte = (byte) DEFAULT_SEPARATOR;

        String separatorOpt = SharedOptions.annotationSeparatorCharacterOption.getLongOpt();
        if (commandLine.hasOption(separatorOpt)) {
            // Escape sequences such as "\t" are decoded before validation.
            String unescaped = StringEscapeUtils.unescapeJava(commandLine.getOptionValue(separatorOpt));
            if (unescaped.length() != 1) {
                throw new IllegalArgumentException("Field separator must be a single character: " + unescaped);
            }
            char candidate = unescaped.charAt(0);
            if (candidate > 255) {
                throw new IllegalArgumentException("Field separator not within byte range: " + ((int) candidate));
            }
            this.separator = candidate;
            this.separatorByte = FSABuildTool.checkSingleByte(Character.toString(candidate), Charset.defaultCharset());
        }

        // NOTE(review): the encoder receives (byte) separator while input validation
        // uses separatorByte; these presumably coincide for ASCII separators but may
        // differ for 0x80..0xFF depending on the default charset - confirm.
        this.encoder = new SequenceAssembler(SequenceEncoders.forType(encoderType), (byte) this.separator);

        // Nested try/finally so that the input is released even if opening the
        // output fails, and each stream is closed regardless of the other.
        DataInputStream input = initializeInput(commandLine);
        try {
            DataOutputStream output = initializeOutput(commandLine);
            try {
                process(input, output);
                output.flush();
            } finally {
                output.close();
            }
        } finally {
            input.close();
        }
    }

    /**
     * Reads the input byte by byte, splitting it into lines on {@code '\n'} and
     * into columns on {@code '\t'} (every {@code '\r'} byte is dropped), and
     * writes one encoded record per non-empty line. The input stream is always
     * closed when this method returns or throws.
     *
     * @param dataInputStream source of raw input bytes
     * @param dataOutputStream destination for encoded records
     * @throws IllegalArgumentException if a line does not have 2 or 3 columns,
     *         or if either of its first two columns contains the separator byte
     */
    protected void process(DataInputStream dataInputStream, DataOutputStream dataOutputStream) {
        long lineNumber = 0;
        try {
            byte[] buffer = new byte[0];
            int length = 0;
            ArrayList<byte[]> columns = new ArrayList<byte[]>();
            int b;
            do {
                b = dataInputStream.read();
                switch (b) {
                    case '\r':
                        // Carriage returns are ignored wherever they occur.
                        break;

                    case '\t':
                        // End of column: store what was collected so far.
                        columns.add(Arrays.copyOf(buffer, length));
                        length = 0;
                        break;

                    case -1:
                    case '\n':
                        // End of line; end of input also terminates a pending line.
                        lineNumber++;
                        if (length == 0 && columns.isEmpty()) {
                            if (b != -1) {
                                System.err.println(String.format(Locale.ROOT, "Ignoring empty line %d.", lineNumber));
                            }
                            break;
                        }
                        columns.add(Arrays.copyOf(buffer, length));
                        encodeLine(columns, lineNumber, dataOutputStream);
                        length = 0;
                        columns.clear();
                        break;

                    default:
                        if (length >= buffer.length) {
                            buffer = Arrays.copyOf(buffer, buffer.length + 1024);
                        }
                        buffer[length++] = (byte) b;
                        break;
                }
            } while (b != -1);
        } finally {
            dataInputStream.close();
        }
    }

    /**
     * Validates the columns of a single input line and writes the encoded
     * record followed by a {@code '\n'} byte.
     *
     * @param columns the columns of the line, in input order
     * @param lineNumber 1-based line number, used in diagnostics only
     * @param output destination for the encoded record
     * @throws IllegalArgumentException if the column count is not 2 or 3, or if
     *         either of the first two columns contains the separator byte
     */
    private void encodeLine(ArrayList<byte[]> columns, long lineNumber, DataOutputStream output) {
        int count = columns.size();
        if (count < 2 || count > 3) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Every \\n-delimited 'line' must contain 2 or 3 columns, line %d has %d. US-ASCII version of this line: %s", lineNumber, count, toAscii(columns)));
        }
        if (count == 2 && !this.noWarn) {
            System.err.println(String.format(Locale.ROOT, "Line %d has %d columns. US-ASCII version of this line: %s", lineNumber, count, toAscii(columns)));
        }

        byte[] word = columns.get(0);
        byte[] lemma = columns.get(1);
        if (contains(word, this.separatorByte) || contains(lemma, this.separatorByte)) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Either word or lemma in line %d contain the annotation byte '%s': %s", lineNumber, this.separator, toAscii(columns)));
        }

        // The third column is optional; null is passed to the encoder when absent.
        byte[] third = count > 2 ? columns.get(2) : null;
        output.write(this.encoder.encode(word, lemma, third));
        output.writeByte('\n');
    }

    /**
     * Returns {@code true} if {@code b} occurs anywhere in {@code bArr}.
     */
    private boolean contains(byte[] bArr, byte b) {
        for (byte current : bArr) {
            if (current == b) {
                return true;
            }
        }
        return false;
    }

    /**
     * Renders the given columns as a single tab-joined string, decoding each
     * column as US-ASCII. Intended for diagnostics only.
     */
    private String toAscii(ArrayList<byte[]> arrayList) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < arrayList.size(); i++) {
            if (i > 0) {
                sb.append("\t");
            }
            sb.append(new String(arrayList.get(i), US_ASCII));
        }
        return sb.toString();
    }

    /**
     * Registers the command-line options understood by this tool.
     *
     * @param options option set to add this tool's options to
     */
    @Override // morfologik.tools.Tool
    protected void initializeOptions(Options options) {
        options.addOption(SharedOptions.inputFileOption);
        options.addOption(SharedOptions.outputFileOption);
        options.addOption(SharedOptions.encoder);
        options.addOption(SharedOptions.noWarnIfTwoFields);
        options.addOption(SharedOptions.annotationSeparatorCharacterOption);
    }

    /**
     * Opens a buffered output stream on the file named by the output-file
     * option, or on {@code System.out} when the option is absent.
     */
    private static DataOutputStream initializeOutput(CommandLine commandLine) {
        String opt = SharedOptions.outputFileOption.getOpt();
        if (commandLine.hasOption(opt)) {
            File target = (File) commandLine.getParsedOptionValue(opt);
            return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(target)));
        }
        return new DataOutputStream(new BufferedOutputStream(System.out));
    }

    /**
     * Opens a buffered input stream on the file named by the input-file
     * option, or on {@code System.in} when the option is absent.
     */
    private static DataInputStream initializeInput(CommandLine commandLine) {
        String opt = SharedOptions.inputFileOption.getOpt();
        if (commandLine.hasOption(opt)) {
            File source = (File) commandLine.getParsedOptionValue(opt);
            return new DataInputStream(new BufferedInputStream(new FileInputStream(source)));
        }
        return new DataInputStream(new BufferedInputStream(System.in));
    }

    /**
     * Command-line entry point.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String... strArr) {
        new MorphEncodingTool().go(strArr);
    }
}
