/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.aarch64;

import java.util.Arrays;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.aarch64.AArch64ASIMDAssembler;
import org.graalvm.compiler.asm.aarch64.AArch64Address;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.StubPort;
import org.graalvm.compiler.lir.StubPorts;
import org.graalvm.compiler.lir.aarch64.AArch64AESEncryptOp;
import org.graalvm.compiler.lir.aarch64.AArch64LIRInstruction;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AArch64 NEON code for GHASH block processing. The code generators
 * are ports of the HotSpot routines identified by the {@code @StubPorts} annotation below.
 *
 * <p>
 * Two code paths are emitted by {@link #emitCode}: a "wide" path that keeps
 * {@link #WIDE_UNROLLS} independent hash states in flight, and a one-block-at-a-time path that
 * handles short inputs and whatever the wide path leaves over.
 *
 * <p>
 * All SIMD registers are declared as temporaries of this instruction (see {@link #temps}), so the
 * emitters below use fixed vector registers without saving or restoring them.
 */
@StubPorts(value={@StubPort(path="src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp", lineStart=5855, lineEnd=5989, commit="77e21c57ce00463db4cc3d87f93729cbfe2c96b4", sha1="f11f84b57df21c9b49473f204e11efc0e6da53d0"), @StubPort(path="src/hotspot/cpu/aarch64/macroAssembler_aarch64_aes.cpp", lineStart=285, lineEnd=691, commit="2fe0ce01485d7b84dc109d3d4f24bdd908c0e7cf", sha1="75163bb4c510e3fa9f2347c5017561493d893691")})
public final class AArch64GHASHProcessBlocksOp
extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64GHASHProcessBlocksOp> TYPE = LIRInstructionClass.create(AArch64GHASHProcessBlocksOp.class);

    /**
     * Distance, in register numbers, between the same logical register of two adjacent unrolled
     * clones (clone {@code i} uses {@code v(n + i * REGISTER_STRIDE)} where clone 0 uses
     * {@code vn}). With {@link #WIDE_UNROLLS} clones using base registers v0..v5 the highest
     * cloned register is v26, which stays clear of the shared registers v28..v31.
     */
    private static final int REGISTER_STRIDE = 7;

    /** Number of blocks the wide path processes side by side. */
    private static final int WIDE_UNROLLS = 4;

    /**
     * Smallest block count for which the wide path may be entered. The wide path loads one batch
     * of {@link #WIDE_UNROLLS} blocks up front and a second batch during its first loop
     * iteration, so it always reads at least two batches.
     */
    private static final int WIDE_MIN_BLOCKS = 2 * WIDE_UNROLLS;

    /** Size in bytes of one 128-bit block / one subkey table slot. */
    private static final int BLOCK_BYTES = 16;

    /** Bits 7, 2, 1 and 0 set: the constant {@code p} used by the reduction step. */
    private static final long REDUCTION_POLYNOMIAL = 0x87L;

    /** Address of the 128-bit hash state; read at the start and written back at the end. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value stateValue;
    /** Address of the subkey table; slot 0 is read, further slots may be read and written. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value htblValue;
    /** Address of the first input block. Copied into {@link #dataValue} before use. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value originalDataValue;
    /** Number of input blocks. Copied into {@link #blocksValue} before use. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value originalBlocksValue;
    /** Working copy of the data pointer; advanced by the post-indexed loads. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value dataValue;
    /** Working copy of the block counter; decremented by the emitted loops. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value blocksValue;
    /** Every SIMD register, so the emitted code may clobber any of them. */
    @LIRInstruction.Temp
    protected Value[] temps;

    /**
     * Creates the instruction.
     *
     * @param tool used to allocate the two working-copy variables
     * @param stateValue register holding the address of the hash state
     * @param htblValue register holding the address of the subkey table
     * @param originalDataValue register holding the address of the input data
     * @param originalBlocksValue register holding the number of blocks to process
     */
    public AArch64GHASHProcessBlocksOp(LIRGeneratorTool tool, AllocatableValue stateValue, AllocatableValue htblValue, AllocatableValue originalDataValue, AllocatableValue originalBlocksValue) {
        super(TYPE);
        this.stateValue = stateValue;
        this.htblValue = htblValue;
        this.originalDataValue = originalDataValue;
        this.originalBlocksValue = originalBlocksValue;
        this.dataValue = tool.newVariable(originalDataValue.getValueKind());
        this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind());
        this.temps = Arrays.stream(AArch64.simdRegisters.toArray()).map(Register::asValue).toArray(Value[]::new);
    }

    /**
     * Emits the complete kernel: copy the inputs into the working registers, run the wide path
     * when at least {@link #WIDE_MIN_BLOCKS} blocks are available, then run the single-block loop
     * for any blocks that remain.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        assert stateValue.getPlatformKind().equals(AArch64Kind.QWORD) : stateValue;
        assert htblValue.getPlatformKind().equals(AArch64Kind.QWORD) : htblValue;
        assert originalDataValue.getPlatformKind().equals(AArch64Kind.QWORD) : originalDataValue;
        assert originalBlocksValue.getPlatformKind().equals(AArch64Kind.DWORD) : originalBlocksValue;

        Register state = ValueUtil.asRegister(stateValue);
        Register subkeyH = ValueUtil.asRegister(htblValue);
        Register originalData = ValueUtil.asRegister(originalDataValue);
        Register originalBlocks = ValueUtil.asRegister(originalBlocksValue);
        Register data = ValueUtil.asRegister(dataValue);
        Register blocks = ValueUtil.asRegister(blocksValue);

        Label labelSmall = new Label();
        Label labelDone = new Label();

        // Work on copies so the incoming (alive) registers are never modified.
        masm.mov(64, data, originalData);
        masm.mov(32, blocks, originalBlocks);

        masm.compare(32, blocks, WIDE_MIN_BLOCKS);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LT, labelSmall);

        ghashProcessBlocksWide(masm, state, subkeyH, data, blocks, WIDE_UNROLLS);

        // The wide path has decremented 'blocks'; skip the narrow loop if nothing is left.
        masm.compare(32, blocks, 0);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, labelDone);

        masm.bind(labelSmall);
        generateGhashProcessBlocks(masm, state, subkeyH, data, blocks);
        masm.bind(labelDone);
    }

    /** Returns the register that clone {@code cloneIndex} uses in place of {@code base}. */
    private static Register cloneOf(Register base, int cloneIndex) {
        return AArch64AESEncryptOp.asFloatRegister(base, cloneIndex * REGISTER_STRIDE);
    }

    /** Emits {@code EOR vzr, vzr, vzr} followed by broadcasting the reduction constant into {@code p}. */
    private static void emitZeroAndPolynomial(AArch64MacroAssembler masm, Register vzr, Register p) {
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, vzr, vzr, vzr);
        try (AArch64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) {
            Register scratch = sc.getRegister();
            masm.mov(scratch, REDUCTION_POLYNOMIAL);
            masm.neon.dupVG(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.DoubleWord, p, scratch);
        }
    }

    /**
     * Emits REV64 (byte elements) from {@code src} into {@code dst} followed by RBIT on
     * {@code dst}; this is the conversion applied to the state and subkeys on load and on store.
     */
    private static void emitBitReverse(AArch64MacroAssembler masm, Register dst, Register src) {
        masm.neon.rev64VV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, dst, src);
        masm.neon.rbitVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dst, dst);
    }

    /**
     * Emits {@code dst = swapHalves(h) ^ h}, the {@code a1XORa0} operand consumed by
     * {@link #ghashMultiply}.
     */
    private static void emitHighXorLow(AArch64MacroAssembler masm, Register dst, Register h) {
        masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dst, h, h, 8);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dst, dst, h);
    }

    /** Emits RBIT on {@code block} and then XORs {@code hashState} into it. */
    private static void emitXorBlockIntoState(AArch64MacroAssembler masm, Register block, Register hashState) {
        masm.neon.rbitVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, block, block);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, block, hashState, block);
    }

    /**
     * Emits the one-block-per-iteration loop. The loop body runs before {@code blocks} is tested,
     * so the caller must only reach this code with {@code blocks > 0}.
     *
     * <p>
     * Fixed registers: v0 state, v1 subkey, v2 current block, v3/v6 temporaries, v4
     * high-xor-low of the subkey, v5/v7 product low/high, v24 reduction constant, v30 zero.
     */
    private static void generateGhashProcessBlocks(AArch64MacroAssembler masm, Register state, Register subkeyH, Register data, Register blocks) {
        Register vzr = AArch64.v30;
        Register p = AArch64.v24;

        emitZeroAndPolynomial(masm, vzr, p);

        masm.fldr(128, AArch64.v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
        masm.fldr(128, AArch64.v1, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH));
        emitBitReverse(masm, AArch64.v0, AArch64.v0);
        emitBitReverse(masm, AArch64.v1, AArch64.v1);
        emitHighXorLow(masm, AArch64.v4, AArch64.v1);

        Label labelGHASHLoop = new Label();
        masm.bind(labelGHASHLoop);

        // Load one block and advance the data pointer past it.
        masm.fldr(128, AArch64.v2, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, data, BLOCK_BYTES));
        emitXorBlockIntoState(masm, AArch64.v2, AArch64.v0);

        // v7:v5 = v2 * v1, then v0 = reduce(v7:v5). v2 doubles as the third temporary.
        ghashMultiply(masm, AArch64.v5, AArch64.v7, AArch64.v1, AArch64.v2, AArch64.v4, AArch64.v6, AArch64.v3, AArch64.v2);
        ghashReduce(masm, AArch64.v0, AArch64.v5, AArch64.v7, p, vzr, AArch64.v3);

        masm.sub(32, blocks, blocks, 1);
        masm.cbnz(32, blocks, labelGHASHLoop);

        // Undo the load-time conversion and write the state back.
        emitBitReverse(masm, AArch64.v0, AArch64.v0);
        masm.fstr(128, AArch64.v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
    }

    /**
     * Emits the unrolled path that keeps {@code unrolls} hash states in flight.
     *
     * <p>
     * The emitted code loads {@code unrolls} blocks up front and {@code unrolls} more in every
     * loop iteration, and the loop body runs at least once, so the caller must guarantee
     * {@code blocks >= 2 * unrolls}. On exit {@code blocks} holds the number of blocks that were
     * not consumed and {@code data} points at the first of them.
     *
     * <p>
     * Slots 1..{@code unrolls - 1} of the table behind {@code subkeyH} cache higher powers of the
     * subkey. If the last slot is all zero they are computed and stored here; otherwise the
     * stored values are used.
     *
     * <p>
     * Registers shared by all clones: v28 high-xor-low of the current subkey power, v29 the
     * current subkey power, v30 zero, v31 reduction constant. Clone {@code i} uses
     * v0..v5 shifted by {@code i * REGISTER_STRIDE}: v0 state/result, v1 and v3 temporaries, v2
     * data block (reused as a temporary), v4 product high, v5 product low.
     */
    private static void ghashProcessBlocksWide(AArch64MacroAssembler masm, Register state, Register subkeyH, Register data, Register blocks, int unrolls) {
        Register a1XORa0 = AArch64.v28;
        Register hPrime = AArch64.v29;
        Register vzr = AArch64.v30;
        Register p = AArch64.v31;
        int lastSlotOffset = BLOCK_BYTES * (unrolls - 1);

        emitZeroAndPolynomial(masm, vzr, p);

        masm.fldr(128, AArch64.v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
        masm.fldr(128, hPrime, AArch64Address.createBaseRegisterOnlyAddress(128, subkeyH));
        emitBitReverse(masm, AArch64.v0, AArch64.v0);
        emitBitReverse(masm, hPrime, hPrime);

        Label labelAlreadyCalculated = new Label();
        Label labelDone = new Label();

        // A non-zero last table slot means the subkey powers were stored by an earlier call.
        try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister sc2 = masm.getScratchRegister()) {
            Register rscratch1 = sc1.getRegister();
            Register rscratch2 = sc2.getRegister();
            masm.ldp(64, rscratch1, rscratch2, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_PAIR_SIGNED_SCALED, subkeyH, lastSlotOffset));
            masm.orr(64, rscratch1, rscratch1, rscratch2);
            masm.cbnz(64, rscratch1, labelAlreadyCalculated);
        }

        // Not cached: start with v6 = subkey and repeatedly multiply by the subkey, storing each
        // product (converted back to memory layout via v1) in the next table slot.
        masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64.v6, hPrime, hPrime);
        for (int slot = 1; slot < unrolls; slot++) {
            emitHighXorLow(masm, a1XORa0, hPrime);
            ghashModmul(masm, AArch64.v6, AArch64.v5, AArch64.v4, AArch64.v6, hPrime, vzr, a1XORa0, p, AArch64.v1, AArch64.v3, AArch64.v2);
            emitBitReverse(masm, AArch64.v1, AArch64.v6);
            masm.fstr(128, AArch64.v1, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED, subkeyH, BLOCK_BYTES * slot));
        }
        masm.jmp(labelDone);

        // Cached: fetch the highest stored power into v6.
        masm.bind(labelAlreadyCalculated);
        masm.fldr(128, AArch64.v6, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED, subkeyH, lastSlotOffset));
        emitBitReverse(masm, AArch64.v6, AArch64.v6);

        masm.bind(labelDone);
        // Either way v6 now holds the highest power; the main loop multiplies by it.
        masm.neon.orrVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, hPrime, AArch64.v6, AArch64.v6);

        // Clone 0 keeps the loaded state in v0; the other clones start from zero.
        for (int c = 1; c < unrolls; c++) {
            Register cloneState = cloneOf(AArch64.v0, c);
            masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, cloneState, cloneState, cloneState);
        }

        emitHighXorLow(masm, a1XORa0, hPrime);

        // Load the first batch: one block per clone.
        for (int c = 0; c < unrolls; c++) {
            masm.fldr(128, cloneOf(AArch64.v2, c), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, data, BLOCK_BYTES));
        }

        Label labelGHASHLoop = new Label();
        masm.bind(labelGHASHLoop);

        for (int c = 0; c < unrolls; c++) {
            emitXorBlockIntoState(masm, cloneOf(AArch64.v2, c), cloneOf(AArch64.v0, c));
        }

        // Interleaved multiply for all clones, then interleaved reduce. The reduce generator
        // also emits the loads of the next batch into the clones' v2 registers.
        new GHASHMultiplyGenerator(masm, unrolls, AArch64.v5, AArch64.v4, AArch64.v2, hPrime, a1XORa0, p, vzr, AArch64.v1, AArch64.v3, AArch64.v2).unroll();
        new GHASHReduceGenerator(masm, unrolls, AArch64.v0, AArch64.v5, AArch64.v4, p, vzr, data, AArch64.v2, AArch64.v3, true).unroll();

        masm.sub(32, blocks, blocks, unrolls);
        // Iterate again only while a further full batch remains beyond the one already loaded.
        masm.compare(32, blocks, unrolls * 2);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, labelGHASHLoop);

        // Final batch (already loaded): clone c is multiplied by the power stored in table slot
        // (unrolls - c - 1) instead of the highest power.
        for (int c = 0; c < unrolls; c++) {
            Register cloneState = cloneOf(AArch64.v0, c);
            Register cloneData = cloneOf(AArch64.v2, c);
            masm.fldr(128, hPrime, AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED, subkeyH, BLOCK_BYTES * (unrolls - c - 1)));
            emitXorBlockIntoState(masm, cloneData, cloneState);
            emitBitReverse(masm, hPrime, hPrime);
            emitHighXorLow(masm, a1XORa0, hPrime);
            ghashModmul(masm, cloneState, cloneOf(AArch64.v5, c), cloneOf(AArch64.v4, c), cloneData, hPrime, vzr, a1XORa0, p, cloneOf(AArch64.v1, c), cloneOf(AArch64.v3, c), cloneData);
        }

        // Fold the states of clones 1..unrolls-1 into v0.
        for (int c = 1; c < unrolls; c++) {
            masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64.v0, AArch64.v0, cloneOf(AArch64.v0, c));
        }

        masm.sub(32, blocks, blocks, unrolls);

        emitBitReverse(masm, AArch64.v0, AArch64.v0);
        masm.fstr(128, AArch64.v0, AArch64Address.createBaseRegisterOnlyAddress(128, state));
    }

    /**
     * Emits {@link #ghashMultiply} of {@code a} and {@code b} into {@code resultHi:resultLo}
     * followed by {@link #ghashReduce} into {@code result}. {@code t1} is used as a temporary by
     * both steps; {@code t3} may alias {@code b}, as all callers in this class do.
     */
    private static void ghashModmul(AArch64MacroAssembler masm, Register result, Register resultLo, Register resultHi, Register b, Register a, Register vzr, Register a1XORa0, Register p, Register t1, Register t2, Register t3) {
        ghashMultiply(masm, resultLo, resultHi, a, b, a1XORa0, t1, t2, t3);
        ghashReduce(masm, result, resultLo, resultHi, p, vzr, t1);
    }

    /**
     * Emits the seven-instruction reduction of the 256-bit value {@code hi:lo} into
     * {@code result}. {@code result} is also used as a scratch register before it receives the
     * final value; {@code hi}, {@code lo} and {@code t1} are clobbered. {@code vzr} must contain
     * zero and {@code p} the broadcast reduction constant.
     */
    private static void ghashReduce(AArch64MacroAssembler masm, Register result, Register lo, Register hi, Register p, Register vzr, Register t1) {
        Register t0 = result;
        masm.neon.pmull2VVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, t0, hi, p);
        masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, t1, t0, vzr, 8);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, hi, hi, t1);
        masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, t1, vzr, t0, 8);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, lo, lo, t1);
        masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, t0, hi, p);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, result, lo, t0);
    }

    /**
     * Emits the eleven-instruction polynomial multiply of {@code a} and {@code b}, leaving the
     * product in {@code resultHi:resultLo}. Three PMULL-family instructions are used: high
     * halves, low halves, and {@code (b.hi ^ b.lo) * a1XORa0}; the third product, combined with
     * the other two, is inserted into the middle 128 bits of the result.
     *
     * <p>
     * {@code b} is read for the last time by the fourth instruction while {@code tmp3} is first
     * written by the seventh, so {@code tmp3} may be the same register as {@code b}.
     */
    private static void ghashMultiply(AArch64MacroAssembler masm, Register resultLo, Register resultHi, Register a, Register b, Register a1XORa0, Register tmp1, Register tmp2, Register tmp3) {
        masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, b, b, 8);
        masm.neon.pmull2VVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultHi, b, a);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, tmp1, b);
        masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultLo, b, a);
        masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, tmp2, tmp1, a1XORa0);
        masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, resultLo, resultHi, 8);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp3, resultHi, resultLo);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp2, tmp2, tmp1);
        masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp2, tmp2, tmp3);
        masm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultHi, 0, tmp2, 1);
        masm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultLo, 1, tmp2, 0);
    }

    /**
     * Step-wise version of {@link #ghashReduce}: {@code generate(i)} emits the i-th instruction
     * of the reduction, and {@link #next()} yields the generator for the following clone with
     * its per-clone registers shifted by {@link #REGISTER_STRIDE}.
     *
     * <p>
     * The generator created with {@code once == true} additionally emits, after each of its
     * first {@code unrolls} steps, a post-indexed load of the next data block into the
     * corresponding clone's {@code data} register.
     */
    static final class GHASHReduceGenerator
    extends AArch64AESEncryptOp.KernelGenerator {
        private final AArch64MacroAssembler masm;
        private final Register result;
        private final Register lo;
        private final Register hi;
        private final Register p;
        private final Register vzr;
        private final Register dataPtr;
        private final Register data;
        private final Register t1;
        private final boolean once;

        GHASHReduceGenerator(AArch64MacroAssembler masm, int unrolls, Register result, Register lo, Register hi, Register p, Register vzr, Register dataPtr, Register data, Register t1, boolean once) {
            super(unrolls);
            this.masm = masm;
            this.result = result;
            this.lo = lo;
            this.hi = hi;
            this.p = p;
            this.vzr = vzr;
            this.dataPtr = dataPtr;
            this.data = data;
            this.t1 = t1;
            this.once = once;
        }

        @Override
        public void generate(int index) {
            // As in ghashReduce, the result register doubles as scratch until the last step.
            Register t0 = result;
            switch (index) {
                case 0:
                    masm.neon.pmull2VVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, t0, hi, p);
                    break;
                case 1:
                    masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, t1, t0, vzr, 8);
                    break;
                case 2:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, hi, hi, t1);
                    break;
                case 3:
                    masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, t1, vzr, t0, 8);
                    break;
                case 4:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, lo, lo, t1);
                    break;
                case 5:
                    masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, t0, hi, p);
                    break;
                case 6:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, result, lo, t0);
                    break;
                default:
                    throw GraalError.shouldNotReachHere();
            }

            // Only the first generator in the chain emits the next-batch loads: step 'index'
            // loads the block for clone 'index'.
            if (!Register.None.equals(data) && once) {
                assert length() >= unrolls : "not enough room for interleaved loads";
                if (index < unrolls) {
                    masm.fldr(128, cloneOf(data, index), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, dataPtr, BLOCK_BYTES));
                }
            }
        }

        @Override
        public AArch64AESEncryptOp.KernelGenerator next() {
            // result/lo/hi/t1 move to the next clone; p, vzr, dataPtr and data are passed on
            // unchanged, and the successor never emits loads.
            return new GHASHReduceGenerator(masm, unrolls, cloneOf(result, 1), cloneOf(lo, 1), cloneOf(hi, 1), p, vzr, dataPtr, data, cloneOf(t1, 1), false);
        }

        /** Number of steps handled by {@link #generate}; unrelated to {@link #REGISTER_STRIDE}. */
        @Override
        public int length() {
            return 7;
        }
    }

    /**
     * Step-wise version of {@link #ghashMultiply}: {@code generate(i)} emits the i-th
     * instruction of the multiply, and {@link #next()} yields the generator for the following
     * clone with its per-clone registers shifted by {@link #REGISTER_STRIDE}. The multiplier
     * {@code a} and {@code a1XORa0} are the same for every clone.
     */
    static final class GHASHMultiplyGenerator
    extends AArch64AESEncryptOp.KernelGenerator {
        private final AArch64MacroAssembler masm;
        private final Register resultLo;
        private final Register resultHi;
        private final Register b;
        private final Register a;
        private final Register vzr;
        private final Register a1XORa0;
        private final Register p;
        private final Register tmp1;
        private final Register tmp2;
        private final Register tmp3;

        GHASHMultiplyGenerator(AArch64MacroAssembler masm, int unrolls, Register resultLo, Register resultHi, Register b, Register a, Register a1XORa0, Register p, Register vzr, Register tmp1, Register tmp2, Register tmp3) {
            super(unrolls);
            this.masm = masm;
            this.resultLo = resultLo;
            this.resultHi = resultHi;
            this.b = b;
            this.a = a;
            this.a1XORa0 = a1XORa0;
            this.p = p;
            this.vzr = vzr;
            this.tmp1 = tmp1;
            this.tmp2 = tmp2;
            this.tmp3 = tmp3;
        }

        @Override
        public void generate(int index) {
            switch (index) {
                case 0:
                    masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, b, b, 8);
                    break;
                case 1:
                    masm.neon.pmull2VVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultHi, b, a);
                    break;
                case 2:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, tmp1, b);
                    break;
                case 3:
                    masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultLo, b, a);
                    break;
                case 4:
                    masm.neon.pmullVVV(AArch64ASIMDAssembler.ElementSize.DoubleWord, tmp2, tmp1, a1XORa0);
                    break;
                case 5:
                    masm.neon.extVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp1, resultLo, resultHi, 8);
                    break;
                case 6:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp3, resultHi, resultLo);
                    break;
                case 7:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp2, tmp2, tmp1);
                    break;
                case 8:
                    masm.neon.eorVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, tmp2, tmp2, tmp3);
                    break;
                case 9:
                    masm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultHi, 0, tmp2, 1);
                    break;
                case 10:
                    masm.neon.insXX(AArch64ASIMDAssembler.ElementSize.DoubleWord, resultLo, 1, tmp2, 0);
                    break;
                default:
                    throw GraalError.shouldNotReachHere();
            }
        }

        @Override
        public AArch64AESEncryptOp.KernelGenerator next() {
            // Results, b and the temporaries move to the next clone; a, a1XORa0, p and vzr are
            // passed on unchanged.
            return new GHASHMultiplyGenerator(masm, unrolls, cloneOf(resultLo, 1), cloneOf(resultHi, 1), cloneOf(b, 1), a, a1XORa0, p, vzr, cloneOf(tmp1, 1), cloneOf(tmp2, 1), cloneOf(tmp3, 1));
        }

        /** Number of steps handled by {@link #generate}. */
        @Override
        public int length() {
            return 11;
        }
    }
}

