/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64ComplexVectorOp;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AMD64 machine code for compressing a run of 16-bit chars into
 * bytes.
 *
 * <p>
 * The emitted code reads {@code len} 16-bit elements starting at {@code src}, and writes the low
 * byte of each element to consecutive bytes starting at {@code dst}. The result register receives
 * the original value of {@code len} if every element was {@code <= 0xff}; it receives 0 as soon as
 * an element with any of the high 8 bits set is found. When 0 is returned, chunks that were
 * verified before the offending element may already have been stored to {@code dst}.
 *
 * <p>
 * Three code paths are emitted depending on the target: an AVX-512 path (only when
 * {@code useAVX3Threshold == 0} and the required CPU features are present), an SSE4.2 path, and a
 * scalar one-char-per-iteration loop that also handles whatever the vector paths leave over.
 */
@Opcode(value="AMD64_STRING_COMPRESS")
public final class AMD64StringUTF16CompressOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64StringUTF16CompressOp> TYPE = LIRInstructionClass.create(AMD64StringUTF16CompressOp.class);

    /** AVX3 threshold setting; the AVX-512 path is emitted only when this is 0. */
    private final int useAVX3Threshold;

    /** Result: original length on success, 0 on failure. Asserted to be {@code rax}. */
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value rres;
    /** Source address (16-bit elements). Asserted to be {@code rsi}. */
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rsrc;
    /** Destination address (bytes). Asserted to be {@code rdi}. */
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rdst;
    /** Number of elements to compress. Asserted to be {@code rdx}. */
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rlen;

    // The three *Temp fields hold the same values as the corresponding @Use operands. The emitted
    // code overwrites the src/dst/len registers (leaq/negq/addq/subl below).
    // NOTE(review): presumably the @Temp declarations are what tells the register allocator that
    // those input registers are clobbered - confirm against the LIRInstruction documentation.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rsrcTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rdstTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rlenTemp;

    /** Vector temporaries (asserted to be XMM-category registers in {@link #charArrayCompress}). */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp1;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp2;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp3;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp4;
    /** General-purpose temporary, allocated with kind {@code DWORD}. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rtmp5;

    /**
     * Creates the instruction and allocates its temporaries.
     *
     * @param tool generator used to query the target and to allocate temporary variables
     * @param useAVX3Threshold AVX3 threshold; must be 0 or a power of 2. The AVX-512 code path is
     *            only emitted when this is 0.
     * @param res result operand, must be {@code rax}
     * @param src source address operand, must be {@code rsi}
     * @param dst destination address operand, must be {@code rdi}
     * @param len element count operand, must be {@code rdx}
     */
    public AMD64StringUTF16CompressOp(LIRGeneratorTool tool, int useAVX3Threshold, Value res, Value src, Value dst, Value len) {
        super(TYPE, tool, null, AMD64StringUTF16CompressOp.supportsAVX512VLBW(tool.target(), null) && AMD64StringUTF16CompressOp.supports(tool.target(), null, AMD64.CPUFeature.BMI2) ? AVXKind.AVXSize.ZMM : AVXKind.AVXSize.XMM);
        // 0 must be accepted explicitly: it is the only value for which charArrayCompress emits
        // the AVX-512 path, and a plain power-of-2 check rejects it.
        assert useAVX3Threshold == 0 || CodeUtil.isPowerOf2(useAVX3Threshold) : "AVX3Threshold must be 0 or a power of 2: " + useAVX3Threshold;
        this.useAVX3Threshold = useAVX3Threshold;

        assert ValueUtil.asRegister(src).equals(AMD64.rsi);
        assert ValueUtil.asRegister(dst).equals(AMD64.rdi);
        assert ValueUtil.asRegister(len).equals(AMD64.rdx);
        assert ValueUtil.asRegister(res).equals(AMD64.rax);

        this.rres = res;
        this.rsrcTemp = this.rsrc = src;
        this.rdstTemp = this.rdst = dst;
        this.rlenTemp = this.rlen = len;

        LIRKind vkind = LIRKind.value(this.getVectorKind(JavaKind.Byte));
        this.vtmp1 = tool.newVariable(vkind);
        this.vtmp2 = tool.newVariable(vkind);
        this.vtmp3 = tool.newVariable(vkind);
        this.vtmp4 = tool.newVariable(vkind);
        this.rtmp5 = tool.newVariable(LIRKind.value(AMD64Kind.DWORD));
    }

    /**
     * Resolves all operands to physical registers and emits the compression routine.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register res = ValueUtil.asRegister(this.rres);
        Register src = ValueUtil.asRegister(this.rsrc);
        Register dst = ValueUtil.asRegister(this.rdst);
        Register len = ValueUtil.asRegister(this.rlen);
        Register tmp1 = ValueUtil.asRegister(this.vtmp1);
        Register tmp2 = ValueUtil.asRegister(this.vtmp2);
        Register tmp3 = ValueUtil.asRegister(this.vtmp3);
        Register tmp4 = ValueUtil.asRegister(this.vtmp4);
        Register tmp5 = ValueUtil.asRegister(this.rtmp5);
        this.charArrayCompress(masm, src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, res);
    }

    /**
     * Emits the compression routine.
     *
     * @param masm assembler receiving the code
     * @param src register holding the source address; modified by the emitted code
     * @param dst register holding the destination address; modified by the emitted code
     * @param len register holding the element count; modified by the emitted code, must differ
     *            from {@code result}
     * @param tmp1Reg vector temporary
     * @param tmp2Reg vector temporary
     * @param tmp3Reg vector temporary
     * @param tmp4Reg vector temporary
     * @param tmp5 general-purpose temporary
     * @param result register receiving the original length on success or 0 on failure; also used
     *            as scratch while the routine runs
     */
    private void charArrayCompress(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1Reg, Register tmp2Reg, Register tmp3Reg, Register tmp4Reg, Register tmp5, Register result) {
        assert tmp1Reg.getRegisterCategory().equals(AMD64.XMM);
        assert tmp2Reg.getRegisterCategory().equals(AMD64.XMM);
        assert tmp3Reg.getRegisterCategory().equals(AMD64.XMM);
        assert tmp4Reg.getRegisterCategory().equals(AMD64.XMM);

        Label labelCopyCharsLoop = new Label();
        Label labelReturnLength = new Label();
        Label labelReturnZero = new Label();
        Label labelDone = new Label();

        assert len.number != result.number;

        // Save the original length on the stack; it is popped into 'result' on success and
        // discarded on failure.
        masm.push(len);

        if (this.useAVX3Threshold == 0 && this.supportsAVX512VLBWAndZMM() && this.supportsBMI2()) {
            Label labelCopy32Loop = new Label();
            Label labelCopyLoopTail = new Label();
            Label labelBelowThreshold = new Label();
            Label labelPostAlignment = new Label();

            // (len & ~31) == 0: fewer than 32 elements, skip the AVX-512 path entirely.
            masm.testlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold, false);

            // Broadcast 0x00ff to every 16-bit lane of tmp2Reg; elements are compared against it.
            masm.movl(result, 0x00ff);
            masm.evpbroadcastw(tmp2Reg, result);

            // (len & ~63) == 0: skip the destination alignment step.
            masm.testlAndJcc(len, -64, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, false);

            // tmp5 = (-(dst & 31)) & 31: number of bytes (one per element) needed to bring dst
            // up to the next 32-byte boundary.
            masm.movl(tmp5, dst);
            masm.andl(tmp5, 31);
            masm.negl(tmp5);
            masm.andl(tmp5, 31);

            // Already aligned: nothing to do.
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, false);

            // k3 = (1 << tmp5) - 1, computed as ~(~0 << tmp5): one mask bit per element to process.
            masm.movl(result, -1);
            masm.shlxl(result, result, tmp5);
            masm.notl(result);
            masm.kmovd(AMD64.k3, result);

            // Masked load, compare each active element against 0x00ff (predicate 2 is unsigned
            // less-than-or-equal in the VPCMPUW immediate encoding), and fail unless every lane
            // selected by k3 passed.
            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            masm.ktestd(AMD64.k2, AMD64.k3);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);

            // Masked store of the elements truncated to bytes.
            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);

            // Advance: src by 2 bytes per element (hence added twice), dst by 1 byte per element.
            masm.addq(src, tmp5);
            masm.addq(src, tmp5);
            masm.addq(dst, tmp5);
            masm.subl(len, tmp5);

            masm.bind(labelPostAlignment);

            // tmp5 = tail count (len % 32); len = count handled by the 32-element loop.
            masm.movl(tmp5, len);
            masm.andl(tmp5, 31);
            masm.andlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelCopyLoopTail, false);

            // Point src/dst past the vectorized region and count a negative index up to zero.
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);

            // 32 elements per iteration.
            masm.bind(labelCopy32Loop);
            masm.evmovdqu16(tmp1Reg, new AMD64Address(src, len, Stride.S2));
            masm.evpcmpuw(AMD64.k2, tmp1Reg, tmp2Reg, 2);
            masm.kortestd(AMD64.k2, AMD64.k2);
            // Carry is only set when all 32 mask bits are set, i.e. every element was <= 0x00ff.
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);
            masm.evpmovwb(new AMD64Address(dst, len, Stride.S1), tmp1Reg);
            masm.addqAndJcc(len, 32, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, false);

            masm.bind(labelCopyLoopTail);
            // No tail elements: done.
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelReturnLength, false);

            // k3 = (1 << tail) - 1, then the same masked load / compare / store as above.
            masm.movl(len, tmp5);
            masm.movl(result, -1);
            masm.shlxl(result, result, len);
            masm.notl(result);
            masm.kmovd(AMD64.k3, result);

            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            masm.ktestd(AMD64.k2, AMD64.k3);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);

            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);
            masm.jmp(labelReturnLength);

            // Inputs too short for AVX-512 fall through to the paths below.
            masm.bind(labelBelowThreshold);
        }

        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
            Label labelCopy16Loop = new Label();
            Label labelCopy8 = new Label();
            Label labelCopyTail = new Label();

            masm.movl(result, len);

            // Mask selecting the high byte of each 16-bit element (two elements per dword).
            masm.movl(tmp5, 0xff00ff00);

            // len = count handled 16 elements at a time; result = remaining count (len % 16).
            masm.andl(len, -16);
            masm.andl(result, 0x0000000f);
            masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelCopy8, false);

            // Replicate the mask into all four dwords of tmp1Reg; clear the accumulator tmp4Reg.
            masm.movdl(tmp1Reg, tmp5);
            masm.pshufd(tmp1Reg, tmp1Reg, 0);
            masm.pxor(tmp4Reg, tmp4Reg);

            // Point src/dst past the vectorized region and count a negative index up to zero.
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);

            // 16 elements per iteration: two 128-bit loads, one 128-bit store.
            masm.bind(labelCopy16Loop);
            masm.movdqu(tmp2Reg, new AMD64Address(src, len, Stride.S2));
            masm.por(tmp4Reg, tmp2Reg);
            masm.movdqu(tmp3Reg, new AMD64Address(src, len, Stride.S2, 16));
            masm.por(tmp4Reg, tmp3Reg);
            // Fail if any accumulated element has a bit set in its high byte.
            masm.ptest(tmp4Reg, tmp1Reg);
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movdqu(new AMD64Address(dst, len, Stride.S1), tmp2Reg);
            masm.addqAndJcc(len, 16, AMD64Assembler.ConditionFlag.NotZero, labelCopy16Loop, false);

            // At most one further group of 8 elements before the scalar tail.
            masm.bind(labelCopy8);
            masm.movl(len, result);
            masm.andl(len, -8);
            masm.andl(result, 0x00000007);
            masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, true);

            // The mask is rebuilt here because the 16-element setup above may have been skipped.
            masm.movdl(tmp1Reg, tmp5);
            masm.pshufd(tmp1Reg, tmp1Reg, 0);
            masm.pxor(tmp3Reg, tmp3Reg);

            masm.movdqu(tmp2Reg, new AMD64Address(src));
            masm.ptest(tmp2Reg, tmp1Reg);
            masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
            // Pack against the zeroed tmp3Reg; only the low 8 bytes are stored.
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movq(new AMD64Address(dst), tmp2Reg);
            masm.addq(src, 16);
            masm.addq(dst, 8);

            masm.bind(labelCopyTail);
            masm.movl(len, result);
        }

        // Scalar path for whatever is left (everything, if no vector path was emitted).
        masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelReturnLength, true);
        masm.leaq(src, new AMD64Address(src, len, Stride.S2));
        masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
        masm.negq(len);

        // One element per iteration.
        masm.bind(labelCopyCharsLoop);
        masm.movzwl(result, new AMD64Address(src, len, Stride.S2));
        // Fail if any of the high 8 bits is set.
        masm.testlAndJcc(result, 0xff00, AMD64Assembler.ConditionFlag.NotZero, labelReturnZero, true);
        masm.movb(new AMD64Address(dst, len, Stride.S1), result);
        masm.incqAndJcc(len, AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop, false);

        // Success: return the length saved at entry.
        masm.bind(labelReturnLength);
        masm.pop(result);
        masm.jmpb(labelDone);

        // Failure: return 0 and drop the 8-byte saved length from the stack.
        masm.bind(labelReturnZero);
        masm.xorl(result, result);
        masm.addq(AMD64.rsp, 8);

        masm.bind(labelDone);
    }
}

