/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.Objects;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaConstant;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.LIRKind;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.lir.ConstantValue;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.amd64.AMD64LIRInstruction;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

@Opcode(value="AMD64_ARRAY_INDEX_OF")
public final class AMD64ArrayIndexOfOp
extends AMD64LIRInstruction {
    /** Instruction-class descriptor for this op, passed to the superclass constructor. */
    public static final LIRInstructionClass<AMD64ArrayIndexOfOp> TYPE = LIRInstructionClass.create(AMD64ArrayIndexOfOp.class);
    /** Kind of the elements being searched; the constructor asserts it is Byte or Char. */
    private final JavaKind valueKind;
    /** Number of search values supplied to the constructor (asserted to be 1..4). */
    private final int nValues;
    /**
     * When set, comparisons use a kind twice as wide as {@link #valueKind} (see
     * {@code getComparisonKind}); the constructor asserts that exactly one search value is given.
     */
    private final boolean findTwoConsecutive;
    /** Vector kind used for all vector temporaries: 256-bit or 128-bit, byte or word lanes. */
    private final AMD64Kind vectorKind;
    /** Array base offset obtained from the meta access provider for the array kind. */
    private final int arrayBaseOffset;
    /** Addressing scale derived from the array index scale of {@link #valueKind}. */
    private final AMD64Address.Scale arrayIndexScale;
    /** Result register; {@code emitCode} also uses it as the running index. */
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    protected Value resultValue;
    /** Register holding the array pointer. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value arrayPtrValue;
    /** Register holding the array length that the index is compared against. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value arrayLengthValue;
    /** Start index; {@code emitCode} reads it only in its first emitted instruction. */
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    protected Value fromIndexValue;
    /** First search value (always present); may be a register, stack slot or constant. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.CONST})
    protected Value searchValue1;
    /** Second search value, or {@code Value.ILLEGAL} when fewer than two values are given. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.CONST, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value searchValue2;
    /** Third search value, or {@code Value.ILLEGAL} when fewer than three values are given. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.CONST, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value searchValue3;
    /** Fourth search value, or {@code Value.ILLEGAL} when fewer than four values are given. */
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.CONST, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value searchValue4;
    /** Word-sized scratch register; receives comparison masks and serves as a general temporary. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    protected Value comparisonResult1;
    /** Second word-sized scratch register; only allocated when {@link #findTwoConsecutive} is set. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value comparisonResult2;
    /** Vector register that receives the broadcast of {@link #searchValue1}. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorCompareVal1;
    /** Vector register for the broadcast of {@link #searchValue2}; ILLEGAL if unused. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorCompareVal2;
    /** Vector register for the broadcast of {@link #searchValue3}; ILLEGAL if unused. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorCompareVal3;
    /** Vector register for the broadcast of {@link #searchValue4}; ILLEGAL if unused. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorCompareVal4;
    /** Vector register receiving array contents; also used as scratch while broadcasting. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorArray1;
    /** Vector register receiving array contents. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorArray2;
    /** Vector register receiving array contents. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorArray3;
    /** Vector register receiving array contents. */
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    protected Value vectorArray4;

    public AMD64ArrayIndexOfOp(JavaKind arrayKind, JavaKind valueKind, boolean findTwoConsecutive, int maxVectorSize, LIRGeneratorTool tool, Value result, Value arrayPtr, Value arrayLength, Value fromIndex, Value ... searchValues) {
        super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
        this.valueKind = valueKind;
        this.arrayBaseOffset = tool.getProviders().getMetaAccess().getArrayBaseOffset(arrayKind);
        this.arrayIndexScale = Objects.requireNonNull(AMD64Address.Scale.fromInt(tool.getProviders().getMetaAccess().getArrayIndexScale(valueKind)));
        this.findTwoConsecutive = findTwoConsecutive;
        assert (0 < searchValues.length && searchValues.length <= 4);
        assert (AMD64ArrayIndexOfOp.byteMode(valueKind) || AMD64ArrayIndexOfOp.charMode(valueKind));
        assert (AMD64ArrayIndexOfOp.supports(tool, AMD64.CPUFeature.SSE2) || AMD64ArrayIndexOfOp.supports(tool, AMD64.CPUFeature.AVX) || AMD64ArrayIndexOfOp.supportsAVX2(tool));
        this.nValues = searchValues.length;
        assert (!findTwoConsecutive || this.nValues == 1);
        this.resultValue = result;
        this.arrayPtrValue = arrayPtr;
        this.arrayLengthValue = arrayLength;
        this.fromIndexValue = fromIndex;
        this.searchValue1 = searchValues[0];
        this.searchValue2 = this.nValues > 1 ? searchValues[1] : Value.ILLEGAL;
        this.searchValue3 = this.nValues > 2 ? searchValues[2] : Value.ILLEGAL;
        this.searchValue4 = this.nValues > 3 ? searchValues[3] : Value.ILLEGAL;
        this.comparisonResult1 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
        AllocatableValue allocatableValue = this.comparisonResult2 = findTwoConsecutive ? tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())) : Value.ILLEGAL;
        this.vectorKind = AMD64ArrayIndexOfOp.supportsAVX2(tool) && (maxVectorSize < 0 || maxVectorSize >= 32) ? (AMD64ArrayIndexOfOp.byteMode(valueKind) ? AMD64Kind.V256_BYTE : AMD64Kind.V256_WORD) : (AMD64ArrayIndexOfOp.byteMode(valueKind) ? AMD64Kind.V128_BYTE : AMD64Kind.V128_WORD);
        this.vectorCompareVal1 = tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind));
        this.vectorCompareVal2 = this.nValues > 1 ? tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind)) : Value.ILLEGAL;
        this.vectorCompareVal3 = this.nValues > 2 ? tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind)) : Value.ILLEGAL;
        this.vectorCompareVal4 = this.nValues > 3 ? tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind)) : Value.ILLEGAL;
        this.vectorArray1 = tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind));
        this.vectorArray2 = tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind));
        this.vectorArray3 = tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind));
        this.vectorArray4 = tool.newVariable(LIRKind.value((PlatformKind)this.vectorKind));
    }

    /** Returns {@code true} when {@code kind} is {@code JavaKind.Byte}. */
    private static boolean byteMode(JavaKind kind) {
        return JavaKind.Byte == kind;
    }

    /** Returns {@code true} when {@code kind} is {@code JavaKind.Char}. */
    private static boolean charMode(JavaKind kind) {
        return JavaKind.Char == kind;
    }

    /**
     * Returns the kind used for scalar comparisons and for broadcasting the search value: the
     * element kind itself, or the next wider kind (Byte -> Char, otherwise Int) in
     * two-consecutive mode.
     */
    private JavaKind getComparisonKind() {
        if (!findTwoConsecutive) {
            return valueKind;
        }
        if (byteMode(valueKind)) {
            return JavaKind.Char;
        }
        return JavaKind.Int;
    }

    /** Returns the AVX data size that corresponds to the selected vector kind. */
    private AVXKind.AVXSize getVectorSize() {
        AMD64Kind selectedKind = vectorKind;
        return AVXKind.getDataSize(selectedKind);
    }

    /**
     * Emits the machine code of the search. The generated code has four parts:
     * <ol>
     * <li>a bounds check that selects either the element-wise or the vectorized path,</li>
     * <li>an element-wise loop for ranges shorter than one vector,</li>
     * <li>the vectorized path: broadcast of the search values, one unaligned compare, index
     * alignment, a bulk loop and a tail compare,</li>
     * <li>the "found" handlers that turn a comparison mask into the final index.</li>
     * </ol>
     * The result register holds the index of the match, or -1 when nothing was found.
     *
     * @param crb compilation result builder, used for stack-slot addresses and alignment
     * @param asm assembler that receives the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        int imm;
        int i;
        // Vectors handled per bulk iteration: 4 for one search value, 2 for two, otherwise 1.
        int nVectors = this.nValues == 1 ? 4 : (this.nValues == 2 ? 2 : 1);
        Register arrayPtr = ValueUtil.asRegister((Value)this.arrayPtrValue);
        Register arrayLength = ValueUtil.asRegister((Value)this.arrayLengthValue);
        Register fromIndex = ValueUtil.asRegister((Value)this.fromIndexValue);
        // The result register doubles as the running index.
        Register index = ValueUtil.asRegister((Value)this.resultValue);
        // Slots beyond nValues are null and must not be touched.
        Value[] searchValue = new Value[]{this.nValues > 0 ? this.searchValue1 : null, this.nValues > 1 ? this.searchValue2 : null, this.nValues > 2 ? this.searchValue3 : null, this.nValues > 3 ? this.searchValue4 : null};
        Register[] vecCmp = new Register[]{this.nValues > 0 ? ValueUtil.asRegister((Value)this.vectorCompareVal1) : null, this.nValues > 1 ? ValueUtil.asRegister((Value)this.vectorCompareVal2) : null, this.nValues > 2 ? ValueUtil.asRegister((Value)this.vectorCompareVal3) : null, this.nValues > 3 ? ValueUtil.asRegister((Value)this.vectorCompareVal4) : null};
        Register[] vecArray = new Register[]{ValueUtil.asRegister((Value)this.vectorArray1), ValueUtil.asRegister((Value)this.vectorArray2), ValueUtil.asRegister((Value)this.vectorArray3), ValueUtil.asRegister((Value)this.vectorArray4)};
        // cmpResult[1] only exists in two-consecutive mode.
        Register[] cmpResult = new Register[]{ValueUtil.asRegister((Value)this.comparisonResult1), this.findTwoConsecutive ? ValueUtil.asRegister((Value)this.comparisonResult2) : null};
        Label ret = new Label();
        Label bulkVectorLoop = new Label();
        Label singleVectorLoop = new Label();
        Label[] vectorFound = new Label[]{new Label(), new Label(), new Label(), new Label()};
        Label runVectorized = new Label();
        Label elementWiseLoop = new Label();
        Label elementWiseFound = new Label();
        Label elementWiseNotFound = new Label();
        Label skipBulkVectorLoop = new Label();
        // Vector size and bulk size are measured in array elements, not bytes.
        int vectorSize = this.getVectorSize().getBytes() / this.valueKind.getByteCount();
        int bulkSize = vectorSize * nVectors;
        JavaKind vectorCompareKind = this.valueKind;
        if (this.findTwoConsecutive) {
            // Two vectors are consumed per array position, so a bulk step covers half as much,
            // and lanes are compared at double the element width.
            bulkSize /= 2;
            vectorCompareKind = AMD64ArrayIndexOfOp.byteMode(this.valueKind) ? JavaKind.Char : JavaKind.Int;
        }
        // index = fromIndex + vectorSize (+ 1 in two-consecutive mode). This is the only
        // instruction that reads fromIndex.
        asm.leaq(index, new AMD64Address(fromIndex, vectorSize + (this.findTwoConsecutive ? 1 : 0)));
        // A full vector fits into the remaining range: take the vectorized path.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, runVectorized, true);
        // Otherwise undo the vector-size offset: index = fromIndex (+ 1 in two-consecutive mode).
        asm.subq(index, vectorSize);
        // Nothing left to inspect.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.GreaterEqual, elementWiseNotFound, true);
        asm.bind(elementWiseLoop);
        AMD64BaseAssembler.OperandSize cmpSize = AMD64ArrayIndexOfOp.getOpSize(this.getComparisonKind());
        // In two-consecutive mode the address is moved back by one element, i.e. array[index - 1].
        AMD64Address arrayAddr = new AMD64Address(arrayPtr, index, this.arrayIndexScale, this.arrayBaseOffset - (this.findTwoConsecutive ? this.valueKind.getByteCount() : 0));
        boolean valuesOnStack = this.searchValuesOnStack(searchValue);
        if (valuesOnStack) {
            // At least one search value lives on the stack: load the array element into a
            // register first, then compare that register against each search value.
            (cmpSize == AMD64BaseAssembler.OperandSize.BYTE ? AMD64Assembler.AMD64RMOp.MOVB : AMD64Assembler.AMD64RMOp.MOV).emit((AMD64Assembler)asm, cmpSize, cmpResult[0], arrayAddr);
            for (i = 0; i < this.nValues; ++i) {
                if (AMD64ArrayIndexOfOp.isConstant(searchValue[i])) {
                    imm = AMD64ArrayIndexOfOp.asConstant(searchValue[i]).asInt();
                    AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(cmpSize, NumUtil.isByte(imm)).emit((AMD64Assembler)asm, cmpSize, cmpResult[0], imm);
                } else if (ValueUtil.isStackSlot((Value)searchValue[i])) {
                    AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit((AMD64Assembler)asm, cmpSize, cmpResult[0], (AMD64Address)crb.asAddress(searchValue[i]));
                } else {
                    AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit((AMD64Assembler)asm, cmpSize, cmpResult[0], ValueUtil.asRegister((Value)searchValue[i]));
                }
                asm.jccb(AMD64Assembler.ConditionFlag.Equal, elementWiseFound);
            }
        } else {
            // No stack operands: compare each search value directly against the array memory.
            for (i = 0; i < this.nValues; ++i) {
                if (AMD64ArrayIndexOfOp.isConstant(searchValue[i])) {
                    imm = AMD64ArrayIndexOfOp.asConstant(searchValue[i]).asInt();
                    AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(cmpSize, NumUtil.isByte(imm)).emit((AMD64Assembler)asm, cmpSize, arrayAddr, imm);
                } else {
                    AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(cmpSize).emit((AMD64Assembler)asm, cmpSize, ValueUtil.asRegister((Value)searchValue[i]), arrayAddr);
                }
                asm.jccb(AMD64Assembler.ConditionFlag.Equal, elementWiseFound);
            }
        }
        // Advance to the next element and loop while index < arrayLength.
        asm.incrementq(index, 1);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, elementWiseLoop, true);
        asm.bind(elementWiseNotFound);
        // Not found: index = 0, then decremented to -1 below.
        asm.xorq(index, index);
        if (this.findTwoConsecutive) {
            // The found path shares the decrement: the match started one element before index.
            asm.bind(elementWiseFound);
            asm.decrementq(index, 1);
        } else {
            // The found path skips the decrement and returns index unchanged.
            asm.decrementq(index, 1);
            asm.bind(elementWiseFound);
        }
        asm.jmp(ret);
        // ---- vectorized path ----
        asm.bind(runVectorized);
        // Fill one comparison vector per search value; cmpResult[0] and vecArray[0] are scratch.
        for (i = 0; i < this.nValues; ++i) {
            this.broadcastSearchValue(crb, asm, vecCmp[i], searchValue[i], cmpResult[0], vecArray[0]);
        }
        // One unaligned compare pass ending at the current index.
        this.emitVectorCompare(asm, vectorCompareKind, this.findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false, false);
        // Round index down so that the corresponding address is a multiple of the vector size:
        // add the address of element 0 (halved in char mode, so it is in element units), mask,
        // and subtract it again.
        asm.leaq(cmpResult[0], new AMD64Address(arrayPtr, this.arrayBaseOffset));
        if (AMD64ArrayIndexOfOp.charMode(this.valueKind)) {
            asm.shrq(cmpResult[0], 1);
        }
        asm.addq(index, cmpResult[0]);
        asm.andq(index, ~(vectorSize - 1));
        asm.subq(index, cmpResult[0]);
        // Position index at the end of the first bulk block.
        asm.addq(index, bulkSize);
        // Skip the bulk loop if a full bulk block does not fit.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, skipBulkVectorLoop, true);
        AMD64ArrayIndexOfOp.emitAlign(crb, asm);
        asm.bind(bulkVectorLoop);
        // Bulk compare of nVectors vectors; aligned loads are requested unless in
        // two-consecutive mode.
        this.emitVectorCompare(asm, vectorCompareKind, nVectors, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, false, !this.findTwoConsecutive);
        asm.addq(index, bulkSize);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, bulkVectorLoop, true);
        asm.bind(skipBulkVectorLoop);
        if (this.findTwoConsecutive && nVectors == 2 || nVectors == 1) {
            // A single final compare that ends exactly at the end of the array.
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, vectorCompareKind, this.findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true, false);
        } else {
            // Remove the bulk offset and continue one vector at a time.
            asm.subq(index, bulkSize);
            AMD64ArrayIndexOfOp.emitAlign(crb, asm);
            asm.bind(singleVectorLoop);
            asm.addq(index, vectorSize);
            // Clamp index to arrayLength so the last compare ends at the end of the array.
            asm.cmpq(index, arrayLength);
            asm.cmovq(AMD64Assembler.ConditionFlag.Greater, index, arrayLength);
            this.emitVectorCompare(asm, vectorCompareKind, this.findTwoConsecutive ? 2 : 1, arrayPtr, index, vecCmp, vecArray, cmpResult, vectorFound, true, false);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, singleVectorLoop, true);
        }
        // Vectorized search exhausted without a match.
        asm.movl(index, -1);
        asm.jmpb(ret);
        // ---- "found" handlers: convert vector number and mask into the final index ----
        if (this.findTwoConsecutive) {
            Label vectorFound2Done = new Label();
            // Handlers 2 and 0 only need the mask in cmpResult[0].
            asm.bind(vectorFound[2]);
            asm.subq(index, this.getResultIndexDelta(2));
            asm.jmpb(vectorFound2Done);
            asm.bind(vectorFound[0]);
            asm.subq(index, this.getResultIndexDelta(0));
            asm.bind(vectorFound2Done);
            // Position of the lowest set mask bit; halved in char mode before being added.
            asm.bsfq(cmpResult[0], cmpResult[0]);
            if (AMD64ArrayIndexOfOp.charMode(this.valueKind)) {
                asm.shrl(cmpResult[0], 1);
            }
            asm.addq(index, cmpResult[0]);
            asm.jmpb(ret);
            Label minResult = new Label();
            Label minResultDone = new Label();
            // Handlers 3 and 1 were hit via the mask in cmpResult[1] and additionally have to
            // consider the mask in cmpResult[0].
            if (nVectors > 2) {
                asm.bind(vectorFound[3]);
                asm.subq(index, this.getResultIndexDelta(3));
                asm.jmpb(minResult);
            }
            asm.bind(vectorFound[1]);
            asm.subq(index, this.getResultIndexDelta(1));
            asm.bind(minResult);
            asm.bsfq(cmpResult[1], cmpResult[1]);
            // If the other mask is empty, the position from cmpResult[1] is final.
            asm.testqAndJcc(cmpResult[0], cmpResult[0], AMD64Assembler.ConditionFlag.Zero, minResultDone, true);
            // Otherwise take the smaller of the two positions; the position from cmpResult[0]
            // is shifted by one element size before comparing.
            asm.bsfq(cmpResult[0], cmpResult[0]);
            asm.addq(cmpResult[0], this.valueKind.getByteCount());
            asm.cmpq(cmpResult[1], cmpResult[0]);
            asm.cmovq(AMD64Assembler.ConditionFlag.Greater, cmpResult[1], cmpResult[0]);
            asm.bind(minResultDone);
            if (AMD64ArrayIndexOfOp.charMode(this.valueKind)) {
                asm.shrl(cmpResult[1], 1);
            }
            asm.addq(index, cmpResult[1]);
        } else {
            Label end = new Label();
            // One handler per vector: subtract that vector's static offset, then join at 'end'.
            // The last handler falls through and needs no jump.
            for (int i2 = 0; i2 < nVectors; ++i2) {
                asm.bind(vectorFound[i2]);
                asm.subq(index, this.getResultIndexDelta(i2));
                if (i2 >= nVectors - 1) continue;
                asm.jmpb(end);
            }
            asm.bind(end);
            // Position of the lowest set mask bit; halved in char mode before being added.
            asm.bsfq(cmpResult[0], cmpResult[0]);
            if (AMD64ArrayIndexOfOp.charMode(this.valueKind)) {
                asm.shrl(cmpResult[0], 1);
            }
            asm.addq(index, cmpResult[0]);
        }
        asm.bind(ret);
    }

    /**
     * Reports whether any of the first {@code nValues} entries of {@code searchValue} is a stack
     * slot.
     */
    private boolean searchValuesOnStack(Value[] searchValue) {
        for (int slot = 0; slot < nValues; slot++) {
            if (ValueUtil.isStackSlot(searchValue[slot])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the number of elements that separates the running index from the start of vector
     * {@code i}. In two-consecutive mode two vectors share one position ({@code i / 2}) and
     * odd-numbered vectors are shifted by one additional element.
     */
    private int getResultIndexDelta(int i) {
        int elementsPerVector = getVectorSize().getBytes() / valueKind.getByteCount();
        if (findTwoConsecutive) {
            return (i / 2 + 1) * elementsPerVector + (i & 1);
        }
        return (i + 1) * elementsPerVector;
    }

    /**
     * Returns the displacement used to load vector {@code i}: the array base offset minus the
     * result index delta of that vector converted to bytes.
     */
    private int getVectorOffset(int i) {
        int deltaInBytes = getResultIndexDelta(i) * valueKind.getByteCount();
        return arrayBaseOffset - deltaInBytes;
    }

    /**
     * Moves the search value into {@code dst} and replicates it across all lanes.
     *
     * @param srcVal search value (register, stack slot or constant)
     * @param tmpReg general-purpose scratch register, used when {@code srcVal} is not a register
     * @param tmpVector vector scratch register handed to the broadcast sequence
     */
    private void broadcastSearchValue(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register dst, Value srcVal, Register tmpReg, Register tmpVector) {
        Register scalar = asRegOrTmpReg(crb, asm, srcVal, tmpReg);
        if (!asm.supports(AMD64.CPUFeature.AVX)) {
            asm.movdl(dst, scalar);
        } else {
            AMD64Assembler.VexMoveOp.VMOVD.emit(asm, AVXKind.AVXSize.DWORD, dst, scalar);
        }
        emitBroadcast(asm, getComparisonKind(), dst, tmpVector, getVectorSize());
    }

    /**
     * Reports whether {@code val} is a {@link ConstantValue}; asserts that such a value wraps a
     * Java constant.
     */
    private static boolean isConstant(Value val) {
        if (!(val instanceof ConstantValue)) {
            return false;
        }
        assert ((ConstantValue) val).isJavaConstant();
        return true;
    }

    /** Casts {@code val} to {@link ConstantValue} and returns the Java constant it wraps. */
    private static JavaConstant asConstant(Value val) {
        ConstantValue constant = (ConstantValue) val;
        return constant.getJavaConstant();
    }

    /**
     * Returns a register that holds {@code val}. A register value is returned as is; a stack slot
     * or a constant is first moved into {@code tmpReg} with a 32-bit move.
     */
    private static Register asRegOrTmpReg(CompilationResultBuilder crb, AMD64MacroAssembler asm, Value val, Register tmpReg) {
        if (ValueUtil.isRegister(val)) {
            return ValueUtil.asRegister(val);
        }
        if (ValueUtil.isStackSlot(val)) {
            asm.movl(tmpReg, (AMD64Address) crb.asAddress(val));
        } else {
            assert isConstant(val);
            asm.movl(tmpReg, asConstant(val).asInt());
        }
        return tmpReg;
    }

    /** Aligns the next instruction to twice the target word size. */
    private static void emitAlign(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        int alignment = crb.target.wordSize * 2;
        asm.align(alignment);
    }

    /**
     * Replicates the lowest element of {@code vecDst} across the whole vector, choosing the
     * instruction sequence by element kind and by the best available CPU feature (AVX2, then AVX,
     * then SSSE3 for bytes, then the baseline SSE sequence).
     *
     * @param vecTmp scratch vector; only written by the byte variants that shuffle with a zero mask
     * @throws UnsupportedOperationException if {@code kind} is not Byte, Short, Char or Int
     */
    private static void emitBroadcast(AMD64MacroAssembler asm, JavaKind kind, Register vecDst, Register vecTmp, AVXKind.AVXSize vectorSize) {
        switch (kind) {
            case Byte:
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTB.emit(asm, vectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    // Shuffle with an all-zero mask selects byte 0 for every lane.
                    AMD64Assembler.VexRVMOp.VPXOR.emit(asm, vectorSize, vecTmp, vecTmp, vecTmp);
                    AMD64Assembler.VexRVMOp.VPSHUFB.emit(asm, vectorSize, vecDst, vecDst, vecTmp);
                } else if (asm.supports(AMD64.CPUFeature.SSSE3)) {
                    asm.pxor(vecTmp, vecTmp);
                    asm.pshufb(vecDst, vecTmp);
                } else {
                    // Baseline: widen byte -> word -> dword by self-interleaving, then splat.
                    asm.punpcklbw(vecDst, vecDst);
                    asm.punpcklbw(vecDst, vecDst);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            case Short:
            case Char:
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTW.emit(asm, vectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFLW.emit(asm, vectorSize, vecDst, vecDst, 0);
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshuflw(vecDst, vecDst, 0);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            case Int:
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTD.emit(asm, vectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit(asm, vectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            default:
                throw new UnsupportedOperationException();
        }
    }

    /**
     * Loads {@code nVectors} groups of array data and compares them against the broadcast search
     * values, jumping to the matching {@code vectorFound} label when a comparison mask is
     * non-zero.
     *
     * @param kind lane kind used by the compare instructions
     * @param nVectors number of vectors to process in this pass
     * @param cmpResult scratch registers that receive the comparison masks
     * @param vectorFound jump targets, indexed in reverse of the load order
     * @param shortJmp whether the conditional jumps may use the short encoding
     * @param alignedLoad whether the loads may use the aligned move instruction
     */
    private void emitVectorCompare(AMD64MacroAssembler asm, JavaKind kind, int nVectors, Register arrayPtr, Register index, Register[] vecCmp, Register[] vecArray, Register[] cmpResult, Label[] vectorFound, boolean shortJmp, boolean alignedLoad) {
        // Load phase: every search value gets its own copy of each array vector.
        for (int vec = 0; vec < nVectors; vec++) {
            int first = vec * nValues;
            for (int val = 0; val < nValues; val++) {
                emitArrayLoad(asm, getVectorSize(), vecArray[first + val], arrayPtr, index, getVectorOffset(nVectors - (vec + 1)), alignedLoad);
            }
        }
        if (findTwoConsecutive) {
            // Vectors are handled in pairs; both are compared against the single search value and
            // produce separate masks in cmpResult[1] and cmpResult[0].
            for (int vec = 0; vec < nVectors; vec += 2) {
                emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[vec], vecCmp[0]);
                emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[vec + 1], vecCmp[0]);
                emitMOVMSK(asm, getVectorSize(), cmpResult[1], vecArray[vec]);
                emitMOVMSK(asm, getVectorSize(), cmpResult[0], vecArray[vec + 1]);
                asm.testlAndJcc(cmpResult[1], cmpResult[1], AMD64Assembler.ConditionFlag.NotZero, vectorFound[nVectors - (vec + 1)], shortJmp);
                asm.testlAndJcc(cmpResult[0], cmpResult[0], AMD64Assembler.ConditionFlag.NotZero, vectorFound[nVectors - (vec + 2)], shortJmp);
            }
            return;
        }
        for (int vec = 0; vec < nVectors; vec++) {
            int first = vec * nValues;
            for (int val = 0; val < nValues; val++) {
                emitVectorCompareInst(asm, kind, getVectorSize(), vecArray[first + val], vecCmp[val]);
                // After every second compare, fold the pair of results together.
                if ((val & 1) == 1) {
                    emitPOR(asm, getVectorSize(), vecArray[first + val - 1], vecArray[first + val]);
                }
            }
            // With three or four values, fold the second pair into the first.
            if (nValues > 2) {
                emitPOR(asm, getVectorSize(), vecArray[first], vecArray[first + 2]);
            }
            emitMOVMSK(asm, getVectorSize(), cmpResult[0], vecArray[first]);
            asm.testlAndJcc(cmpResult[0], cmpResult[0], AMD64Assembler.ConditionFlag.NotZero, vectorFound[nVectors - (vec + 1)], shortJmp);
        }
    }

    /**
     * Loads one vector from {@code arrayPtr + index * scale + offset} into {@code vecDst}. With
     * AVX the aligned or unaligned move is chosen by {@code alignedLoad}; without AVX an
     * unaligned SSE load is always used.
     */
    private void emitArrayLoad(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register vecDst, Register arrayPtr, Register index, int offset, boolean alignedLoad) {
        AMD64Address source = new AMD64Address(arrayPtr, index, arrayIndexScale, offset);
        if (!asm.supports(AMD64.CPUFeature.AVX)) {
            asm.movdqu(vecDst, source);
            return;
        }
        AMD64Assembler.VexMoveOp move;
        if (alignedLoad) {
            move = AMD64Assembler.VexMoveOp.VMOVDQA32;
        } else {
            move = AMD64Assembler.VexMoveOp.VMOVDQU32;
        }
        move.emit(asm, vectorSize, vecDst, source);
    }

    /**
     * Emits a lane-wise equality compare of {@code vecArray} against {@code vecCmp}, leaving the
     * result in {@code vecArray}. The lane width follows {@code kind}; the VEX form is used when
     * AVX is available.
     *
     * @throws UnsupportedOperationException if {@code kind} is not Byte, Short, Char or Int
     */
    private static void emitVectorCompareInst(AMD64MacroAssembler asm, JavaKind kind, AVXKind.AVXSize vectorSize, Register vecArray, Register vecCmp) {
        switch (kind) {
            case Byte:
                if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRVMOp.VPCMPEQB.emit(asm, vectorSize, vecArray, vecCmp, vecArray);
                } else {
                    asm.pcmpeqb(vecArray, vecCmp);
                }
                return;
            case Short:
            case Char:
                if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRVMOp.VPCMPEQW.emit(asm, vectorSize, vecArray, vecCmp, vecArray);
                } else {
                    asm.pcmpeqw(vecArray, vecCmp);
                }
                return;
            case Int:
                if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRVMOp.VPCMPEQD.emit(asm, vectorSize, vecArray, vecCmp, vecArray);
                } else {
                    asm.pcmpeqd(vecArray, vecCmp);
                }
                return;
            default:
                throw new UnsupportedOperationException();
        }
    }

    /** Emits a bitwise OR of {@code vecSrc} into {@code dst}, using the VEX form with AVX. */
    private static void emitPOR(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register vecSrc) {
        if (!asm.supports(AMD64.CPUFeature.AVX)) {
            asm.por(dst, vecSrc);
            return;
        }
        AMD64Assembler.VexRVMOp.VPOR.emit(asm, vectorSize, dst, dst, vecSrc);
    }

    /**
     * Emits a byte-mask extraction from {@code vecSrc} into the general-purpose register
     * {@code dst}, using the VEX form with AVX.
     */
    private static void emitMOVMSK(AMD64MacroAssembler asm, AVXKind.AVXSize vectorSize, Register dst, Register vecSrc) {
        if (!asm.supports(AMD64.CPUFeature.AVX)) {
            asm.pmovmskb(dst, vecSrc);
            return;
        }
        AMD64Assembler.VexRMOp.VPMOVMSKB.emit(asm, vectorSize, dst, vecSrc);
    }

    /**
     * Maps a Java kind to the operand size of a scalar compare: BYTE for Byte, WORD for Short and
     * Char, DWORD for Int, and QWORD for every other kind.
     */
    private static AMD64BaseAssembler.OperandSize getOpSize(JavaKind kind) {
        switch (kind) {
            case Byte:
                return AMD64BaseAssembler.OperandSize.BYTE;
            case Short:
            case Char:
                return AMD64BaseAssembler.OperandSize.WORD;
            case Int:
                return AMD64BaseAssembler.OperandSize.DWORD;
            default:
                return AMD64BaseAssembler.OperandSize.QWORD;
        }
    }

    /** Reports whether the target architecture of {@code tool} advertises AVX2. */
    private static boolean supportsAVX2(LIRGeneratorTool tool) {
        AMD64.CPUFeature avx2 = AMD64.CPUFeature.AVX2;
        return supports(tool, avx2);
    }

    /**
     * Reports whether the target architecture of {@code tool}, cast to {@link AMD64}, lists
     * {@code cpuFeature} among its features.
     */
    private static boolean supports(LIRGeneratorTool tool, AMD64.CPUFeature cpuFeature) {
        AMD64 arch = (AMD64) tool.target().arch;
        return arch.getFeatures().contains(cpuFeature);
    }

    /**
     * Always reports {@code true} for this operation.
     * NOTE(review): presumably because the emitted code may use 256-bit vector registers whose
     * upper halves should be cleared afterwards - confirm against the overridden method's contract.
     */
    @Override
    public boolean needsClearUpperVectorRegisters() {
        return true;
    }
}

