using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace ChocolArm64.Instruction
{
    static class AInstEmitSimdHelper
    {
        public static readonly Type[] IntTypesPerSizeLog2 = new Type[]
        {
            typeof(sbyte),
            typeof(short),
            typeof(int),
            typeof(long)
        };

        public static readonly Type[] UIntTypesPerSizeLog2 = new Type[]
        {
            typeof(byte),
            typeof(ushort),
            typeof(uint),
            typeof(ulong)
        };

        public static readonly Type[] VectorIntTypesPerSizeLog2 = new Type[]
        {
            typeof(Vector128<sbyte>),
            typeof(Vector128<short>),
            typeof(Vector128<int>),
            typeof(Vector128<long>)
        };

        public static readonly Type[] VectorUIntTypesPerSizeLog2 = new Type[]
        {
            typeof(Vector128<byte>),
            typeof(Vector128<ushort>),
            typeof(Vector128<uint>),
            typeof(Vector128<ulong>)
        };

        [Flags]
        public enum OperFlags
        {
            Rd = 1 << 0,
            Rn = 1 << 1,
            Rm = 1 << 2,
            Ra = 1 << 3,

            RnRm   = Rn | Rm,
            RdRn   = Rd | Rn,
            RaRnRm = Ra | Rn | Rm,
            RdRnRm = Rd | Rn | Rm
        }

        public static int GetImmShl(AOpCodeSimdShImm Op)
        {
            return Op.Imm - (8 << Op.Size);
        }

        public static int GetImmShr(AOpCodeSimdShImm Op)
        {
            return (8 << (Op.Size + 1)) - Op.Imm;
        }

        public static void EmitSse2Op(AILEmitterCtx Context, string Name)
        {
            EmitSseOp(Context, Name, typeof(Sse2));
        }

        public static void EmitSse41Op(AILEmitterCtx Context, string Name)
        {
            EmitSseOp(Context, Name, typeof(Sse41));
        }

        public static void EmitSse42Op(AILEmitterCtx Context, string Name)
        {
            EmitSseOp(Context, Name, typeof(Sse42));
        }

        private static void EmitSseOp(AILEmitterCtx Context, string Name, Type Type)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);

            Type BaseType = VectorIntTypesPerSizeLog2[Op.Size];

            if (Op is AOpCodeSimdReg BinOp)
            {
                EmitLdvecWithSignedCast(Context, BinOp.Rm, Op.Size);

                Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType }));
            }
            else
            {
                Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType }));
            }

            EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitLdvecWithSignedCast(AILEmitterCtx Context, int Reg, int Size)
        {
            Context.EmitLdvec(Reg);

            switch (Size)
            {
                case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
                case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
                case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
                case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;

                default: throw new ArgumentOutOfRangeException(nameof(Size));
            }
        }

        public static void EmitLdvecWithCastToDouble(AILEmitterCtx Context, int Reg)
        {
            Context.EmitLdvec(Reg);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
        }

        public static void EmitStvecWithCastFromDouble(AILEmitterCtx Context, int Reg)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));

            Context.EmitStvec(Reg);
        }

        public static void EmitLdvecWithUnsignedCast(AILEmitterCtx Context, int Reg, int Size)
        {
            Context.EmitLdvec(Reg);

            switch (Size)
            {
                case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToByte));   break;
                case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt16)); break;
                case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt32)); break;
                case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt64)); break;

                default: throw new ArgumentOutOfRangeException(nameof(Size));
            }
        }

        public static void EmitStvecWithSignedCast(AILEmitterCtx Context, int Reg, int Size)
        {
            switch (Size)
            {
                case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break;
                case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break;
                case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break;
                case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break;

                default: throw new ArgumentOutOfRangeException(nameof(Size));
            }

            Context.EmitStvec(Reg);
        }

        public static void EmitStvecWithUnsignedCast(AILEmitterCtx Context, int Reg, int Size)
        {
            switch (Size)
            {
                case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorByteToSingle));   break;
                case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt16ToSingle)); break;
                case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt32ToSingle)); break;
                case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt64ToSingle)); break;

                default: throw new ArgumentOutOfRangeException(nameof(Size));
            }

            Context.EmitStvec(Reg);
        }

        public static void EmitScalarSseOrSse2OpF(AILEmitterCtx Context, string Name)
        {
            EmitSseOrSse2OpF(Context, Name, true);
        }

        public static void EmitVectorSseOrSse2OpF(AILEmitterCtx Context, string Name)
        {
            EmitSseOrSse2OpF(Context, Name, false);
        }

        public static void EmitSseOrSse2OpF(AILEmitterCtx Context, string Name, bool Scalar)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            void Ldvec(int Reg)
            {
                Context.EmitLdvec(Reg);

                if (SizeF == 1)
                {
                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
                }
            }

            Ldvec(Op.Rn);

            Type Type;
            Type BaseType;

            if (SizeF == 0)
            {
                Type     = typeof(Sse);
                BaseType = typeof(Vector128<float>);
            }
            else /* if (SizeF == 1) */
            {
                Type     = typeof(Sse2);
                BaseType = typeof(Vector128<double>);
            }

            if (Op is AOpCodeSimdReg BinOp)
            {
                Ldvec(BinOp.Rm);

                Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType }));
            }
            else
            {
                Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType }));
            }

            if (SizeF == 1)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
            }

            Context.EmitStvec(Op.Rd);

            if (Scalar)
            {
                if (SizeF == 0)
                {
                    EmitVectorZero32_128(Context, Op.Rd);
                }
                else /* if (SizeF == 1) */
                {
                    EmitVectorZeroUpper(Context, Op.Rd);
                }
            }
            else if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            MethodInfo MthdInfo;

            if (SizeF == 0)
            {
                MthdInfo = typeof(MathF).GetMethod(Name, new Type[] { typeof(float) });
            }
            else /* if (SizeF == 1) */
            {
                MthdInfo = typeof(Math).GetMethod(Name, new Type[] { typeof(double) });
            }

            Context.EmitCall(MthdInfo);
        }

        public static void EmitBinaryMathCall(AILEmitterCtx Context, string Name)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            MethodInfo MthdInfo;

            if (SizeF == 0)
            {
                MthdInfo = typeof(MathF).GetMethod(Name, new Type[] { typeof(float), typeof(float) });
            }
            else /* if (SizeF == 1) */
            {
                MthdInfo = typeof(Math).GetMethod(Name, new Type[] { typeof(double), typeof(double) });
            }

            Context.EmitCall(MthdInfo);
        }

        public static void EmitRoundMathCall(AILEmitterCtx Context, MidpointRounding RoundMode)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            MethodInfo MthdInfo;

            if (SizeF == 0)
            {
                MthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) });
            }
            else /* if (SizeF == 1) */
            {
                MthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) });
            }

            Context.EmitLdc_I4((int)RoundMode);

            Context.EmitCall(MthdInfo);
        }

        public static void EmitUnarySoftFloatCall(AILEmitterCtx Context, string Name)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            MethodInfo MthdInfo;

            if (SizeF == 0)
            {
                MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float) });
            }
            else /* if (SizeF == 1) */
            {
                MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double) });
            }

            Context.EmitCall(MthdInfo);
        }

        public static void EmitSoftFloatCall(AILEmitterCtx Context, string Name)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            Type Type = (Op.Size & 1) == 0
                ? typeof(ASoftFloat_32)
                : typeof(ASoftFloat_64);

            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            Context.EmitCall(Type, Name);
        }

        public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;

            EmitScalarOpByElemF(Context, Emit, Op.Index, Ternary: false);
        }

        public static void EmitScalarTernaryOpByElemF(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;

            EmitScalarOpByElemF(Context, Emit, Op.Index, Ternary: true);
        }

        public static void EmitScalarOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int SizeF = Op.Size & 1;

            if (Ternary)
            {
                EmitVectorExtractF(Context, Op.Rd, 0, SizeF);
            }

            EmitVectorExtractF(Context, Op.Rn, 0,    SizeF);
            EmitVectorExtractF(Context, Op.Rm, Elem, SizeF);

            Emit();

            EmitScalarSetF(Context, Op.Rd, SizeF);
        }

        public static void EmitScalarUnaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOp(Context, Emit, OperFlags.Rn, true);
        }

        public static void EmitScalarBinaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOp(Context, Emit, OperFlags.RnRm, true);
        }

        public static void EmitScalarUnaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOp(Context, Emit, OperFlags.Rn, false);
        }

        public static void EmitScalarBinaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOp(Context, Emit, OperFlags.RnRm, false);
        }

        public static void EmitScalarTernaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOp(Context, Emit, OperFlags.RdRnRm, false);
        }

        public static void EmitScalarOp(AILEmitterCtx Context, Action Emit, OperFlags Opers, bool Signed)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            bool Rd = (Opers & OperFlags.Rd) != 0;
            bool Rn = (Opers & OperFlags.Rn) != 0;
            bool Rm = (Opers & OperFlags.Rm) != 0;

            if (Rd)
            {
                EmitVectorExtract(Context, Op.Rd, 0, Op.Size, Signed);
            }

            if (Rn)
            {
                EmitVectorExtract(Context, Op.Rn, 0, Op.Size, Signed);
            }

            if (Rm)
            {
                EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, 0, Op.Size, Signed);
            }

            Emit();

            EmitScalarSet(Context, Op.Rd, Op.Size);
        }

        public static void EmitScalarUnaryOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOpF(Context, Emit, OperFlags.Rn);
        }

        public static void EmitScalarBinaryOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOpF(Context, Emit, OperFlags.RnRm);
        }

        public static void EmitScalarTernaryRaOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitScalarOpF(Context, Emit, OperFlags.RaRnRm);
        }

        public static void EmitScalarOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            bool Ra = (Opers & OperFlags.Ra) != 0;
            bool Rn = (Opers & OperFlags.Rn) != 0;
            bool Rm = (Opers & OperFlags.Rm) != 0;

            if (Ra)
            {
                EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Ra, 0, SizeF);
            }

            if (Rn)
            {
                EmitVectorExtractF(Context, Op.Rn, 0, SizeF);
            }

            if (Rm)
            {
                EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, 0, SizeF);
            }

            Emit();

            EmitScalarSetF(Context, Op.Rd, SizeF);
        }

        public static void EmitVectorUnaryOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOpF(Context, Emit, OperFlags.Rn);
        }

        public static void EmitVectorBinaryOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOpF(Context, Emit, OperFlags.RnRm);
        }

        public static void EmitVectorTernaryOpF(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOpF(Context, Emit, OperFlags.RdRnRm);
        }

        public static void EmitVectorOpF(AILEmitterCtx Context, Action Emit, OperFlags Opers)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int SizeF = Op.Size & 1;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> SizeF + 2;

            bool Rd = (Opers & OperFlags.Rd) != 0;
            bool Rn = (Opers & OperFlags.Rn) != 0;
            bool Rm = (Opers & OperFlags.Rm) != 0;

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Rd)
                {
                    EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
                }

                if (Rn)
                {
                    EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
                }

                if (Rm)
                {
                    EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, Index, SizeF);
                }

                Emit();

                EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;

            EmitVectorOpByElemF(Context, Emit, Op.Index, Ternary: false);
        }

        public static void EmitVectorTernaryOpByElemF(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;

            EmitVectorOpByElemF(Context, Emit, Op.Index, Ternary: true);
        }

        public static void EmitVectorOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int SizeF = Op.Size & 1;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> SizeF + 2;

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Ternary)
                {
                    EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
                }

                EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
                EmitVectorExtractF(Context, Op.Rm, Elem,  SizeF);

                Emit();

                EmitVectorInsertTmpF(Context, Index, SizeF);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorUnaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.Rn, true);
        }

        public static void EmitVectorBinaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.RnRm, true);
        }

        public static void EmitVectorTernaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.RdRnRm, true);
        }

        public static void EmitVectorUnaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.Rn, false);
        }

        public static void EmitVectorBinaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.RnRm, false);
        }

        public static void EmitVectorTernaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorOp(Context, Emit, OperFlags.RdRnRm, false);
        }

        public static void EmitVectorOp(AILEmitterCtx Context, Action Emit, OperFlags Opers, bool Signed)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            bool Rd = (Opers & OperFlags.Rd) != 0;
            bool Rn = (Opers & OperFlags.Rn) != 0;
            bool Rm = (Opers & OperFlags.Rm) != 0;

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Rd)
                {
                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
                }

                if (Rn)
                {
                    EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
                }

                if (Rm)
                {
                    EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);
                }

                Emit();

                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorBinaryOpByElemSx(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;

            EmitVectorOpByElem(Context, Emit, Op.Index, false, true);
        }

        public static void EmitVectorBinaryOpByElemZx(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;

            EmitVectorOpByElem(Context, Emit, Op.Index, false, false);
        }

        public static void EmitVectorTernaryOpByElemZx(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;

            EmitVectorOpByElem(Context, Emit, Op.Index, true, false);
        }

        public static void EmitVectorOpByElem(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary, bool Signed)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            EmitVectorExtract(Context, Op.Rm, Elem, Op.Size, Signed);
            Context.EmitSttmp();

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Ternary)
                {
                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed);
                }

                EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
                Context.EmitLdtmp();

                Emit();

                EmitVectorInsertTmp(Context, Index, Op.Size);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorImmUnaryOp(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorImmOp(Context, Emit, false);
        }

        public static void EmitVectorImmBinaryOp(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorImmOp(Context, Emit, true);
        }

        public static void EmitVectorImmOp(AILEmitterCtx Context, Action Emit, bool Binary)
        {
            AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = Bytes >> Op.Size;

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Binary)
                {
                    EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
                }

                Context.EmitLdc_I8(Op.Imm);

                Emit();

                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
            }

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorWidenRmBinaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRmBinaryOp(Context, Emit, true);
        }

        public static void EmitVectorWidenRmBinaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRmBinaryOp(Context, Emit, false);
        }

        public static void EmitVectorWidenRmBinaryOp(AILEmitterCtx Context, Action Emit, bool Signed)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int Elems = 8 >> Op.Size;

            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtract(Context, Op.Rn,        Index, Op.Size + 1, Signed);
                EmitVectorExtract(Context, Op.Rm, Part + Index, Op.Size,     Signed);

                Emit();

                EmitVectorInsertTmp(Context, Index, Op.Size + 1);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);
        }

        public static void EmitVectorWidenRnRmBinaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRnRmOp(Context, Emit, false, true);
        }

        public static void EmitVectorWidenRnRmBinaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRnRmOp(Context, Emit, false, false);
        }

        public static void EmitVectorWidenRnRmTernaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRnRmOp(Context, Emit, true, true);
        }

        public static void EmitVectorWidenRnRmTernaryOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorWidenRnRmOp(Context, Emit, true, false);
        }

        public static void EmitVectorWidenRnRmOp(AILEmitterCtx Context, Action Emit, bool Ternary, bool Signed)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int Elems = 8 >> Op.Size;

            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;

            for (int Index = 0; Index < Elems; Index++)
            {
                if (Ternary)
                {
                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size + 1, Signed);
                }

                EmitVectorExtract(Context, Op.Rn, Part + Index, Op.Size, Signed);
                EmitVectorExtract(Context, Op.Rm, Part + Index, Op.Size, Signed);

                Emit();

                EmitVectorInsertTmp(Context, Index, Op.Size + 1);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);
        }

        public static void EmitVectorPairwiseOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorPairwiseOp(Context, Emit, true);
        }

        public static void EmitVectorPairwiseOpZx(AILEmitterCtx Context, Action Emit)
        {
            EmitVectorPairwiseOp(Context, Emit, false);
        }

        public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int Words = Op.GetBitsCount() >> 4;
            int Pairs = Words >> Op.Size;

            for (int Index = 0; Index < Pairs; Index++)
            {
                int Idx = Index << 1;

                EmitVectorExtract(Context, Op.Rn, Idx,     Op.Size, Signed);
                EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed);

                Emit();

                EmitVectorExtract(Context, Op.Rm, Idx,     Op.Size, Signed);
                EmitVectorExtract(Context, Op.Rm, Idx + 1, Op.Size, Signed);

                Emit();

                EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
                EmitVectorInsertTmp(Context,         Index, Op.Size);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitVectorPairwiseOpF(AILEmitterCtx Context, Action Emit)
        {
            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;

            int SizeF = Op.Size & 1;

            int Words = Op.GetBitsCount() >> 4;
            int Pairs = Words >> SizeF + 2;

            for (int Index = 0; Index < Pairs; Index++)
            {
                int Idx = Index << 1;

                EmitVectorExtractF(Context, Op.Rn, Idx,     SizeF);
                EmitVectorExtractF(Context, Op.Rn, Idx + 1, SizeF);

                Emit();

                EmitVectorExtractF(Context, Op.Rm, Idx,     SizeF);
                EmitVectorExtractF(Context, Op.Rm, Idx + 1, SizeF);

                Emit();

                EmitVectorInsertTmpF(Context, Pairs + Index, SizeF);
                EmitVectorInsertTmpF(Context,         Index, SizeF);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if (Op.RegisterSize == ARegisterSize.SIMD64)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        [Flags]
        public enum SaturatingFlags
        {
            Scalar = 1 << 0,
            Signed = 1 << 1,

            Add = 1 << 2,
            Sub = 1 << 3,

            Accumulate = 1 << 4,

            ScalarSx = Scalar | Signed,
            ScalarZx = Scalar,

            VectorSx = Signed,
            VectorZx = 0
        }

        public static void EmitScalarSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.ScalarSx);
        }

        public static void EmitVectorSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit)
        {
            EmitSaturatingUnaryOpSx(Context, Emit, SaturatingFlags.VectorSx);
        }

        public static void EmitSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = !Scalar ? Bytes >> Op.Size : 1;

            if (Scalar)
            {
                EmitVectorZeroLowerTmp(Context);
            }

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtractSx(Context, Op.Rn, Index, Op.Size);

                Emit();

                if (Op.Size <= 2)
                {
                    EmitSatQ(Context, Op.Size, true, true);
                }
                else /* if (Op.Size == 3) */
                {
                    EmitUnarySignedSatQAbsOrNeg(Context);
                }

                EmitVectorInsertTmp(Context, Index, Op.Size);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        public static void EmitScalarSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
        {
            EmitSaturatingBinaryOp(Context, () => { }, SaturatingFlags.ScalarSx | Flags);
        }

        public static void EmitScalarSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
        {
            EmitSaturatingBinaryOp(Context, () => { }, SaturatingFlags.ScalarZx | Flags);
        }

        public static void EmitVectorSaturatingBinaryOpSx(AILEmitterCtx Context, SaturatingFlags Flags)
        {
            EmitSaturatingBinaryOp(Context, () => { }, SaturatingFlags.VectorSx | Flags);
        }

        public static void EmitVectorSaturatingBinaryOpZx(AILEmitterCtx Context, SaturatingFlags Flags)
        {
            EmitSaturatingBinaryOp(Context, () => { }, SaturatingFlags.VectorZx | Flags);
        }

        public static void EmitSaturatingBinaryOp(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            bool Scalar = (Flags & SaturatingFlags.Scalar) != 0;
            bool Signed = (Flags & SaturatingFlags.Signed) != 0;

            bool Add = (Flags & SaturatingFlags.Add) != 0;
            bool Sub = (Flags & SaturatingFlags.Sub) != 0;

            bool Accumulate = (Flags & SaturatingFlags.Accumulate) != 0;

            int Bytes = Op.GetBitsCount() >> 3;
            int Elems = !Scalar ? Bytes >> Op.Size : 1;

            if (Scalar)
            {
                EmitVectorZeroLowerTmp(Context);
            }

            if (Add || Sub)
            {
                for (int Index = 0; Index < Elems; Index++)
                {
                    EmitVectorExtract(Context,                   Op.Rn, Index, Op.Size, Signed);
                    EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);

                    if (Op.Size <= 2)
                    {
                        Context.Emit(Add ? OpCodes.Add : OpCodes.Sub);

                        EmitSatQ(Context, Op.Size, true, Signed);
                    }
                    else /* if (Op.Size == 3) */
                    {
                        if (Add)
                        {
                            EmitBinarySatQAdd(Context, Signed);
                        }
                        else /* if (Sub) */
                        {
                            EmitBinarySatQSub(Context, Signed);
                        }
                    }

                    EmitVectorInsertTmp(Context, Index, Op.Size);
                }
            }
            else if (Accumulate)
            {
                for (int Index = 0; Index < Elems; Index++)
                {
                    EmitVectorExtract(Context, Op.Rn, Index, Op.Size, !Signed);
                    EmitVectorExtract(Context, Op.Rd, Index, Op.Size,  Signed);

                    if (Op.Size <= 2)
                    {
                        Context.Emit(OpCodes.Add);

                        EmitSatQ(Context, Op.Size, true, Signed);
                    }
                    else /* if (Op.Size == 3) */
                    {
                        EmitBinarySatQAccumulate(Context, Signed);
                    }

                    EmitVectorInsertTmp(Context, Index, Op.Size);
                }
            }
            else
            {
                for (int Index = 0; Index < Elems; Index++)
                {
                    EmitVectorExtract(Context,                   Op.Rn, Index, Op.Size, Signed);
                    EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed);

                    Emit();

                    EmitSatQ(Context, Op.Size, true, Signed);

                    EmitVectorInsertTmp(Context, Index, Op.Size);
                }
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        [Flags]
        public enum SaturatingNarrowFlags
        {
            Scalar    = 1 << 0,
            SignedSrc = 1 << 1,
            SignedDst = 1 << 2,

            ScalarSxSx = Scalar | SignedSrc | SignedDst,
            ScalarSxZx = Scalar | SignedSrc,
            ScalarZxZx = Scalar,

            VectorSxSx = SignedSrc | SignedDst,
            VectorSxZx = SignedSrc,
            VectorZxZx = 0
        }

        public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, SaturatingNarrowFlags Flags)
        {
            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

            bool Scalar    = (Flags & SaturatingNarrowFlags.Scalar)    != 0;
            bool SignedSrc = (Flags & SaturatingNarrowFlags.SignedSrc) != 0;
            bool SignedDst = (Flags & SaturatingNarrowFlags.SignedDst) != 0;

            int Elems = !Scalar ? 8 >> Op.Size : 1;

            int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;

            if (Scalar)
            {
                EmitVectorZeroLowerTmp(Context);
            }

            if (Part != 0)
            {
                Context.EmitLdvec(Op.Rd);
                Context.EmitStvectmp();
            }

            for (int Index = 0; Index < Elems; Index++)
            {
                EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);

                EmitSatQ(Context, Op.Size, SignedSrc, SignedDst);

                EmitVectorInsertTmp(Context, Part + Index, Op.Size);
            }

            Context.EmitLdvectmp();
            Context.EmitStvec(Op.Rd);

            if (Part == 0)
            {
                EmitVectorZeroUpper(Context, Op.Rd);
            }
        }

        // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
        public static void EmitSatQ(
            AILEmitterCtx Context,
            int  SizeDst,
            bool SignedSrc,
            bool SignedDst)
        {
            if (SizeDst > 2)
            {
                throw new ArgumentOutOfRangeException(nameof(SizeDst));
            }

            Context.EmitLdc_I4(SizeDst);
            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            if (SignedSrc)
            {
                ASoftFallback.EmitCall(Context, SignedDst
                    ? nameof(ASoftFallback.SignedSrcSignedDstSatQ)
                    : nameof(ASoftFallback.SignedSrcUnsignedDstSatQ));
            }
            else
            {
                ASoftFallback.EmitCall(Context, SignedDst
                    ? nameof(ASoftFallback.UnsignedSrcSignedDstSatQ)
                    : nameof(ASoftFallback.UnsignedSrcUnsignedDstSatQ));
            }
        }

        // TSrc (64bit) == TDst (64bit); signed.
        public static void EmitUnarySignedSatQAbsOrNeg(AILEmitterCtx Context)
        {
            if (((AOpCodeSimd)Context.CurrOp).Size < 3)
            {
                throw new InvalidOperationException();
            }

            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.UnarySignedSatQAbsOrNeg));
        }

        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static void EmitBinarySatQAdd(AILEmitterCtx Context, bool Signed)
        {
            if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
            {
                throw new InvalidOperationException();
            }

            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            ASoftFallback.EmitCall(Context, Signed
                ? nameof(ASoftFallback.BinarySignedSatQAdd)
                : nameof(ASoftFallback.BinaryUnsignedSatQAdd));
        }

        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static void EmitBinarySatQSub(AILEmitterCtx Context, bool Signed)
        {
            if (((AOpCodeSimdReg)Context.CurrOp).Size < 3)
            {
                throw new InvalidOperationException();
            }

            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            ASoftFallback.EmitCall(Context, Signed
                ? nameof(ASoftFallback.BinarySignedSatQSub)
                : nameof(ASoftFallback.BinaryUnsignedSatQSub));
        }

        // TSrcs (64bit) == TDst (64bit); signed, unsigned.
        public static void EmitBinarySatQAccumulate(AILEmitterCtx Context, bool Signed)
        {
            if (((AOpCodeSimd)Context.CurrOp).Size < 3)
            {
                throw new InvalidOperationException();
            }

            Context.EmitLdarg(ATranslatedSub.StateArgIdx);

            ASoftFallback.EmitCall(Context, Signed
                ? nameof(ASoftFallback.BinarySignedSatQAcc)
                : nameof(ASoftFallback.BinaryUnsignedSatQAcc));
        }

        public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size)
        {
            EmitVectorZeroAll(Context, Reg);
            EmitVectorInsert(Context, Reg, 0, Size);
        }

        public static void EmitScalarSetF(AILEmitterCtx Context, int Reg, int Size)
        {
            if (AOptimizations.UseSse41 && Size == 0)
            {
                //If the type is float, we can perform insertion and
                //zero the upper bits with a single instruction (INSERTPS);
                Context.EmitLdvec(Reg);

                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Sse41VectorInsertScalarSingle));

                Context.EmitStvec(Reg);
            }
            else
            {
                EmitVectorZeroAll(Context, Reg);
                EmitVectorInsertF(Context, Reg, 0, Size);
            }
        }

        public static void EmitVectorExtractSx(AILEmitterCtx Context, int Reg, int Index, int Size)
        {
            EmitVectorExtract(Context, Reg, Index, Size, true);
        }

        public static void EmitVectorExtractZx(AILEmitterCtx Context, int Reg, int Index, int Size)
        {
            EmitVectorExtract(Context, Reg, Index, Size, false);
        }

        public static void EmitVectorExtract(AILEmitterCtx Context, int Reg, int Index, int Size, bool Signed)
        {
            ThrowIfInvalid(Index, Size);

            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;

            Context.EmitLdvec(Reg);
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

            AVectorHelper.EmitCall(Context, Signed
                ? nameof(AVectorHelper.VectorExtractIntSx)
                : nameof(AVectorHelper.VectorExtractIntZx));
        }

        public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
        {
            ThrowIfInvalidF(Index, Size);

            Context.EmitLdvec(Reg);
            Context.EmitLdc_I4(Index);

            if (Size == 0)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle));
            }
            else if (Size == 1)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble));
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(Size));
            }
        }

        public static void EmitVectorZeroAll(AILEmitterCtx Context, int Rd)
        {
            if (AOptimizations.UseSse2)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleZero));

                Context.EmitStvec(Rd);
            }
            else
            {
                EmitVectorZeroLower(Context, Rd);
                EmitVectorZeroUpper(Context, Rd);
            }
        }

        public static void EmitVectorZeroLower(AILEmitterCtx Context, int Rd)
        {
            EmitVectorInsert(Context, Rd, 0, 3, 0);
        }

        public static void EmitVectorZeroLowerTmp(AILEmitterCtx Context)
        {
            EmitVectorInsertTmp(Context, 0, 3, 0);
        }

        public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Reg)
        {
            if (AOptimizations.UseSse2)
            {
                //TODO: Use MoveScalar once it is fixed, as of the
                //time of writing it just crashes the JIT.
                EmitLdvecWithUnsignedCast(Context, Reg, 3);

                Type[] Types = new Type[] { typeof(Vector128<ulong>), typeof(byte) };

                //Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types));

                Context.EmitLdc_I4(8);

                Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), Types));

                Context.EmitLdc_I4(8);

                Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), Types));

                EmitStvecWithUnsignedCast(Context, Reg, 3);
            }
            else
            {
                EmitVectorInsert(Context, Reg, 1, 3, 0);
            }
        }

        public static void EmitVectorZero32_128(AILEmitterCtx Context, int Reg)
        {
            Context.EmitLdvec(Reg);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorZero32_128));

            Context.EmitStvec(Reg);
        }

        public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size)
        {
            ThrowIfInvalid(Index, Size);

            Context.EmitLdvec(Reg);
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvec(Reg);
        }

        public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size)
        {
            ThrowIfInvalid(Index, Size);

            Context.EmitLdvectmp();
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvectmp();
        }

        public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size, long Value)
        {
            ThrowIfInvalid(Index, Size);

            Context.EmitLdc_I8(Value);
            Context.EmitLdvec(Reg);
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvec(Reg);
        }

        public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size, long Value)
        {
            ThrowIfInvalid(Index, Size);

            Context.EmitLdc_I8(Value);
            Context.EmitLdvectmp();
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvectmp();
        }

        public static void EmitVectorInsertF(AILEmitterCtx Context, int Reg, int Index, int Size)
        {
            ThrowIfInvalidF(Index, Size);

            Context.EmitLdvec(Reg);
            Context.EmitLdc_I4(Index);

            if (Size == 0)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
            }
            else if (Size == 1)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            Context.EmitStvec(Reg);
        }

        public static void EmitVectorInsertTmpF(AILEmitterCtx Context, int Index, int Size)
        {
            ThrowIfInvalidF(Index, Size);

            Context.EmitLdvectmp();
            Context.EmitLdc_I4(Index);

            if (Size == 0)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
            }
            else if (Size == 1)
            {
                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            Context.EmitStvectmp();
        }

        private static void ThrowIfInvalid(int Index, int Size)
        {
            if ((uint)Size > 3)
            {
                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            if ((uint)Index >= 16 >> Size)
            {
                throw new ArgumentOutOfRangeException(nameof(Index));
            }
        }

        private static void ThrowIfInvalidF(int Index, int Size)
        {
            if ((uint)Size > 1)
            {
                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            if ((uint)Index >= 4 >> Size)
            {
                throw new ArgumentOutOfRangeException(nameof(Index));
            }
        }
    }
}