From a804db6eed016a8a1f152c2837fc7b65e50f02df Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 13 Jul 2020 13:08:47 +0200 Subject: [PATCH] =?UTF-8?q?Add=20Fmax/minv=5FV=20&=20S/Ushl=5FS=20Inst.s?= =?UTF-8?q?=20with=20Tests.=20Fix=20Maxps/d=20&=20Minps/d=20d=E2=80=A6=20(?= =?UTF-8?q?#1335)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Fmax/minv_V & S/Ushl_S Inst.s with Tests. Fix Maxps/d & Minps/d double zero sign handling. Allows better handling of NaNs. * Optimized EmitSse2VectorIsNaNOpF() for multiple uses per opF. --- ARMeilleure/Decoders/OpCodeTable.cs | 6 + .../Instructions/InstEmitSimdArithmetic.cs | 368 +++++++++++++++--- .../Instructions/InstEmitSimdArithmetic32.cs | 4 +- .../Instructions/InstEmitSimdHelper.cs | 45 ++- ARMeilleure/Instructions/InstEmitSimdShift.cs | 98 +++-- ARMeilleure/Instructions/InstName.cs | 6 + ARMeilleure/Translation/PTC/Ptc.cs | 4 +- Ryujinx.Tests/Cpu/CpuTest.cs | 147 ++++--- Ryujinx.Tests/Cpu/CpuTestMisc.cs | 123 ++++++ Ryujinx.Tests/Cpu/CpuTestSimd.cs | 4 +- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 57 ++- 11 files changed, 698 insertions(+), 164 deletions(-) diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index c1632d461..b98fcab12 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -332,14 +332,18 @@ namespace ARMeilleure.Decoders SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg)); SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg)); SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg)); + SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, typeof(OpCodeSimdReg)); SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, typeof(OpCodeSimd)); SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg)); + SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, typeof(OpCodeSimd)); SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg)); SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg)); SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg)); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg)); + SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, typeof(OpCodeSimdReg)); SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, typeof(OpCodeSimd)); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg)); + SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, typeof(OpCodeSimd)); SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF)); SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg)); SetA64("0>0011111>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm)); + SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, typeof(OpCodeSimdReg)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg)); SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm)); SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm)); @@ -611,6 +616,7 @@ namespace ARMeilleure.Decoders SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm)); + SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, typeof(OpCodeSimdReg)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm)); SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, typeof(OpCodeSimdShImm)); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index b3041aac5..0d417f70e 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -382,7 +382,14 @@ namespace ARMeilleure.Instructions { if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd; + + return context.AddIntrinsic(addInst, op1, op2); + }); } else { @@ -468,9 +475,12 @@ namespace ARMeilleure.Instructions public static void Fmax_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd); + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: true); } else { @@ -483,9 +493,12 @@ namespace ARMeilleure.Instructions public static void Fmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: false); } else { @@ -526,19 +539,53 @@ namespace ARMeilleure.Instructions } } + public static void Fmaxnmp_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + public static void Fmaxnmv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpF(context, (op1, op2) => + if (Optimizations.FastFP && Optimizations.UseSse41) { - return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)), op1, op2); - }); + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)), op1, op2); + }); + } } public static void Fmaxp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: false, op1, op2); + }); } else { @@ -549,11 +596,35 @@ namespace ARMeilleure.Instructions } } + public static void Fmaxv_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMax)), op1, op2); + }); + } + } + public static void Fmin_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd); + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: true); } else { @@ -566,9 +637,12 @@ namespace ARMeilleure.Instructions public static void Fmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false); } else { @@ -609,19 +683,53 @@ namespace ARMeilleure.Instructions } } + public static void Fminnmp_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + public static void Fminnmv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpF(context, (op1, op2) => + if (Optimizations.FastFP && Optimizations.UseSse41) { - return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)), op1, op2); - }); + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)), op1, op2); + }); + } } public static void Fminp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseSse41) { - EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false, op1, op2); + }); } else { @@ -632,6 +740,27 @@ namespace ARMeilleure.Instructions } } + public static void Fminv_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMin)), op1, op2); + }); + } + } + public static void Fmla_Se(ArmEmitterContext context) // Fused. { EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => @@ -3111,7 +3240,12 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } - public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF) + public static void EmitSse2VectorIsNaNOpF( + ArmEmitterContext context, + Operand opF, + out Operand qNaNMask, + out Operand sNaNMask, + bool? isQNaN = null) { IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; @@ -3126,7 +3260,8 @@ namespace ARMeilleure.Instructions Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal)); - return context.AddIntrinsic(Intrinsic.X86Andps, mask1, mask2); + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : null; + sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : null; } else /* if ((op.Size & 1) == 1) */ { @@ -3139,67 +3274,202 @@ namespace ARMeilleure.Instructions Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal)); - return context.AddIntrinsic(Intrinsic.X86Andpd, mask1, mask2); + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : null; + sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : null; } } - private static void EmitSse41MaxMinNumOpF(ArmEmitterContext context, bool isMaxNum, bool scalar) + public static Operand EmitSse41ProcessNaNsOpF( + ArmEmitterContext context, + Func2I emit, + bool scalar, + Operand n = null, + Operand m = null) { - OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)); + Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)); - Operand d = GetVec(op.Rd); - Operand n = GetVec(op.Rn); - Operand m = GetVec(op.Rm); + EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask); + EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false); - Operand nNum = context.Copy(n); - Operand mNum = context.Copy(m); - - Operand nQNaNMask = EmitSse2VectorIsQNaNOpF(context, nNum); - Operand mQNaNMask = EmitSse2VectorIsQNaNOpF(context, mNum); - - int sizeF = op.Size & 1; + int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; if (sizeF == 0) { - Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); + const int QBit = 22; - Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); - Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask); + Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit); - nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask); - mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask); + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); - Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum); + Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + + Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask); + + if (n != null || m != null) + { + return res; + } if (scalar) { res = context.VectorZeroUpper96(res); } - else if (op.RegisterSize == RegisterSize.Simd64) + else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) { res = context.VectorZeroUpper64(res); } - context.Copy(d, res); + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return null; } else /* if (sizeF == 1) */ { - Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity); + const int QBit = 51; - Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); - Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask); + Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit); - nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask); - mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask); + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); - Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum); + Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + + Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask); + + if (n != null || m != null) + { + return res; + } if (scalar) { res = context.VectorZeroUpper64(res); } - context.Copy(d, res); + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return null; + } + } + + private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); + + Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); + resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign); + + return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); + + Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); + resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign); + + return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); + } + } + + private static Operand EmitSse41MaxMinNumOpF( + ArmEmitterContext context, + bool isMaxNum, + bool scalar, + Operand n = null, + Operand m = null) + { + Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)); + Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)); + + EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true); + EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true); + + int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; + + if (sizeF == 0) + { + Operand negInfMask = scalar + ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity) + : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask); + + nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask); + mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask); + + Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); + }, scalar: scalar, nCopy, mCopy); + + if (n != null || m != null) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return null; + } + else /* if (sizeF == 1) */ + { + Operand negInfMask = scalar + ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity) + : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask); + + nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask); + mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask); + + Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); + }, scalar: scalar, nCopy, mCopy); + + if (n != null || m != null) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return null; } } diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 82f57d63e..eb86ac9e7 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -1200,8 +1200,8 @@ namespace ARMeilleure.Instructions Operand nNum = context.Copy(n); Operand mNum = context.Copy(m); - Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum); - Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum); + InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true); + InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true); int sizeF = op.Size & 1; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index 912c22600..69e79a6db 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -1095,6 +1095,29 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), d); } + public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); + + const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0; + const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0; + const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0; + const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0; + + Operand nCopy = context.Copy(GetVec(op.Rn)); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1)); + Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2)); + Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3)); + + Operand res = emit(emit(part0, part1), emit(part2, part3)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1124,12 +1147,12 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } - public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - Operand n = GetVec(op.Rn); - Operand m = GetVec(op.Rm); + Operand nCopy = context.Copy(GetVec(op.Rn)); + Operand mCopy = context.Copy(GetVec(op.Rm)); int sizeF = op.Size & 1; @@ -1137,32 +1160,32 @@ namespace ARMeilleure.Instructions { if (op.RegisterSize == RegisterSize.Simd64) { - Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m); + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy); Operand zero = context.VectorZero(); Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); - context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + context.Copy(GetVec(op.Rd), emit(part0, part1)); } else /* if (op.RegisterSize == RegisterSize.Simd128) */ { const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; - Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0)); - Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1)); + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1)); - context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1)); + context.Copy(GetVec(op.Rd), emit(part0, part1)); } } else /* if (sizeF == 1) */ { - Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m); - Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m); + Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy); - context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1)); + context.Copy(GetVec(op.Rd), emit(part0, part1)); } } diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index 0b3d85aeb..62363fdec 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -391,25 +391,14 @@ namespace ARMeilleure.Instructions } } + public static void Sshl_S(ArmEmitterContext context) + { + EmitSshlOrUshl(context, signed: true, scalar: true); + } + public static void Sshl_V(ArmEmitterContext context) { - OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); - Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); - - Operand e = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlReg)), ne, me, Const(0), Const(op.Size)); - - res = EmitVectorInsert(context, res, e, index, op.Size); - } - - context.Copy(GetVec(op.Rd), res); + EmitSshlOrUshl(context, signed: true, scalar: false); } public static void Sshll_V(ArmEmitterContext context) @@ -686,25 +675,14 @@ namespace ARMeilleure.Instructions } } + public static void Ushl_S(ArmEmitterContext context) + { + EmitSshlOrUshl(context, signed: false, scalar: true); + } + public static void Ushl_V(ArmEmitterContext context) { - OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0); - - Operand e = EmitUnsignedShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size); - - res = EmitVectorInsert(context, res, e, index, op.Size); - } - - context.Copy(GetVec(op.Rd), res); + EmitSshlOrUshl(context, signed: false, scalar: false); } public static void Ushll_V(ArmEmitterContext context) @@ -894,7 +872,7 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } - private static Operand EmitUnsignedShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size) + private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool signed) { Debug.Assert(op.Type == OperandType.I64); Debug.Assert(shiftLsB.Type == OperandType.I32); @@ -902,18 +880,33 @@ namespace ARMeilleure.Instructions Operand negShiftLsB = context.Negate(shiftLsB); + Operand isInRange = context.BitwiseAnd( + context.ICompareLess(shiftLsB, Const(8 << size)), + context.ICompareLess(negShiftLsB, Const(8 << size))); + Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0)); - Operand shl = context.ShiftLeft (op, shiftLsB); - Operand shr = context.ShiftRightUI(op, negShiftLsB); + Operand shl = context.ShiftLeft(op, shiftLsB); - Operand res = context.ConditionalSelect(isPositive, shl, shr); + Operand sarOrShr = signed + ? context.ShiftRightSI(op, negShiftLsB) + : context.ShiftRightUI(op, negShiftLsB); - Operand isOutOfRange = context.BitwiseOr( - context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)), - context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size))); + Operand res = context.ConditionalSelect(isPositive, shl, sarOrShr); - return context.ConditionalSelect(isOutOfRange, Const(0UL), res); + if (signed) + { + Operand isPositive2 = context.ICompareGreaterOrEqual(op, Const(0L)); + + Operand res2 = context.ConditionalSelect(isPositive2, Const(0L), Const(-1L)); + res2 = context.ConditionalSelect(isPositive, Const(0L), res2); + + return context.ConditionalSelect(isInRange, res, res2); + } + else + { + return context.ConditionalSelect(isInRange, res, Const(0UL)); + } } private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round) @@ -1174,5 +1167,26 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } } + + private static void EmitSshlOrUshl(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract (context, op.Rn, index, op.Size, signed); + Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0); + + Operand e = EmitShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size, signed); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } } } diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index e4d084560..69b5d3fc7 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -212,14 +212,18 @@ namespace ARMeilleure.Instructions Fmax_V, Fmaxnm_S, Fmaxnm_V, + Fmaxnmp_V, Fmaxnmv_V, Fmaxp_V, + Fmaxv_V, Fmin_S, Fmin_V, Fminnm_S, Fminnm_V, + Fminnmp_V, Fminnmv_V, Fminp_V, + Fminv_V, Fmla_Se, Fmla_V, Fmla_Ve, @@ -378,6 +382,7 @@ namespace ARMeilleure.Instructions Srshr_V, Srsra_S, Srsra_V, + Sshl_S, Sshl_V, Sshll_V, Sshr_S, @@ -444,6 +449,7 @@ namespace ARMeilleure.Instructions Urshr_V, Ursra_S, Ursra_V, + Ushl_S, Ushl_V, Ushll_V, Ushr_S, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index deffabe1e..b951caf83 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -19,8 +19,8 @@ namespace ARMeilleure.Translation.PTC public static class Ptc { private const string HeaderMagic = "PTChd"; - - private const int InternalVersion = 9; //! To be incremented manually for each change to the ARMeilleure project. + + private const int InternalVersion = 10; //! To be incremented manually for each change to the ARMeilleure project. private const string BaseDir = "Ryujinx"; diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index 2c0472481..4f5fba9d0 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -12,10 +12,14 @@ namespace Ryujinx.Tests.Cpu [TestFixture] public class CpuTest { - private ulong _currAddress; - private ulong _size; + protected const ulong Size = 0x1000; + protected const ulong CodeBaseAddress = 0x1000; + protected const ulong DataBaseAddress = CodeBaseAddress + Size; - private ulong _entryPoint; + private const bool Ignore_FpcrFz_FpcrDn = false; + private const bool IgnoreAllExcept_FpsrQc = false; + + private ulong _currAddress; private MemoryBlock _ram; @@ -28,6 +32,8 @@ namespace Ryujinx.Tests.Cpu private static bool _unicornAvailable; private UnicornAArch64 _unicornEmu; + private bool _usingMemory; + static CpuTest() { _unicornAvailable = UnicornAArch64.IsAvailable(); @@ -41,14 +47,11 @@ namespace Ryujinx.Tests.Cpu [SetUp] public void Setup() { - _currAddress = 0x1000; - _size = 0x1000; + _currAddress = CodeBaseAddress; - _entryPoint = _currAddress; - - _ram = new MemoryBlock(_size); - _memory = new MemoryManager(_ram, 1UL << 16); - _memory.Map(_currAddress, 0, _size); + _ram = new MemoryBlock(Size * 2); + _memory = new MemoryManager(_ram, 1ul << 16); + _memory.Map(CodeBaseAddress, 0, Size * 2); _context = CpuContext.CreateExecutionContext(); @@ -57,8 +60,9 @@ namespace Ryujinx.Tests.Cpu if (_unicornAvailable) { _unicornEmu = new UnicornAArch64(); - _unicornEmu.MemoryMap(_currAddress, _size, MemoryPermission.READ | MemoryPermission.EXEC); - _unicornEmu.PC = _entryPoint; + _unicornEmu.MemoryMap(CodeBaseAddress, Size, MemoryPermission.READ | MemoryPermission.EXEC); + _unicornEmu.MemoryMap(DataBaseAddress, Size, MemoryPermission.READ | MemoryPermission.WRITE); + _unicornEmu.PC = CodeBaseAddress; } } @@ -73,6 +77,8 @@ namespace Ryujinx.Tests.Cpu _context = null; _cpuContext = null; _unicornEmu = null; + + _usingMemory = false; } protected void Reset() @@ -169,11 +175,11 @@ namespace Ryujinx.Tests.Cpu protected void ExecuteOpcodes(bool runUnicorn = true) { - _cpuContext.Execute(_context, _entryPoint); + _cpuContext.Execute(_context, CodeBaseAddress); if (_unicornAvailable && runUnicorn) { - _unicornEmu.RunForCount((_currAddress - _entryPoint - 4) / 4); + _unicornEmu.RunForCount((_currAddress - CodeBaseAddress - 4) / 4); } } @@ -199,6 +205,11 @@ namespace Ryujinx.Tests.Cpu int fpsr = 0, bool runUnicorn = true) { + if (Ignore_FpcrFz_FpcrDn) + { + fpcr &= ~((int)FPCR.Fz | (int)FPCR.Dn); + } + Opcode(opcode); Opcode(0xD65F03C0); // RET SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr); @@ -207,6 +218,30 @@ namespace Ryujinx.Tests.Cpu return GetContext(); } + protected void SetWorkingMemory(ulong offset, byte[] data) + { + _memory.Write(DataBaseAddress + offset, data); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite(DataBaseAddress + offset, data); + } + + _usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too. + } + + protected void SetWorkingMemory(ulong offset, byte data) + { + _memory.Write(DataBaseAddress + offset, data); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite8(DataBaseAddress + offset, data); + } + + _usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too. + } + /// Rounding Mode control field. public enum RMode { @@ -284,15 +319,20 @@ namespace Ryujinx.Tests.Cpu return; } + if (IgnoreAllExcept_FpsrQc) + { + fpsrMask &= Fpsr.Qc; + } + if (fpSkips != FpSkips.None) { ManageFpSkips(fpSkips); } - Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0])); - Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1])); - Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2])); - Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3])); + Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0]), "X0"); + Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1]), "X1"); + Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2]), "X2"); + Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3]), "X3"); Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.X[4])); Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.X[5])); Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.X[6])); @@ -321,21 +361,21 @@ namespace Ryujinx.Tests.Cpu Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29])); Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30])); - Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP)); + Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP), "X31"); if (fpTolerances == FpTolerances.None) { - Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]), "V0"); } else { ManageFpTolerances(fpTolerances); } - Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1])); - Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2])); - Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3])); - Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4])); - Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5])); + Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1]), "V1"); + Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2]), "V2"); + Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3]), "V3"); + Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4]), "V4"); + Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5]), "V5"); Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6])); Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7])); Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8])); @@ -360,16 +400,27 @@ namespace Ryujinx.Tests.Cpu Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27])); Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28])); Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29])); - Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30])); - Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31])); + Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]), "V30"); + Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]), "V31"); - Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr)); - Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask)); + Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr), "Fpcr"); + Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask), "Fpsr"); - Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag)); - Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); - Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag)); - Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag)); + Assert.Multiple(() => + { + Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag), "VFlag"); + Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag), "CFlag"); + Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag), "ZFlag"); + Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag), "NFlag"); + }); + + if (_usingMemory) + { + byte[] mem = _memory.GetSpan(DataBaseAddress, (int)Size).ToArray(); + byte[] unicornMem = _unicornEmu.MemoryRead(DataBaseAddress, Size); + + Assert.That(mem, Is.EqualTo(unicornMem), "Data"); + } } private void ManageFpSkips(FpSkips fpSkips) @@ -418,14 +469,17 @@ namespace Ryujinx.Tests.Cpu if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) && IsNormalOrSubnormalS(_context.GetV(0).As())) { - Assert.That (_context.GetV(0).Extract(0), - Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps); - Assert.That (_context.GetV(0).Extract(1), - Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps); - Assert.That (_context.GetV(0).Extract(2), - Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps); - Assert.That (_context.GetV(0).Extract(3), - Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps); + Assert.Multiple(() => + { + Assert.That (_context.GetV(0).Extract(0), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps, "V0[0]"); + Assert.That (_context.GetV(0).Extract(1), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps, "V0[1]"); + Assert.That (_context.GetV(0).Extract(2), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps, "V0[2]"); + Assert.That (_context.GetV(0).Extract(3), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps, "V0[3]"); + }); Console.WriteLine(fpTolerances); } @@ -440,10 +494,13 @@ namespace Ryujinx.Tests.Cpu if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) && IsNormalOrSubnormalD(_context.GetV(0).As())) { - Assert.That (_context.GetV(0).Extract(0), - Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps); - Assert.That (_context.GetV(0).Extract(1), - Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps); + Assert.Multiple(() => + { + Assert.That (_context.GetV(0).Extract(0), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps, "V0[0]"); + Assert.That (_context.GetV(0).Extract(1), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps, "V0[1]"); + }); Console.WriteLine(fpTolerances); } diff --git a/Ryujinx.Tests/Cpu/CpuTestMisc.cs b/Ryujinx.Tests/Cpu/CpuTestMisc.cs index 9b31e68e0..9c067f4ed 100644 --- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs +++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs @@ -4,15 +4,67 @@ using ARMeilleure.State; using NUnit.Framework; +using System; +using System.Collections.Generic; + namespace Ryujinx.Tests.Cpu { [Category("Misc")] public sealed class CpuTestMisc : CpuTest { #if Misc + +#region "ValueSource (Types)" + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } +#endregion + private const int RndCnt = 2; private const int RndCntImm = 2; + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + #region "AluImm & Csel" [Test, Pairwise] public void Adds_Csinc_64bit([Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, @@ -357,6 +409,77 @@ namespace Ryujinx.Tests.Cpu Assert.That(context.GetX(0), Is.EqualTo(a)); } + + [Explicit] + [Test, Pairwise] + public void Misc4([ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b, + [ValueSource("_1S_F_")] ulong c, + [Values(0ul, 1ul, 2ul, 3ul)] ulong displacement) + { + if (!BitConverter.IsLittleEndian) + { + Assert.Ignore(); + } + + for (ulong gapOffset = 0; gapOffset < displacement; gapOffset++) + { + SetWorkingMemory(gapOffset, TestContext.CurrentContext.Random.NextByte()); + } + + SetWorkingMemory(0x0 + displacement, BitConverter.GetBytes((uint)b)); + + SetWorkingMemory(0x4 + displacement, BitConverter.GetBytes((uint)c)); + + SetWorkingMemory(0x8 + displacement, TestContext.CurrentContext.Random.NextByte()); + SetWorkingMemory(0x9 + displacement, TestContext.CurrentContext.Random.NextByte()); + SetWorkingMemory(0xA + displacement, TestContext.CurrentContext.Random.NextByte()); + SetWorkingMemory(0xB + displacement, TestContext.CurrentContext.Random.NextByte()); + + SetContext( + x0: DataBaseAddress + displacement, + v0: MakeVectorE0E1(a, TestContext.CurrentContext.Random.NextULong()), + v1: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()), + v2: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()), + overflow: TestContext.CurrentContext.Random.NextBool(), + carry: TestContext.CurrentContext.Random.NextBool(), + zero: TestContext.CurrentContext.Random.NextBool(), + negative: TestContext.CurrentContext.Random.NextBool()); + + Opcode(0xBD400001); // LDR S1, [X0,#0] + Opcode(0xBD400402); // LDR S2, [X0,#4] + Opcode(0x1E215801); // FMIN S1, S0, S1 + Opcode(0x1E222000); // FCMP S0, S2 + Opcode(0x1E214C40); // FCSEL S0, S2, S1, MI + Opcode(0xBD000800); // STR S0, [X0,#8] + Opcode(0xD65F03C0); // RET + ExecuteOpcodes(); + + CompareAgainstUnicorn(); + } + + [Explicit] + [Test] + public void Misc5([ValueSource("_1S_F_")] ulong a) + { + SetContext( + v0: MakeVectorE0E1(a, TestContext.CurrentContext.Random.NextULong()), + v1: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()), + overflow: TestContext.CurrentContext.Random.NextBool(), + carry: TestContext.CurrentContext.Random.NextBool(), + zero: TestContext.CurrentContext.Random.NextBool(), + negative: TestContext.CurrentContext.Random.NextBool()); + + Opcode(0x1E202008); // FCMP S0, #0.0 + Opcode(0x1E2E1001); // FMOV S1, #1.0 + Opcode(0x1E215800); // FMIN S0, S0, S1 + Opcode(0x1E2703E1); // FMOV S1, WZR + Opcode(0x1E204C20); // FCSEL S0, S1, S0, MI + Opcode(0xD65F03C0); // RET + ExecuteOpcodes(); + + CompareAgainstUnicorn(); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index f8a61b158..249447d76 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -918,7 +918,9 @@ namespace Ryujinx.Tests.Cpu return new uint[] { 0x6E30C800u, // FMAXNMV S0, V0.4S - 0x6EB0C800u // FMINNMV S0, V0.4S + 0x6E30F800u, // FMAXV S0, V0.4S + 0x6EB0C800u, // FMINNMV S0, V0.4S + 0x6EB0F800u // FMINV S0, V0.4S }; } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index a54583827..828c1bf96 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -373,12 +373,14 @@ namespace Ryujinx.Tests.Cpu { return new uint[] { - 0x0E20F400u, // FMAX V0.2S, V0.2S, V0.2S - 0x0E20C400u, // FMAXNM V0.2S, V0.2S, V0.2S - 0x2E20F400u, // FMAXP V0.2S, V0.2S, V0.2S - 0x0EA0F400u, // FMIN V0.2S, V0.2S, V0.2S - 0x0EA0C400u, // FMINNM V0.2S, V0.2S, V0.2S - 0x2EA0F400u // FMINP V0.2S, V0.2S, V0.2S + 0x0E20F400u, // FMAX V0.2S, V0.2S, V0.2S + 0x0E20C400u, // FMAXNM V0.2S, V0.2S, V0.2S + 0x2E20C400u, // FMAXNMP V0.2S, V0.2S, V0.2S + 0x2E20F400u, // FMAXP V0.2S, V0.2S, V0.2S + 0x0EA0F400u, // FMIN V0.2S, V0.2S, V0.2S + 0x0EA0C400u, // FMINNM V0.2S, V0.2S, V0.2S + 0x2EA0C400u, // FMINNMP V0.2S, V0.2S, V0.2S + 0x2EA0F400u // FMINP V0.2S, V0.2S, V0.2S }; } @@ -386,12 +388,14 @@ namespace Ryujinx.Tests.Cpu { return new uint[] { - 0x4E60F400u, // FMAX V0.2D, V0.2D, V0.2D - 0x4E60C400u, // FMAXNM V0.2D, V0.2D, V0.2D - 0x6E60F400u, // FMAXP V0.2D, V0.2D, V0.2D - 0x4EE0F400u, // FMIN V0.2D, V0.2D, V0.2D - 0x4EE0C400u, // FMINNM V0.2D, V0.2D, V0.2D - 0x6EE0F400u // FMINP V0.2D, V0.2D, V0.2D + 0x4E60F400u, // FMAX V0.2D, V0.2D, V0.2D + 0x4E60C400u, // FMAXNM V0.2D, V0.2D, V0.2D + 0x6E60C400u, // FMAXNMP V0.2D, V0.2D, V0.2D + 0x6E60F400u, // FMAXP V0.2D, V0.2D, V0.2D + 0x4EE0F400u, // FMIN V0.2D, V0.2D, V0.2D + 0x4EE0C400u, // FMINNM V0.2D, V0.2D, V0.2D + 0x6EE0C400u, // FMINNMP V0.2D, V0.2D, V0.2D + 0x6EE0F400u // FMINP V0.2D, V0.2D, V0.2D }; } @@ -531,6 +535,15 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _ShlReg_S_D_() + { + return new uint[] + { + 0x5EE04400u, // SSHL D0, D0, D0 + 0x7EE04400u // USHL D0, D0, D0 + }; + } + private static uint[] _ShlReg_V_8B_4H_2S_() { return new uint[] @@ -2820,6 +2833,26 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void ShlReg_S_D([ValueSource("_ShlReg_S_D_")] uint opcodes, + [Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [ValueSource("_1D_")] [Random(RndCnt)] ulong z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong a, + [ValueSource("_1D_")] [Random(0ul, 255ul, RndCnt)] ulong b) + { + opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + + SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(fpsrMask: Fpsr.Qc); + } + [Test, Pairwise] public void ShlReg_V_8B_4H_2S([ValueSource("_ShlReg_V_8B_4H_2S_")] uint opcodes, [Values(0u)] uint rd,