Implement FACGE and FACGT (Scalar and Vector) AArch64 SIMD instructions (#956)
This commit is contained in:
parent
f35af5f703
commit
b8ee5b15ab
4 changed files with 115 additions and 28 deletions
|
@ -244,6 +244,10 @@ namespace ARMeilleure.Decoders
|
||||||
SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg));
|
SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg));
|
||||||
SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd));
|
SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd));
|
||||||
SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, typeof(OpCodeSimd));
|
SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, typeof(OpCodeSimd));
|
||||||
|
SetA64("011111100x1xxxxx111011xxxxxxxxxx", InstName.Facge_S, InstEmit.Facge_S, typeof(OpCodeSimdReg));
|
||||||
|
SetA64("0>1011100<1xxxxx111011xxxxxxxxxx", InstName.Facge_V, InstEmit.Facge_V, typeof(OpCodeSimdReg));
|
||||||
|
SetA64("011111101x1xxxxx111011xxxxxxxxxx", InstName.Facgt_S, InstEmit.Facgt_S, typeof(OpCodeSimdReg));
|
||||||
|
SetA64("0>1011101<1xxxxx111011xxxxxxxxxx", InstName.Facgt_V, InstEmit.Facgt_V, typeof(OpCodeSimdReg));
|
||||||
SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, typeof(OpCodeSimdReg));
|
SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, typeof(OpCodeSimdReg));
|
||||||
SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg));
|
SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg));
|
||||||
SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd));
|
SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd));
|
||||||
|
|
|
@ -286,6 +286,54 @@ namespace ARMeilleure.Instructions
|
||||||
EmitCmtstOp(context, scalar: false);
|
EmitCmtstOp(context, scalar: false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emitter for the scalar FACGE instruction (the decoder table maps
// InstName.Facge_S to this method).
// Both paths pass absolute: true; the visible helper hunks use that flag to
// strip the sign of each operand before comparing (sign-bit mask on the SSE2
// path, MathF.Abs/Math.Abs on the soft-float path).
public static void Facge_S(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
// Fast path: taken only when both FastFP and UseSse2 optimizations are enabled.
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitSse2CmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, absolute: true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Fallback: per-element call into the SoftFloat32/SoftFloat64 FPCompareGE routines.
EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true, absolute: true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emitter for the vector FACGE instruction (the decoder table maps
// InstName.Facge_V to this method).
// Same shape as the scalar variant, but passes scalar: false to the helpers;
// absolute: true makes the visible helper hunks strip the sign of both
// operands before the comparison.
public static void Facge_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
// Fast path: taken only when both FastFP and UseSse2 optimizations are enabled.
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitSse2CmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, absolute: true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Fallback: per-element call into the SoftFloat32/SoftFloat64 FPCompareGE routines.
EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false, absolute: true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emitter for the scalar FACGT instruction (the decoder table maps
// InstName.Facgt_S to this method).
// Differs from Facge_S only in the comparison used: CmpCondition.GreaterThan
// on the SSE2 path and FPCompareGT on the soft-float path. absolute: true
// makes the visible helper hunks strip the sign of both operands first.
public static void Facgt_S(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
// Fast path: taken only when both FastFP and UseSse2 optimizations are enabled.
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitSse2CmpOpF(context, CmpCondition.GreaterThan, scalar: true, absolute: true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Fallback: per-element call into the SoftFloat32/SoftFloat64 FPCompareGT routines.
EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true, absolute: true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emitter for the vector FACGT instruction (the decoder table maps
// InstName.Facgt_V to this method).
// Same as Facgt_S but passes scalar: false to the helpers; absolute: true
// makes the visible helper hunks strip the sign of both operands before the
// greater-than comparison.
public static void Facgt_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
// Fast path: taken only when both FastFP and UseSse2 optimizations are enabled.
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitSse2CmpOpF(context, CmpCondition.GreaterThan, scalar: false, absolute: true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Fallback: per-element call into the SoftFloat32/SoftFloat64 FPCompareGT routines.
EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false, absolute: true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void Fccmp_S(ArmEmitterContext context)
|
public static void Fccmp_S(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
EmitFccmpOrFccmpe(context, signalNaNs: false);
|
EmitFccmpOrFccmpe(context, signalNaNs: false);
|
||||||
|
@ -639,7 +687,8 @@ namespace ARMeilleure.Instructions
|
||||||
ArmEmitterContext context,
|
ArmEmitterContext context,
|
||||||
_F32_F32_F32 f32,
|
_F32_F32_F32 f32,
|
||||||
_F64_F64_F64 f64,
|
_F64_F64_F64 f64,
|
||||||
bool scalar)
|
bool scalar,
|
||||||
|
bool absolute = false)
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -665,6 +714,12 @@ namespace ARMeilleure.Instructions
|
||||||
me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
|
me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (absolute)
|
||||||
|
{
|
||||||
|
ne = EmitUnaryMathCall(context, MathF.Abs, Math.Abs, ne);
|
||||||
|
me = EmitUnaryMathCall(context, MathF.Abs, Math.Abs, me);
|
||||||
|
}
|
||||||
|
|
||||||
Operand e = EmitSoftFloatCall(context, f32, f64, ne, me);
|
Operand e = EmitSoftFloatCall(context, f32, f64, ne, me);
|
||||||
|
|
||||||
res = context.VectorInsert(res, e, index);
|
res = context.VectorInsert(res, e, index);
|
||||||
|
@ -673,7 +728,7 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void EmitSse2CmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar)
|
private static void EmitSse2CmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar, bool absolute = false)
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -684,6 +739,14 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
if (sizeF == 0)
|
if (sizeF == 0)
|
||||||
{
|
{
|
||||||
|
if (absolute)
|
||||||
|
{
|
||||||
|
Operand mask = scalar ? X86GetScalar(context, int.MaxValue) : X86GetAllElements(context, int.MaxValue);
|
||||||
|
|
||||||
|
n = context.AddIntrinsic(Intrinsic.X86Andps, n, mask);
|
||||||
|
m = context.AddIntrinsic(Intrinsic.X86Andps, m, mask);
|
||||||
|
}
|
||||||
|
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
|
@ -701,6 +764,14 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
else /* if (sizeF == 1) */
|
else /* if (sizeF == 1) */
|
||||||
{
|
{
|
||||||
|
if (absolute)
|
||||||
|
{
|
||||||
|
Operand mask = scalar ? X86GetScalar(context, long.MaxValue) : X86GetAllElements(context, long.MaxValue);
|
||||||
|
|
||||||
|
n = context.AddIntrinsic(Intrinsic.X86Andpd, n, mask);
|
||||||
|
m = context.AddIntrinsic(Intrinsic.X86Andpd, m, mask);
|
||||||
|
}
|
||||||
|
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
|
|
|
@ -152,6 +152,10 @@ namespace ARMeilleure.Instructions
|
||||||
Fabd_V,
|
Fabd_V,
|
||||||
Fabs_S,
|
Fabs_S,
|
||||||
Fabs_V,
|
Fabs_V,
|
||||||
|
Facge_S,
|
||||||
|
Facge_V,
|
||||||
|
Facgt_S,
|
||||||
|
Facgt_V,
|
||||||
Fadd_S,
|
Fadd_S,
|
||||||
Fadd_V,
|
Fadd_V,
|
||||||
Faddp_S,
|
Faddp_S,
|
||||||
|
|
|
@ -259,40 +259,48 @@ namespace Ryujinx.Tests.Cpu
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static uint[] _F_Cm_EqGeGt_S_S_()
|
private static uint[] _F_AcCm_EqGeGt_S_S_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
{
|
{
|
||||||
|
0x7E22EC20u, // FACGE S0, S1, S2
|
||||||
|
0x7EA2EC20u, // FACGT S0, S1, S2
|
||||||
0x5E22E420u, // FCMEQ S0, S1, S2
|
0x5E22E420u, // FCMEQ S0, S1, S2
|
||||||
0x7E22E420u, // FCMGE S0, S1, S2
|
0x7E22E420u, // FCMGE S0, S1, S2
|
||||||
0x7EA2E420u // FCMGT S0, S1, S2
|
0x7EA2E420u // FCMGT S0, S1, S2
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static uint[] _F_Cm_EqGeGt_S_D_()
|
private static uint[] _F_AcCm_EqGeGt_S_D_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
{
|
{
|
||||||
|
0x7E62EC20u, // FACGE D0, D1, D2
|
||||||
|
0x7EE2EC20u, // FACGT D0, D1, D2
|
||||||
0x5E62E420u, // FCMEQ D0, D1, D2
|
0x5E62E420u, // FCMEQ D0, D1, D2
|
||||||
0x7E62E420u, // FCMGE D0, D1, D2
|
0x7E62E420u, // FCMGE D0, D1, D2
|
||||||
0x7EE2E420u // FCMGT D0, D1, D2
|
0x7EE2E420u // FCMGT D0, D1, D2
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static uint[] _F_Cm_EqGeGt_V_2S_4S_()
|
private static uint[] _F_AcCm_EqGeGt_V_2S_4S_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
{
|
{
|
||||||
|
0x2E20EC00u, // FACGE V0.2S, V0.2S, V0.2S
|
||||||
|
0x2EA0EC00u, // FACGT V0.2S, V0.2S, V0.2S
|
||||||
0x0E20E400u, // FCMEQ V0.2S, V0.2S, V0.2S
|
0x0E20E400u, // FCMEQ V0.2S, V0.2S, V0.2S
|
||||||
0x2E20E400u, // FCMGE V0.2S, V0.2S, V0.2S
|
0x2E20E400u, // FCMGE V0.2S, V0.2S, V0.2S
|
||||||
0x2EA0E400u // FCMGT V0.2S, V0.2S, V0.2S
|
0x2EA0E400u // FCMGT V0.2S, V0.2S, V0.2S
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static uint[] _F_Cm_EqGeGt_V_2D_()
|
private static uint[] _F_AcCm_EqGeGt_V_2D_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
{
|
{
|
||||||
|
0x6E60EC00u, // FACGE V0.2D, V0.2D, V0.2D
|
||||||
|
0x6EE0EC00u, // FACGT V0.2D, V0.2D, V0.2D
|
||||||
0x4E60E400u, // FCMEQ V0.2D, V0.2D, V0.2D
|
0x4E60E400u, // FCMEQ V0.2D, V0.2D, V0.2D
|
||||||
0x6E60E400u, // FCMGE V0.2D, V0.2D, V0.2D
|
0x6E60E400u, // FCMGE V0.2D, V0.2D, V0.2D
|
||||||
0x6EE0E400u // FCMGT V0.2D, V0.2D, V0.2D
|
0x6EE0E400u // FCMGT V0.2D, V0.2D, V0.2D
|
||||||
|
@ -1429,7 +1437,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cm_EqGeGt_S_S([ValueSource("_F_Cm_EqGeGt_S_S_")] uint opcodes,
|
public void F_AcCm_EqGeGt_S_S([ValueSource("_F_AcCm_EqGeGt_S_S_")] uint opcodes,
|
||||||
[ValueSource("_1S_F_")] ulong a,
|
[ValueSource("_1S_F_")] ulong a,
|
||||||
[ValueSource("_1S_F_")] ulong b)
|
[ValueSource("_1S_F_")] ulong b)
|
||||||
{
|
{
|
||||||
|
@ -1448,7 +1456,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cm_EqGeGt_S_D([ValueSource("_F_Cm_EqGeGt_S_D_")] uint opcodes,
|
public void F_AcCm_EqGeGt_S_D([ValueSource("_F_AcCm_EqGeGt_S_D_")] uint opcodes,
|
||||||
[ValueSource("_1D_F_")] ulong a,
|
[ValueSource("_1D_F_")] ulong a,
|
||||||
[ValueSource("_1D_F_")] ulong b)
|
[ValueSource("_1D_F_")] ulong b)
|
||||||
{
|
{
|
||||||
|
@ -1467,7 +1475,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cm_EqGeGt_V_2S_4S([ValueSource("_F_Cm_EqGeGt_V_2S_4S_")] uint opcodes,
|
public void F_AcCm_EqGeGt_V_2S_4S([ValueSource("_F_AcCm_EqGeGt_V_2S_4S_")] uint opcodes,
|
||||||
[Values(0u)] uint rd,
|
[Values(0u)] uint rd,
|
||||||
[Values(1u, 0u)] uint rn,
|
[Values(1u, 0u)] uint rn,
|
||||||
[Values(2u, 0u)] uint rm,
|
[Values(2u, 0u)] uint rm,
|
||||||
|
@ -1493,7 +1501,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cm_EqGeGt_V_2D([ValueSource("_F_Cm_EqGeGt_V_2D_")] uint opcodes,
|
public void F_AcCm_EqGeGt_V_2D([ValueSource("_F_AcCm_EqGeGt_V_2D_")] uint opcodes,
|
||||||
[Values(0u)] uint rd,
|
[Values(0u)] uint rd,
|
||||||
[Values(1u, 0u)] uint rn,
|
[Values(1u, 0u)] uint rn,
|
||||||
[Values(2u, 0u)] uint rm,
|
[Values(2u, 0u)] uint rm,
|
||||||
|
|
Loading…
Reference in a new issue