CPU (A64): Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. (#5502)
* Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. * Ptc.InternalVersion = 5502
This commit is contained in:
parent
f95b7c5877
commit
2be8b6ea45
5 changed files with 59 additions and 1 deletions
|
@ -330,6 +330,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
|
SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create);
|
||||||
SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
|
SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create);
|
||||||
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
|
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create);
|
||||||
|
SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S, InstEmit.Fmaxp_S, OpCodeSimd.Create);
|
||||||
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
|
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create);
|
||||||
SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
|
SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create);
|
||||||
SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
|
SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create);
|
||||||
|
@ -339,6 +340,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
|
SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create);
|
||||||
SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
|
SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create);
|
||||||
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
|
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create);
|
||||||
|
SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S, InstEmit.Fminp_S, OpCodeSimd.Create);
|
||||||
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
|
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create);
|
||||||
SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
|
SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create);
|
||||||
SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);
|
SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create);
|
||||||
|
|
|
@ -883,6 +883,31 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void Fmaxp_S(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
if (Optimizations.UseAdvSimd)
|
||||||
|
{
|
||||||
|
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS);
|
||||||
|
}
|
||||||
|
else if (Optimizations.FastFP && Optimizations.UseSse41)
|
||||||
|
{
|
||||||
|
EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||||
|
}, scalar: true, op1, op2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitScalarPairwiseOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void Fmaxp_V(ArmEmitterContext context)
|
public static void Fmaxp_V(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.UseAdvSimd)
|
if (Optimizations.UseAdvSimd)
|
||||||
|
@ -1081,6 +1106,31 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void Fminp_S(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
if (Optimizations.UseAdvSimd)
|
||||||
|
{
|
||||||
|
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS);
|
||||||
|
}
|
||||||
|
else if (Optimizations.FastFP && Optimizations.UseSse41)
|
||||||
|
{
|
||||||
|
EmitSse2ScalarPairwiseOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||||
|
}, scalar: true, op1, op2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitScalarPairwiseOpF(context, (op1, op2) =>
|
||||||
|
{
|
||||||
|
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void Fminp_V(ArmEmitterContext context)
|
public static void Fminp_V(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.UseAdvSimd)
|
if (Optimizations.UseAdvSimd)
|
||||||
|
|
|
@ -228,6 +228,7 @@ namespace ARMeilleure.Instructions
|
||||||
Fmaxnmp_S,
|
Fmaxnmp_S,
|
||||||
Fmaxnmp_V,
|
Fmaxnmp_V,
|
||||||
Fmaxnmv_V,
|
Fmaxnmv_V,
|
||||||
|
Fmaxp_S,
|
||||||
Fmaxp_V,
|
Fmaxp_V,
|
||||||
Fmaxv_V,
|
Fmaxv_V,
|
||||||
Fmin_S,
|
Fmin_S,
|
||||||
|
@ -237,6 +238,7 @@ namespace ARMeilleure.Instructions
|
||||||
Fminnmp_S,
|
Fminnmp_S,
|
||||||
Fminnmp_V,
|
Fminnmp_V,
|
||||||
Fminnmv_V,
|
Fminnmv_V,
|
||||||
|
Fminp_S,
|
||||||
Fminp_V,
|
Fminp_V,
|
||||||
Fminv_V,
|
Fminv_V,
|
||||||
Fmla_Se,
|
Fmla_Se,
|
||||||
|
|
|
@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 5502; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
|
@ -764,7 +764,9 @@ namespace Ryujinx.Tests.Cpu
|
||||||
{
|
{
|
||||||
0x7E30D820u, // FADDP S0, V1.2S
|
0x7E30D820u, // FADDP S0, V1.2S
|
||||||
0x7E30C820u, // FMAXNMP S0, V1.2S
|
0x7E30C820u, // FMAXNMP S0, V1.2S
|
||||||
|
0x7E30F820u, // FMAXP S0, V1.2S
|
||||||
0x7EB0C820u, // FMINNMP S0, V1.2S
|
0x7EB0C820u, // FMINNMP S0, V1.2S
|
||||||
|
0x7EB0F820u, // FMINP S0, V1.2S
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -774,7 +776,9 @@ namespace Ryujinx.Tests.Cpu
|
||||||
{
|
{
|
||||||
0x7E70D820u, // FADDP D0, V1.2D
|
0x7E70D820u, // FADDP D0, V1.2D
|
||||||
0x7E70C820u, // FMAXNMP D0, V1.2D
|
0x7E70C820u, // FMAXNMP D0, V1.2D
|
||||||
|
0x7E70F820u, // FMAXP D0, V1.2D
|
||||||
0x7EF0C820u, // FMINNMP D0, V1.2D
|
0x7EF0C820u, // FMINNMP D0, V1.2D
|
||||||
|
0x7EF0F820u, // FMINP D0, V1.2D
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue