Arm64: Cpu feature detection (#4264)
* Arm64: Cpu feature detection * Ptc: Add Arm64 feature info * nits * simplify CheckSysctlName * restore some macos flags * feedback
This commit is contained in:
parent
fd36c8deca
commit
a11784fcbf
4 changed files with 237 additions and 31 deletions
185
ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
Normal file
185
ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
Normal file
|
@ -0,0 +1,185 @@
|
||||||
|
using System;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Reflection;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Runtime.Intrinsics.Arm;
|
||||||
|
using System.Runtime.Versioning;
|
||||||
|
|
||||||
|
namespace ARMeilleure.CodeGen.Arm64
|
||||||
|
{
|
||||||
|
static partial class HardwareCapabilities
|
||||||
|
{
|
||||||
|
/// <summary>
/// Type initializer: queries the operating system once and records the Arm64 features it reports.
/// </summary>
/// <remarks>
/// Nothing is probed when the runtime does not report Arm64 support, so every feature
/// property keeps its default (empty) value in that case.
/// </remarks>
static HardwareCapabilities()
{
    if (ArmBase.Arm64.IsSupported)
    {
        if (OperatingSystem.IsLinux())
        {
            // Linux exposes the feature bits as two words readable through getauxval.
            ulong hwCap = getauxval(AT_HWCAP);
            LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)hwCap;

            ulong hwCap2 = getauxval(AT_HWCAP2);
            LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)hwCap2;
        }

        if (OperatingSystem.IsMacOS())
        {
            // The position of a name inside _sysctlNames is the bit it sets in MacOsFeatureFlags.
            MacOsFeatureFlags detected = 0;
            int bitIndex = 0;

            foreach (string sysctlName in _sysctlNames)
            {
                if (CheckSysctlName(sysctlName))
                {
                    detected |= (MacOsFeatureFlags)(1 << bitIndex);
                }

                bitIndex++;
            }

            MacOsFeatureInfo |= detected;
        }
    }
}
|
||||||
|
|
||||||
|
#region Linux
|
||||||
|
|
||||||
|
/// <summary>
/// libc import that returns the value stored under the given key of the process's auxiliary vector.
/// </summary>
/// <param name="type">Key of the entry to read, e.g. <see cref="AT_HWCAP"/>.</param>
[LibraryImport("libc", SetLastError = true)]
private static partial ulong getauxval(ulong type);

// Keys handed to getauxval to select the two hardware-capability words.
private const ulong AT_HWCAP2 = 26;
private const ulong AT_HWCAP = 16;
|
||||||
|
|
||||||
|
/// <summary>
/// Bit layout of the value read with <c>getauxval(AT_HWCAP)</c>; one bit per CPU feature.
/// </summary>
/// <remarks>
/// NOTE(review): the bit positions presumably mirror the HWCAP_* constants of the Linux
/// arm64 uapi headers — confirm against the kernel sources before adding members.
/// </remarks>
[Flags]
public enum LinuxFeatureFlagsHwCap : ulong
{
    Fp       = 1UL << 0,
    Asimd    = 1UL << 1,
    Evtstrm  = 1UL << 2,
    Aes      = 1UL << 3,
    Pmull    = 1UL << 4,
    Sha1     = 1UL << 5,
    Sha2     = 1UL << 6,
    Crc32    = 1UL << 7,
    Atomics  = 1UL << 8,
    FpHp     = 1UL << 9,
    AsimdHp  = 1UL << 10,
    CpuId    = 1UL << 11,
    AsimdRdm = 1UL << 12,
    Jscvt    = 1UL << 13,
    Fcma     = 1UL << 14,
    Lrcpc    = 1UL << 15,
    DcpOp    = 1UL << 16,
    Sha3     = 1UL << 17,
    Sm3      = 1UL << 18,
    Sm4      = 1UL << 19,
    AsimdDp  = 1UL << 20,
    Sha512   = 1UL << 21,
    Sve      = 1UL << 22,
    AsimdFhm = 1UL << 23,
    Dit      = 1UL << 24,
    Uscat    = 1UL << 25,
    Ilrcpc   = 1UL << 26,
    FlagM    = 1UL << 27,
    Ssbs     = 1UL << 28,
    Sb       = 1UL << 29,
    Paca     = 1UL << 30,
    Pacg     = 1UL << 31
}
|
||||||
|
|
||||||
|
/// <summary>
/// Bit layout of the value read with <c>getauxval(AT_HWCAP2)</c>; one bit per CPU feature.
/// </summary>
/// <remarks>
/// NOTE(review): the bit positions presumably mirror the HWCAP2_* constants of the Linux
/// arm64 uapi headers — confirm against the kernel sources before adding members.
/// </remarks>
[Flags]
public enum LinuxFeatureFlagsHwCap2 : ulong
{
    Dcpodp     = 1UL << 0,
    Sve2       = 1UL << 1,
    SveAes     = 1UL << 2,
    SvePmull   = 1UL << 3,
    SveBitperm = 1UL << 4,
    SveSha3    = 1UL << 5,
    SveSm4     = 1UL << 6,
    FlagM2     = 1UL << 7,
    Frint      = 1UL << 8,
    SveI8mm    = 1UL << 9,
    SveF32mm   = 1UL << 10,
    SveF64mm   = 1UL << 11,
    SveBf16    = 1UL << 12,
    I8mm       = 1UL << 13,
    Bf16       = 1UL << 14,
    Dgh        = 1UL << 15,
    Rng        = 1UL << 16,
    Bti        = 1UL << 17,
    Mte        = 1UL << 18,
    Ecv        = 1UL << 19,
    Afp        = 1UL << 20,
    Rpres      = 1UL << 21,
    Mte3       = 1UL << 22,
    Sme        = 1UL << 23,
    Sme_i16i64 = 1UL << 24,
    Sme_f64f64 = 1UL << 25,
    Sme_i8i32  = 1UL << 26,
    Sme_f16f32 = 1UL << 27,
    Sme_b16f32 = 1UL << 28,
    Sme_f32f32 = 1UL << 29,
    Sme_fa64   = 1UL << 30,
    Wfxt       = 1UL << 31,
    Ebf16      = 1UL << 32,
    Sve_Ebf16  = 1UL << 33,
    Cssc       = 1UL << 34,
    Rprfm      = 1UL << 35,
    Sve2p1     = 1UL << 36
}
|
||||||
|
|
||||||
|
/// <summary>
/// Feature bits obtained from <c>getauxval(AT_HWCAP)</c>.
/// Stays empty unless the static constructor fills it in on an Arm64 Linux host.
/// </summary>
public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; }

/// <summary>
/// Feature bits obtained from <c>getauxval(AT_HWCAP2)</c>.
/// Stays empty unless the static constructor fills it in on an Arm64 Linux host.
/// </summary>
public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; }
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
#region macOS
|
||||||
|
|
||||||
|
/// <summary>
/// libSystem import used to read a named sysctl value into a 32-bit integer.
/// </summary>
/// <param name="name">Sysctl key, marshalled as an ANSI string.</param>
/// <param name="oldValue">Receives the current value.</param>
/// <param name="oldSize">In: size of the output buffer in bytes. Out: size actually written.</param>
/// <param name="newValue">New value to set; callers in this file pass <see cref="IntPtr.Zero"/>.</param>
/// <param name="newValueSize">Size of <paramref name="newValue"/>; callers in this file pass 0.</param>
/// <returns>The native status code; this file treats 0 as success.</returns>
[LibraryImport("libSystem.dylib", SetLastError = true)]
private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);

/// <summary>
/// Reports whether the sysctl entry <paramref name="name"/> exists, is int-sized, and is non-zero.
/// </summary>
/// <param name="name">Sysctl key to query.</param>
/// <returns>
/// <c>true</c> only when the query succeeds, yields exactly <c>sizeof(int)</c> bytes, and the
/// value is non-zero; <c>false</c> in every other case, including a failed lookup.
/// </returns>
[SupportedOSPlatform("macos")]
private static bool CheckSysctlName(string name)
{
    const ulong ExpectedSize = sizeof(int);

    ulong reportedSize = ExpectedSize;
    int status = sysctlbyname(name, out int flag, ref reportedSize, IntPtr.Zero, 0);

    // A failed call or an unexpectedly sized value counts as "feature absent".
    if (status != 0 || reportedSize != ExpectedSize)
    {
        return false;
    }

    return flag != 0;
}
|
||||||
|
|
||||||
|
// Sysctl keys probed on macOS by the static constructor.
// ORDER MATTERS: the constructor sets bit `i` of MacOsFeatureFlags when entry `i` reports
// a non-zero value, so each name must stay at the index of its matching enum member
// (index 0 -> Fp, 1 -> AdvSimd, ... 8 -> Sha256). Append new names at the end, together
// with a new enum member, rather than inserting in the middle.
// Declared readonly because the table is never reassigned after type initialization.
private static readonly string[] _sysctlNames = new string[]
{
    "hw.optional.floatingpoint",
    "hw.optional.AdvSIMD",
    "hw.optional.arm.FEAT_FP16",
    "hw.optional.arm.FEAT_AES",
    "hw.optional.arm.FEAT_PMULL",
    "hw.optional.arm.FEAT_LSE",
    "hw.optional.armv8_crc32",
    "hw.optional.arm.FEAT_SHA1",
    "hw.optional.arm.FEAT_SHA256"
};
|
||||||
|
|
||||||
|
/// <summary>
/// Features detected on macOS. Bit <c>n</c> corresponds to entry <c>n</c> of the
/// <c>_sysctlNames</c> table, which the static constructor walks to populate the flags.
/// </summary>
[Flags]
public enum MacOsFeatureFlags
{
    Fp      = 0x001, // hw.optional.floatingpoint
    AdvSimd = 0x002, // hw.optional.AdvSIMD
    Fp16    = 0x004, // hw.optional.arm.FEAT_FP16
    Aes     = 0x008, // hw.optional.arm.FEAT_AES
    Pmull   = 0x010, // hw.optional.arm.FEAT_PMULL
    Lse     = 0x020, // hw.optional.arm.FEAT_LSE
    Crc32   = 0x040, // hw.optional.armv8_crc32
    Sha1    = 0x080, // hw.optional.arm.FEAT_SHA1
    Sha256  = 0x100  // hw.optional.arm.FEAT_SHA256
}
|
||||||
|
|
||||||
|
/// <summary>
/// Features found through sysctl queries.
/// Stays empty unless the static constructor fills it in on an Arm64 macOS host.
/// </summary>
public static MacOsFeatureFlags MacOsFeatureInfo { get; }
|
||||||
|
|
||||||
|
#endregion
|
||||||
|
|
||||||
|
// Each property below is true when either the Linux HWCAP word or the macOS sysctl-derived
// flags carry the corresponding feature bit. Only one of the two sources is ever populated
// on a given host; the other stays empty and contributes nothing to the result.

/// <summary>Advanced SIMD is reported by the host OS.</summary>
public static bool SupportsAdvSimd =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Asimd) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.AdvSimd) != 0;

/// <summary>AES instructions are reported by the host OS.</summary>
public static bool SupportsAes =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Aes) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Aes) != 0;

/// <summary>PMULL (polynomial multiply) is reported by the host OS.</summary>
public static bool SupportsPmull =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Pmull) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Pmull) != 0;

/// <summary>LSE atomics are reported by the host OS (the Linux flag is named Atomics).</summary>
public static bool SupportsLse =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Atomics) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Lse) != 0;

/// <summary>CRC32 instructions are reported by the host OS.</summary>
public static bool SupportsCrc32 =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Crc32) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Crc32) != 0;

/// <summary>SHA-1 instructions are reported by the host OS.</summary>
public static bool SupportsSha1 =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha1) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Sha1) != 0;

/// <summary>SHA-256 instructions are reported by the host OS (the Linux flag is named Sha2).</summary>
public static bool SupportsSha256 =>
    (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha2) != 0 ||
    (MacOsFeatureInfo & MacOsFeatureFlags.Sha256) != 0;
|
||||||
|
}
|
||||||
|
}
|
|
@ -2556,7 +2556,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||||
|
|
||||||
if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs.
|
if (Optimizations.UseArm64Pmull)
|
||||||
{
|
{
|
||||||
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
|
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
using ARMeilleure.CodeGen.X86;
|
|
||||||
using System.Runtime.Intrinsics.Arm;
|
using System.Runtime.Intrinsics.Arm;
|
||||||
|
|
||||||
namespace ARMeilleure
|
namespace ARMeilleure
|
||||||
{
|
{
|
||||||
|
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
|
||||||
|
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
|
||||||
|
|
||||||
public static class Optimizations
|
public static class Optimizations
|
||||||
{
|
{
|
||||||
public static bool FastFP { get; set; } = true;
|
public static bool FastFP { get; set; } = true;
|
||||||
|
@ -11,6 +13,7 @@ namespace ARMeilleure
|
||||||
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
||||||
|
|
||||||
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
||||||
|
public static bool UseArm64PmullIfAvailable { get; set; } = true;
|
||||||
|
|
||||||
public static bool UseSseIfAvailable { get; set; } = true;
|
public static bool UseSseIfAvailable { get; set; } = true;
|
||||||
public static bool UseSse2IfAvailable { get; set; } = true;
|
public static bool UseSse2IfAvailable { get; set; } = true;
|
||||||
|
@ -29,25 +32,26 @@ namespace ARMeilleure
|
||||||
|
|
||||||
public static bool ForceLegacySse
|
public static bool ForceLegacySse
|
||||||
{
|
{
|
||||||
get => HardwareCapabilities.ForceLegacySse;
|
get => X86HardwareCapabilities.ForceLegacySse;
|
||||||
set => HardwareCapabilities.ForceLegacySse = value;
|
set => X86HardwareCapabilities.ForceLegacySse = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported;
|
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
|
||||||
|
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
|
||||||
|
|
||||||
internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
|
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
|
||||||
internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
|
internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;
|
||||||
internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
|
internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;
|
||||||
internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
|
internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;
|
||||||
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;
|
||||||
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
|
||||||
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
|
||||||
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
||||||
internal static bool UseF16c => UseF16cIfAvailable && HardwareCapabilities.SupportsF16c;
|
internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
|
||||||
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
|
internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
|
||||||
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
|
internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
|
||||||
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
|
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
|
||||||
internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
|
internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
|
||||||
internal static bool UseGfni => UseGfniIfAvailable && HardwareCapabilities.SupportsGfni;
|
internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,7 +1,6 @@
|
||||||
using ARMeilleure.CodeGen;
|
using ARMeilleure.CodeGen;
|
||||||
using ARMeilleure.CodeGen.Linking;
|
using ARMeilleure.CodeGen.Linking;
|
||||||
using ARMeilleure.CodeGen.Unwinding;
|
using ARMeilleure.CodeGen.Unwinding;
|
||||||
using ARMeilleure.CodeGen.X86;
|
|
||||||
using ARMeilleure.Common;
|
using ARMeilleure.Common;
|
||||||
using ARMeilleure.Memory;
|
using ARMeilleure.Memory;
|
||||||
using Ryujinx.Common;
|
using Ryujinx.Common;
|
||||||
|
@ -22,12 +21,15 @@ using static ARMeilleure.Translation.PTC.PtcFormatter;
|
||||||
|
|
||||||
namespace ARMeilleure.Translation.PTC
|
namespace ARMeilleure.Translation.PTC
|
||||||
{
|
{
|
||||||
|
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
|
||||||
|
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
|
||||||
|
|
||||||
class Ptc : IPtcLoadState
|
class Ptc : IPtcLoadState
|
||||||
{
|
{
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 4264; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
@ -951,12 +953,27 @@ namespace ARMeilleure.Translation.PTC
|
||||||
}
|
}
|
||||||
|
|
||||||
private static FeatureInfo GetFeatureInfo()
|
private static FeatureInfo GetFeatureInfo()
|
||||||
|
{
|
||||||
|
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
|
||||||
{
|
{
|
||||||
return new FeatureInfo(
|
return new FeatureInfo(
|
||||||
(uint)HardwareCapabilities.FeatureInfo1Ecx,
|
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
|
||||||
(uint)HardwareCapabilities.FeatureInfo1Edx,
|
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
|
||||||
(uint)HardwareCapabilities.FeatureInfo7Ebx,
|
(ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
|
||||||
(uint)HardwareCapabilities.FeatureInfo7Ecx);
|
0);
|
||||||
|
}
|
||||||
|
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
|
||||||
|
{
|
||||||
|
return new FeatureInfo(
|
||||||
|
(ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
|
||||||
|
(ulong)X86HardwareCapabilities.FeatureInfo1Edx,
|
||||||
|
(ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
|
||||||
|
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return new FeatureInfo(0, 0, 0, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte GetMemoryManagerMode()
|
private byte GetMemoryManagerMode()
|
||||||
|
@ -976,7 +993,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
return osPlatform;
|
return osPlatform;
|
||||||
}
|
}
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)]
|
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 74*/)]
|
||||||
private struct OuterHeader
|
private struct OuterHeader
|
||||||
{
|
{
|
||||||
public ulong Magic;
|
public ulong Magic;
|
||||||
|
@ -1007,8 +1024,8 @@ namespace ARMeilleure.Translation.PTC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)]
|
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)]
|
||||||
private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3);
|
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3);
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
|
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
|
||||||
private struct InnerHeader
|
private struct InnerHeader
|
||||||
|
|
Loading…
Reference in a new issue