Implement JIT Arm64 backend (#4114)

* Implement JIT Arm64 backend

* PPTC version bump

* Address some feedback from Arm64 JIT PR

* Address even more PR feedback

* Remove unused IsPageAligned function

* Sync Qc flag before calls

* Fix comment and remove unused enum

* Address riperiperi PR feedback

* Delete Breakpoint IR instruction that was only implemented for Arm64
Author: gdkchan
Date: 2023-01-10 19:16:59 -03:00 (committed via GitHub)
Commit: 5e0f8e8738 (parent: d16288a2a8)
61 changed files with 10,266 additions and 642 deletions

ARMeilleure/ARMeilleure.csproj

@@ -9,4 +9,11 @@
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
</ItemGroup>
<ItemGroup>
<ContentWithTargetPath Include="Native\libs\libarmeilleure-jitsupport.dylib" Condition="'$(RuntimeIdentifier)' == '' OR '$(RuntimeIdentifier)' == 'osx-arm64'">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<TargetPath>libarmeilleure-jitsupport.dylib</TargetPath>
</ContentWithTargetPath>
</ItemGroup>
</Project>

ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs

@@ -0,0 +1,270 @@
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.CodeGen.Arm64
{
static class Arm64Optimizer
{
private const int MaxConstantUses = 10000;
public static void RunPass(ControlFlowGraph cfg)
{
var constants = new Dictionary<ulong, Operand>();
Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
{
// If the constant has many uses, we also force a new constant mov to be added, in order
// to avoid overflowing the uses count field (which is limited to 16 bits).
if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
{
constant = Local(source.Type);
Operation copyOp = Operation(Instruction.Copy, constant, source);
block.Operations.AddBefore(operation, copyOp);
constants[source.Value] = constant;
}
return constant;
}
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
{
constants.Clear();
Operation nextNode;
for (Operation node = block.Operations.First; node != default; node = nextNode)
{
nextNode = node.ListNext;
// Insert copies for constants that can't fit on a 32-bit immediate.
// Doing this early unblocks a few optimizations.
if (node.Instruction == Instruction.Add)
{
Operand src1 = node.GetSource(0);
Operand src2 = node.GetSource(1);
if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
{
node.SetSource(0, GetConstantCopy(block, node, src1));
}
if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
{
node.SetSource(1, GetConstantCopy(block, node, src2));
}
}
// Try to fold something like:
// lsl x1, x1, #2
// add x0, x0, x1
// ldr x0, [x0]
// add x2, x2, #16
// ldr x2, [x2]
// Into:
// ldr x0, [x0, x1, lsl #2]
// ldr x2, [x2, #16]
if (IsMemoryLoadOrStore(node.Instruction))
{
OperandType type;
if (node.Destination != default)
{
type = node.Destination.Type;
}
else
{
type = node.GetSource(1).Type;
}
Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
if (memOp != default)
{
node.SetSource(0, memOp);
}
}
}
}
Optimizer.RemoveUnusedNodes(cfg);
}
private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
{
Operand baseOp = addr;
// First we check if the address is the result of a local X plus an immediate. If
// that is the case, then baseOp is X, and the memory operand immediate becomes the
// addition's immediate. Otherwise baseOp remains the address.
int imm = GetConstOp(ref baseOp, type);
if (imm != 0)
{
return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
}
// Now we check if the baseOp is the result of a local Y with a local Z addition.
// If that is the case, we now set baseOp to Y and indexOp to Z. We further check
// if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
// if that is the case, we set indexOp to W and adjust the scale value of the memory operand
// to match that of the left shift.
// There is one missed case, which is the address being a shift result, but this is
// probably not worth optimizing as it should never happen.
(Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
// If baseOp is still equal to address, then there's nothing that can be optimized.
if (baseOp == addr)
{
return default;
}
return MemoryOp(type, baseOp, indexOp, scale, 0);
}
private static int GetConstOp(ref Operand baseOp, OperandType accessType)
{
Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
if (operation == default)
{
return 0;
}
Operand src1 = operation.GetSource(0);
Operand src2 = operation.GetSource(1);
Operand constOp;
Operand otherOp;
if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
{
constOp = src1;
otherOp = src2;
}
else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
{
constOp = src2;
otherOp = src1;
}
else
{
return 0;
}
// If we have addition by a constant that we can't encode on the instruction,
// then we can't optimize it further.
if (ConstTooLong(constOp, accessType))
{
return 0;
}
baseOp = otherOp;
return constOp.AsInt32();
}
private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
{
Operand indexOp = default;
Multiplier scale = Multiplier.x1;
Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
if (addOp == default)
{
return (indexOp, scale);
}
Operand src1 = addOp.GetSource(0);
Operand src2 = addOp.GetSource(1);
if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
{
return (indexOp, scale);
}
baseOp = src1;
indexOp = src2;
Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
bool indexOnSrc2 = false;
if (shlOp == default)
{
shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
indexOnSrc2 = true;
}
if (shlOp != default)
{
Operand shSrc = shlOp.GetSource(0);
Operand shift = shlOp.GetSource(1);
int maxShift = Assembler.GetScaleForType(accessType);
if (shSrc.Kind == OperandKind.LocalVariable &&
shift.Kind == OperandKind.Constant &&
(shift.Value == 0 || shift.Value == (ulong)maxShift))
{
scale = shift.Value switch
{
1 => Multiplier.x2,
2 => Multiplier.x4,
3 => Multiplier.x8,
4 => Multiplier.x16,
_ => Multiplier.x1
};
baseOp = indexOnSrc2 ? src1 : src2;
indexOp = shSrc;
}
}
return (indexOp, scale);
}
private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
{
// If we have multiple assignments, folding is not safe
// as the value may be different depending on the
// control flow path.
if (op.AssignmentsCount != 1)
{
return default;
}
Operation asgOp = op.Assignments[0];
if (asgOp.Instruction != inst)
{
return default;
}
return asgOp;
}
private static bool IsMemoryLoadOrStore(Instruction inst)
{
return inst == Instruction.Load || inst == Instruction.Store;
}
private static bool ConstTooLong(Operand constOp, OperandType accessType)
{
if ((uint)constOp.Value != constOp.Value)
{
return true;
}
return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
}
}
}
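
For illustration, the scaled-index fold that GetIndexOp performs can be sketched on a toy expression tree. This is a minimal stand-alone C# sketch, not ARMeilleure's IR; all names in it (Expr, Local, Add, Shl, TryFoldScaledIndex, accessSizeLog2) are invented for the example.

using System;

// Toy expression nodes standing in for ARMeilleure's Operand/Operation graph.
abstract record Expr;
record Local(string Name) : Expr;
record Add(Expr Left, Expr Right) : Expr;
record Shl(Expr Source, int Shift) : Expr;

static class AddressFoldSketch
{
    // Matches add(base, shl(index, shift)) where shift is 0 or log2 of the
    // access size, mirroring the constraint checked in GetIndexOp above.
    public static bool TryFoldScaledIndex(Expr addr, int accessSizeLog2,
        out Expr baseOp, out Expr indexOp, out int scale)
    {
        baseOp = addr;
        indexOp = null;
        scale = 1;
        if (addr is Add(var left, var right))
        {
            // The shift may be on either operand, like the indexOnSrc2 case above.
            Expr other = null;
            Shl shl = null;
            if (right is Shl rs) { other = left; shl = rs; }
            else if (left is Shl ls) { other = right; shl = ls; }
            if (shl != null && (shl.Shift == 0 || shl.Shift == accessSizeLog2))
            {
                baseOp = other;
                indexOp = shl.Source;
                scale = 1 << shl.Shift;
                return true;
            }
        }
        return false;
    }

    static void Main()
    {
        // addr = x0 + (x1 << 3) with an 8-byte access folds to [x0, x1, lsl #3].
        Expr addr = new Add(new Local("x0"), new Shl(new Local("x1"), 3));
        if (TryFoldScaledIndex(addr, 3, out var b, out var i, out var s))
        {
            Console.WriteLine($"base={b}, index={i}, scale=x{s}");
        }
    }
}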

ARMeilleure/CodeGen/Arm64/ArmCondition.cs

@@ -0,0 +1,47 @@
using ARMeilleure.IntermediateRepresentation;
using System;
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmCondition
{
Eq = 0,
Ne = 1,
GeUn = 2,
LtUn = 3,
Mi = 4,
Pl = 5,
Vs = 6,
Vc = 7,
GtUn = 8,
LeUn = 9,
Ge = 10,
Lt = 11,
Gt = 12,
Le = 13,
Al = 14,
Nv = 15
}
static class ComparisonArm64Extensions
{
public static ArmCondition ToArmCondition(this Comparison comp)
{
return comp switch
{
Comparison.Equal => ArmCondition.Eq,
Comparison.NotEqual => ArmCondition.Ne,
Comparison.Greater => ArmCondition.Gt,
Comparison.LessOrEqual => ArmCondition.Le,
Comparison.GreaterUI => ArmCondition.GtUn,
Comparison.LessOrEqualUI => ArmCondition.LeUn,
Comparison.GreaterOrEqual => ArmCondition.Ge,
Comparison.Less => ArmCondition.Lt,
Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
Comparison.LessUI => ArmCondition.LtUn,
_ => throw new ArgumentException(null, nameof(comp))
};
}
}
}
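
One property of this encoding worth noting: AArch64 condition codes come in complementary pairs that differ only in the lowest bit (Eq/Ne, GeUn/LtUn, ..., Gt/Le), so a condition can be inverted by XOR-ing with 1, Al/Nv aside. A sketch of such a helper (Invert is an assumption for illustration, not part of this PR):

// Sketch: invert an AArch64 condition by flipping bit 0 (Eq <-> Ne, Ge <-> Lt, ...).
// Al and Nv are excluded because "always" has no meaningful inverse.
static ArmCondition Invert(ArmCondition condition)
{
    if (condition == ArmCondition.Al || condition == ArmCondition.Nv)
    {
        throw new ArgumentException("Al/Nv cannot be inverted.", nameof(condition));
    }
    return (ArmCondition)((int)condition ^ 1);
}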

ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs

@@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmExtensionType
{
Uxtb = 0,
Uxth = 1,
Uxtw = 2,
Uxtx = 3,
Sxtb = 4,
Sxth = 5,
Sxtw = 6,
Sxtx = 7
}
}

ARMeilleure/CodeGen/Arm64/ArmShiftType.cs

@@ -0,0 +1,11 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmShiftType
{
Lsl = 0,
Lsr = 1,
Asr = 2,
Ror = 3
}
}

File diff suppressed because it is too large.

ARMeilleure/CodeGen/Arm64/CallingConvention.cs

@@ -0,0 +1,96 @@
using System;
namespace ARMeilleure.CodeGen.Arm64
{
static class CallingConvention
{
private const int RegistersMask = unchecked((int)0xffffffff);
// Some of these registers have specific roles and can't be used as general purpose registers.
// X18 - Reserved for platform specific usage.
// X29 - Frame pointer.
// X30 - Return address.
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
public static int GetIntAvailableRegisters()
{
return RegistersMask & ~ReservedRegsMask;
}
public static int GetVecAvailableRegisters()
{
return RegistersMask;
}
public static int GetIntCallerSavedRegisters()
{
return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
}
public static int GetFpCallerSavedRegisters()
{
return GetFpCalleeSavedRegisters() ^ RegistersMask;
}
public static int GetVecCallerSavedRegisters()
{
return GetVecCalleeSavedRegisters() ^ RegistersMask;
}
public static int GetIntCalleeSavedRegisters()
{
return 0x1ff80000; // X19 to X28
}
public static int GetFpCalleeSavedRegisters()
{
return 0xff00; // D8 to D15
}
public static int GetVecCalleeSavedRegisters()
{
return 0;
}
public static int GetArgumentsOnRegsCount()
{
return 8;
}
public static int GetIntArgumentRegister(int index)
{
if ((uint)index < (uint)GetArgumentsOnRegsCount())
{
return index;
}
throw new ArgumentOutOfRangeException(nameof(index));
}
public static int GetVecArgumentRegister(int index)
{
if ((uint)index < (uint)GetArgumentsOnRegsCount())
{
return index;
}
throw new ArgumentOutOfRangeException(nameof(index));
}
public static int GetIntReturnRegister()
{
return 0;
}
public static int GetIntReturnRegisterHigh()
{
return 1;
}
public static int GetVecReturnRegister()
{
return 0;
}
}
}
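
The masks above pack register availability into 32-bit integers, one bit per register. A small sketch of how a consumer can expand such a mask back into register indices (BitsSet and RegisterMaskSketch are names assumed for this example; the register allocator in this PR does its own mask iteration):

using System.Collections.Generic;
using System.Numerics;

static class RegisterMaskSketch
{
    // Yields the indices of all set bits, e.g. GetIntCalleeSavedRegisters()
    // (0x1ff80000) yields 19 through 28, matching the X19-X28 comment above.
    public static IEnumerable<int> BitsSet(int mask)
    {
        uint m = (uint)mask;
        while (m != 0)
        {
            yield return BitOperations.TrailingZeroCount(m);
            m &= m - 1; // clear the lowest set bit
        }
    }
}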

ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs

@@ -0,0 +1,173 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Numerics;
namespace ARMeilleure.CodeGen.Arm64
{
static class CodeGenCommon
{
public const int TcAddressRegister = 8;
public const int ReservedRegister = 17;
public static bool ConstFitsOnSImm7(int value, int scale)
{
return (((value >> scale) << 25) >> (25 - scale)) == value;
}
public static bool ConstFitsOnSImm9(int value)
{
return ((value << 23) >> 23) == value;
}
public static bool ConstFitsOnUImm12(int value)
{
return (value & 0xfff) == value;
}
public static bool ConstFitsOnUImm12(int value, OperandType type)
{
int scale = Assembler.GetScaleForType(type);
return (((value >> scale) & 0xfff) << scale) == value;
}
public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
{
ulong value = operand.Value;
if (operand.Type == OperandType.I32)
{
value |= value << 32;
}
return TryEncodeBitMask(value, out immN, out immS, out immR);
}
public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
{
// Some special values also can't be encoded:
// 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative for 0).
// A value with all bits set can't be encoded because it is reserved according to the spec, since:
// Any value AND all ones equals itself, so it's effectively a no-op.
// Any value OR all ones equals all ones, so one can just use MOV.
// Any value XOR all ones equals its inverse, so one can just use MVN.
if (value == ulong.MaxValue)
{
immN = 0;
immS = 0;
immR = 0;
return false;
}
int bitLength = CountSequence(value);
if ((value >> bitLength) != 0)
{
bitLength += CountSequence(value >> bitLength);
}
int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
int bitLengthPow2 = 1 << bitLengthLog2;
if (bitLengthPow2 < bitLength)
{
bitLengthLog2++;
bitLengthPow2 <<= 1;
}
int selectedESize = 64;
int repetitions = 1;
int onesCount = BitOperations.PopCount(value);
if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
{
for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
{
bool match = true;
int eSize = 1 << eSizeLog2;
ulong mask = (1UL << eSize) - 1;
ulong eValue = value & mask;
for (int e = 1; e < 64 / eSize; e++)
{
if (((value >> (e * eSize)) & mask) != eValue)
{
match = false;
break;
}
}
if (match)
{
selectedESize = eSize;
repetitions = 64 / eSize;
onesCount = BitOperations.PopCount(eValue);
break;
}
}
}
// Find the rotation. We have two cases, one where the highest bit is 0
// and one where it is 1.
// If it's 1, we count the number of leading 1 bits to find the right rotation.
// If it's 0, we count the number of trailing 0 bits to find the left rotation,
// then convert that to a right rotation by subtracting it from the element size.
int rotation;
long vHigh = (long)(value << (64 - selectedESize));
if (vHigh < 0)
{
rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
}
else
{
rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
}
// Reconstruct value and see if it matches. If not, we can't encode.
ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);
for (int bit = 32; bit >= selectedESize; bit >>= 1)
{
reconstructed |= reconstructed << bit;
}
if (reconstructed != value || onesCount == 0)
{
immN = 0;
immS = 0;
immR = 0;
return false;
}
immR = rotation;
// immN set indicates that there are no repetitions (a single 64-bit element).
// The high bits of immS encode the number of repetitions (element size), and the low bits the number of bits set minus 1.
if (repetitions == 1)
{
immN = 1;
immS = 0;
}
else
{
immN = 0;
immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
}
immS |= onesCount - 1;
return true;
}
private static int CountSequence(ulong value)
{
return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
}
private static ulong RotateRight(ulong bits, int shift, int size)
{
return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
}
}
}
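
TryEncodeBitMask produces the (immN, immS, immR) triple of an AArch64 logical immediate. One way to sanity-check it is to run the decoder from the Arm ARM's DecodeBitMasks pseudocode and compare. The sketch below is an assumed test helper, not part of this PR:

using System.Numerics;

static class BitMaskDecodeSketch
{
    // Inverse of TryEncodeBitMask, following the DecodeBitMasks pseudocode:
    // round-tripping a value through encode + decode should reproduce it.
    public static ulong DecodeBitMask(int immN, int immS, int immR)
    {
        // The element size is derived from the highest set bit of N:NOT(immS).
        int len = 31 - BitOperations.LeadingZeroCount((uint)((immN << 6) | (~immS & 0x3f)));
        int esize = 1 << len;
        int s = immS & (esize - 1);
        int r = immR & (esize - 1);
        ulong welem = (s + 1) == 64 ? ulong.MaxValue : (1UL << (s + 1)) - 1;
        ulong emask = esize == 64 ? ulong.MaxValue : (1UL << esize) - 1;
        // Rotate the run of ones right by r within the element.
        ulong elem = r == 0 ? welem : ((welem >> r) | (welem << (esize - r))) & emask;
        // Replicate the element across all 64 bits.
        for (int bit = esize; bit < 64; bit <<= 1)
        {
            elem |= elem << bit;
        }
        return elem;
    }
}

For any value where TryEncodeBitMask(value, out var n, out var s, out var r) returns true, DecodeBitMask(n, s, r) should equal value (with 32-bit operands replicated to 64 bits first, as the Operand overload above does).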

ARMeilleure/CodeGen/Arm64/CodeGenContext.cs

@@ -0,0 +1,286 @@
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.IO;
namespace ARMeilleure.CodeGen.Arm64
{
class CodeGenContext
{
private const int BccInstLength = 4;
private const int CbnzInstLength = 4;
private const int LdrLitInstLength = 4;
private Stream _stream;
public int StreamOffset => (int)_stream.Length;
public AllocationResult AllocResult { get; }
public Assembler Assembler { get; }
public BasicBlock CurrBlock { get; private set; }
public bool HasCall { get; }
public int CallArgsRegionSize { get; }
public int FpLrSaveRegionSize { get; }
private readonly Dictionary<BasicBlock, long> _visitedBlocks;
private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
private struct ConstantPoolEntry
{
public readonly int Offset;
public readonly Symbol Symbol;
public readonly List<(Operand, int)> LdrOffsets;
public ConstantPoolEntry(int offset, Symbol symbol)
{
Offset = offset;
Symbol = symbol;
LdrOffsets = new List<(Operand, int)>();
}
}
private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
private bool _constantPoolWritten;
private long _constantPoolOffset;
private ArmCondition _jNearCondition;
private Operand _jNearValue;
private long _jNearPosition;
private readonly bool _relocatable;
public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
{
_stream = new MemoryStream();
AllocResult = allocResult;
Assembler = new Assembler(_stream);
bool hasCall = maxCallArgs >= 0;
HasCall = hasCall;
if (maxCallArgs < 0)
{
maxCallArgs = 0;
}
CallArgsRegionSize = maxCallArgs * 16;
FpLrSaveRegionSize = hasCall ? 16 : 0;
_visitedBlocks = new Dictionary<BasicBlock, long>();
_pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
_constantPool = new Dictionary<ulong, ConstantPoolEntry>();
_relocatable = relocatable;
}
public void EnterBlock(BasicBlock block)
{
CurrBlock = block;
long target = _stream.Position;
if (_pendingBranches.TryGetValue(block, out var list))
{
foreach (var tuple in list)
{
_stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
WriteBranch(tuple.Condition, target);
}
_stream.Seek(target, SeekOrigin.Begin);
_pendingBranches.Remove(block);
}
_visitedBlocks.Add(block, target);
}
public void JumpTo(BasicBlock target)
{
JumpTo(ArmCondition.Al, target);
}
public void JumpTo(ArmCondition condition, BasicBlock target)
{
if (_visitedBlocks.TryGetValue(target, out long offset))
{
WriteBranch(condition, offset);
}
else
{
if (!_pendingBranches.TryGetValue(target, out var list))
{
list = new List<(ArmCondition, long)>();
_pendingBranches.Add(target, list);
}
list.Add((condition, _stream.Position));
_stream.Seek(BccInstLength, SeekOrigin.Current);
}
}
private void WriteBranch(ArmCondition condition, long to)
{
int imm = checked((int)(to - _stream.Position));
if (condition != ArmCondition.Al)
{
Assembler.B(condition, imm);
}
else
{
Assembler.B(imm);
}
}
public void JumpToNear(ArmCondition condition)
{
_jNearCondition = condition;
_jNearPosition = _stream.Position;
_stream.Seek(BccInstLength, SeekOrigin.Current);
}
public void JumpToNearIfNotZero(Operand value)
{
_jNearValue = value;
_jNearPosition = _stream.Position;
_stream.Seek(CbnzInstLength, SeekOrigin.Current);
}
public void JumpHere()
{
long currentPosition = _stream.Position;
long offset = currentPosition - _jNearPosition;
_stream.Seek(_jNearPosition, SeekOrigin.Begin);
if (_jNearValue != default)
{
Assembler.Cbnz(_jNearValue, checked((int)offset));
_jNearValue = default;
}
else
{
Assembler.B(_jNearCondition, checked((int)offset));
}
_stream.Seek(currentPosition, SeekOrigin.Begin);
}
public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
{
if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
{
cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
_constantPool.Add(value, cpe);
}
cpe.LdrOffsets.Add((rt, (int)_stream.Position));
_stream.Seek(LdrLitInstLength, SeekOrigin.Current);
}
private long WriteConstantPool()
{
if (_constantPoolWritten)
{
return _constantPoolOffset;
}
long constantPoolBaseOffset = _stream.Position;
foreach (ulong value in _constantPool.Keys)
{
WriteUInt64(value);
}
foreach (ConstantPoolEntry cpe in _constantPool.Values)
{
foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
{
_stream.Seek(ldrOffset, SeekOrigin.Begin);
int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
int pcRelativeOffset = absoluteOffset - ldrOffset;
Assembler.LdrLit(rt, pcRelativeOffset);
}
}
_stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
_constantPoolOffset = constantPoolBaseOffset;
_constantPoolWritten = true;
return constantPoolBaseOffset;
}
public (byte[], RelocInfo) GetCode()
{
long constantPoolBaseOffset = WriteConstantPool();
byte[] code = new byte[_stream.Length];
long originalPosition = _stream.Position;
_stream.Seek(0, SeekOrigin.Begin);
_stream.Read(code, 0, code.Length);
_stream.Seek(originalPosition, SeekOrigin.Begin);
RelocInfo relocInfo;
if (_relocatable)
{
RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
int index = 0;
foreach (ConstantPoolEntry cpe in _constantPool.Values)
{
if (cpe.Symbol.Type != SymbolType.None)
{
int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
}
}
if (index != relocs.Length)
{
Array.Resize(ref relocs, index);
}
relocInfo = new RelocInfo(relocs);
}
else
{
relocInfo = new RelocInfo(new RelocEntry[0]);
}
return (code, relocInfo);
}
private void WriteUInt64(ulong value)
{
_stream.WriteByte((byte)(value >> 0));
_stream.WriteByte((byte)(value >> 8));
_stream.WriteByte((byte)(value >> 16));
_stream.WriteByte((byte)(value >> 24));
_stream.WriteByte((byte)(value >> 32));
_stream.WriteByte((byte)(value >> 40));
_stream.WriteByte((byte)(value >> 48));
_stream.WriteByte((byte)(value >> 56));
}
}
}
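
EnterBlock and JumpTo implement a classic one-pass label/fixup scheme: a branch to a block that has not been visited yet reserves 4 bytes and is patched once the block is entered. A stand-alone sketch of the same idea (ForwardLabel and EncodeB are assumptions for this example; the offset is byte-based and encoded as the B instruction's word-based imm26):

using System.Collections.Generic;
using System.IO;

class ForwardLabel
{
    private readonly List<long> _fixups = new List<long>();
    private long _target = -1;

    // Emit an unconditional branch to this label, deferring the offset if
    // the label has not been bound yet (mirrors CodeGenContext.JumpTo).
    public void BranchTo(BinaryWriter writer)
    {
        Stream stream = writer.BaseStream;
        if (_target >= 0)
        {
            writer.Write(EncodeB(_target - stream.Position));
        }
        else
        {
            _fixups.Add(stream.Position);
            stream.Seek(4, SeekOrigin.Current); // reserve space for one B
        }
    }

    // Bind the label here and patch all pending branches
    // (mirrors CodeGenContext.EnterBlock).
    public void Bind(BinaryWriter writer)
    {
        Stream stream = writer.BaseStream;
        _target = stream.Position;
        foreach (long branchPos in _fixups)
        {
            stream.Seek(branchPos, SeekOrigin.Begin);
            writer.Write(EncodeB(_target - branchPos));
        }
        stream.Seek(_target, SeekOrigin.Begin);
        _fixups.Clear();
    }

    // B imm26: opcode 0b000101 in the top bits, signed word offset below.
    private static uint EncodeB(long byteOffset)
    {
        return 0x14000000u | ((uint)(byteOffset >> 2) & 0x3ffffffu);
    }
}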

File diff suppressed because it is too large.

ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs

@@ -0,0 +1,662 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Diagnostics;
namespace ARMeilleure.CodeGen.Arm64
{
static class CodeGeneratorIntrinsic
{
public static void GenerateOperation(CodeGenContext context, Operation operation)
{
Intrinsic intrin = operation.Intrinsic;
IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
switch (info.Type)
{
case IntrinsicType.ScalarUnary:
GenerateVectorUnary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarUnaryByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorUnaryByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarBinary:
GenerateVectorBinary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryFPByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryRd:
GenerateVectorUnary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryShl:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarBinaryShr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPCompare:
GenerateScalarFPCompare(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarFPConvFixed:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPConvFixedGpr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateScalarFPConvGpr(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPConvGpr:
GenerateScalarFPConvGpr(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarTernary:
GenerateScalarTernary(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2),
operation.GetSource(0));
break;
case IntrinsicType.ScalarTernaryFPRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.ScalarTernaryShlRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.ScalarTernaryShrRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.VectorUnary:
GenerateVectorUnary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.VectorUnaryByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorUnaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.VectorBinary:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryBitwise:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryFPByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryRd:
GenerateVectorUnary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryShl:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorBinaryShr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorFPConvFixed:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorInsertByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorInsertByElem(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(2));
break;
case IntrinsicType.VectorLookupTable:
Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
for (int i = 1; i < operation.SourcesCount - 1; i++)
{
Register currReg = operation.GetSource(i).GetRegister();
Register prevReg = operation.GetSource(i - 1).GetRegister();
Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
}
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
operation.Destination,
operation.GetSource(0),
operation.GetSource(operation.SourcesCount - 1));
break;
case IntrinsicType.VectorTernaryFPRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRd:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRdBitwise:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryShlRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.VectorTernaryShrRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.GetRegister:
context.Assembler.WriteInstruction(info.Inst, operation.Destination);
break;
case IntrinsicType.SetRegister:
context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
break;
default:
throw new NotImplementedException(info.Type.ToString());
}
}
private static void GenerateScalarFPCompare(
CodeGenContext context,
uint sz,
uint instruction,
Operand dest,
Operand rn,
Operand rm)
{
instruction |= (sz << 22);
if (rm.Kind == OperandKind.Constant && rm.Value == 0)
{
instruction |= 0b1000;
rm = rn;
}
context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
}
private static void GenerateScalarFPConvGpr(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn)
{
instruction |= (sz << 22);
if (rd.Type.IsInteger())
{
context.Assembler.WriteInstructionAuto(instruction, rd, rn);
}
else
{
if (rn.Type == OperandType.I64)
{
instruction |= Assembler.SfFlag;
}
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
private static void GenerateScalarFPConvGpr(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint fBits)
{
Debug.Assert(fBits <= 64);
instruction |= (sz << 22);
instruction |= (64 - fBits) << 10;
if (rd.Type.IsInteger())
{
Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
context.Assembler.WriteInstructionAuto(instruction, rd, rn);
}
else
{
if (rn.Type == OperandType.I64)
{
instruction |= Assembler.SfFlag;
}
else
{
Debug.Assert(fBits <= 32);
}
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
private static void GenerateScalarTernary(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn,
Operand rm,
Operand ra)
{
instruction |= (sz << 22);
context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
}
private static void GenerateVectorUnary(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn)
{
instruction |= (q << 30) | (sz << 22);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorUnaryByElem(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn)
{
uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
instruction |= (q << 30) | (imm5 << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorBinary(
CodeGenContext context,
uint q,
uint instruction,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30);
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinary(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (sz << 22);
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryByElem(
CodeGenContext context,
uint q,
uint size,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (size << 22);
if (size == 2)
{
instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
}
else
{
instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
}
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryFPByElem(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (sz << 22);
if (sz != 0)
{
instruction |= (srcIndex & 1) << 11;
}
else
{
instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
}
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryShlImm(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint shift)
{
instruction |= (q << 30);
Debug.Assert(shift >= 0 && shift < (8u << (int)sz));
uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
instruction |= (imm << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorBinaryShrImm(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint shift)
{
instruction |= (q << 30);
Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
instruction |= (imm << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorInsertByElem(
CodeGenContext context,
uint sz,
uint instruction,
uint srcIndex,
uint dstIndex,
Operand rd,
Operand rn)
{
uint imm4 = srcIndex << (int)sz;
uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
instruction |= imm4 << 11;
instruction |= imm5 << 16;
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
}
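
Every case above repeats the same two field extractions: the vector type field (Arm64VTypeMask, which becomes the q bit OR-ed into bit 30 to select a 128-bit vector) and the size field (Arm64VSizeMask, OR-ed into bits 22 and up). If this were being refactored, the extraction could be centralized; VType and VSize below are assumed helper names, not part of the PR:

// Sketch: hoisting the repeated mask-and-shift of the switch above into helpers.
private static uint VType(Intrinsic intrin)
{
    return (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift;
}

private static uint VSize(Intrinsic intrin)
{
    return (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift;
}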

ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs

@@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.Arm64
{
struct IntrinsicInfo
{
public uint Inst { get; }
public IntrinsicType Type { get; }
public IntrinsicInfo(uint inst, IntrinsicType type)
{
Inst = inst;
Type = type;
}
}
}

ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs

@@ -0,0 +1,461 @@
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
namespace ARMeilleure.CodeGen.Arm64
{
static class IntrinsicTable
{
private static IntrinsicInfo[] _intrinTable;
static IntrinsicTable()
{
_intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
}
private static void Add(Intrinsic intrin, IntrinsicInfo info)
{
_intrinTable[(int)intrin] = info;
}
public static IntrinsicInfo GetInfo(Intrinsic intrin)
{
return _intrinTable[(int)intrin];
}
}
}
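Each entry above pairs a base opcode with an operand-shape tag; the code generator fills the register fields in before emitting the word. A minimal sketch of that step, assuming the standard AArch64 SIMD field layout (Rd at bits [4:0], Rn at [9:5], Rm at [20:16]); this Encode helper is hypothetical, and the real emitter also ORs in the element size and Q bits carried via the Arm64VTypeMask/Arm64VSizeMask flags:
static uint Encode(IntrinsicInfo info, int rd, int rn, int rm)
{
    // OR the register indices into the base instruction word.
    return info.Inst | (uint)(rd & 0x1f) | ((uint)(rn & 0x1f) << 5) | ((uint)(rm & 0x1f) << 16);
}
// Encode(IntrinsicTable.GetInfo(Intrinsic.Arm64SubV), 0, 1, 2) == 0x2e228420,
// roughly "sub v0.8b, v1.8b, v2.8b" before the size/Q bits are set.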

View file

@@ -0,0 +1,59 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum IntrinsicType
{
ScalarUnary,
ScalarUnaryByElem,
ScalarBinary,
ScalarBinaryByElem,
ScalarBinaryFPByElem,
ScalarBinaryRd,
ScalarBinaryShl,
ScalarBinaryShr,
ScalarFcsel,
ScalarFmovi,
ScalarFPCompare,
ScalarFPCompareCond,
ScalarFPConv,
ScalarFPConvFixed,
ScalarFPConvFixedGpr,
ScalarFPConvGpr,
ScalarTernary,
ScalarTernaryFPRdByElem,
ScalarTernaryShlRd,
ScalarTernaryShrRd,
VectorUnary,
VectorUnaryBitwise,
VectorUnaryByElem,
VectorBinary,
VectorBinaryBitwise,
VectorBinaryBitwiseImm,
VectorBinaryByElem,
VectorBinaryFPByElem,
VectorBinaryRd,
VectorBinaryShl,
VectorBinaryShr,
VectorExt,
VectorFmovi,
VectorFPConvFixed,
VectorInsertByElem,
VectorLdSt,
VectorLdStSs,
VectorLookupTable,
VectorMovi,
VectorMvni,
VectorTernaryFPRdByElem,
VectorTernaryRd,
VectorTernaryRdBitwise,
VectorTernaryRdByElem,
VectorTernaryShlRd,
VectorTernaryShrRd,
Vector128Unary,
Vector128Binary,
GetRegister,
SetRegister
}
}
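The tags describe operand shape rather than operation semantics. A rough reading of a few of them, as interpreted from how the PreAllocator and emitter treat them (an interpretation, not text from the PR):
// ScalarBinary          dest = op(src1, src2) on scalar SIMD registers
// ...Rd variants        destructive forms: the destination register is also read
// VectorBinaryShl/Shr   vector op taking an immediate shift amount
// VectorLdStSs          load/store of a single structure element (one lane)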

View file

@@ -0,0 +1,940 @@
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.CodeGen.Arm64
{
class PreAllocator
{
private class ConstantDict
{
private readonly Dictionary<(ulong, OperandType), Operand> _constants;
public ConstantDict()
{
_constants = new Dictionary<(ulong, OperandType), Operand>();
}
public void Add(ulong value, OperandType type, Operand local)
{
_constants.Add((value, type), local);
}
public bool TryGetValue(ulong value, OperandType type, out Operand local)
{
return _constants.TryGetValue((value, type), out local);
}
}
public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
{
maxCallArgs = -1;
Span<Operation> buffer = default;
Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
{
ConstantDict constants = new ConstantDict();
Operation nextNode;
for (Operation node = block.Operations.First; node != default; node = nextNode)
{
nextNode = node.ListNext;
if (node.Instruction == Instruction.Phi)
{
continue;
}
HandleConstantRegCopy(constants, block.Operations, node);
HandleDestructiveRegCopy(block.Operations, node);
switch (node.Instruction)
{
case Instruction.Call:
// Get the maximum number of arguments used on a call.
// When a struct is returned from the call, we also need
// to pass the pointer where the struct should be written
// on the first argument.
int argsCount = node.SourcesCount - 1;
if (node.Destination != default && node.Destination.Type == OperandType.V128)
{
argsCount++;
}
if (maxCallArgs < argsCount)
{
maxCallArgs = argsCount;
}
// Copy values to registers expected by the function
// being called, as mandated by the ABI.
HandleCall(constants, block.Operations, node);
break;
case Instruction.CompareAndSwap:
case Instruction.CompareAndSwap16:
case Instruction.CompareAndSwap8:
nextNode = HandleCompareAndSwap(block.Operations, node);
break;
case Instruction.LoadArgument:
nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node);
break;
case Instruction.Return:
HandleReturn(block.Operations, node);
break;
case Instruction.Tailcall:
HandleTailcall(constants, block.Operations, stackAlloc, node, node);
break;
}
}
}
}
private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
{
if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
{
return;
}
Instruction inst = node.Instruction;
Operand src1 = node.GetSource(0);
Operand src2;
if (src1.Kind == OperandKind.Constant)
{
if (!src1.Type.IsInteger())
{
// Handle non-integer types (FP32, FP64 and V128).
// For instructions without an immediate operand, we do the following:
// - Insert a copy with the constant value (as integer) to a GPR.
// - Insert a copy from the GPR to a vector register.
// - Replace the constant use with the vector register.
src1 = AddFloatConstantCopy(constants, nodes, node, src1);
node.SetSource(0, src1);
}
else if (!HasConstSrc1(node, src1.Value))
{
// Handle integer types.
// Most ALU instructions accept a 32-bit immediate as the second operand.
// We need to ensure the following:
// - If the constant is on operand 1, we need to move it.
// -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
// -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
// - If the constant is on operand 2, we check if the instruction supports it;
// if not, we also add a copy. 64-bit constants are usually not supported.
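// For example (values hypothetical): "add x0, x0, #0x123" can keep the
// constant as an immediate, while a 64-bit value such as 0x123456789 must
// first be materialized, roughly:
//   mov x16, #0x123456789
//   add x0, x0, x16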
if (IsCommutative(node))
{
src2 = node.GetSource(1);
Operand temp = src1;
src1 = src2;
src2 = temp;
node.SetSource(0, src1);
node.SetSource(1, src2);
}
if (src1.Kind == OperandKind.Constant)
{
src1 = AddIntConstantCopy(constants, nodes, node, src1);
node.SetSource(0, src1);
}
}
}
if (node.SourcesCount < 2)
{
return;
}
src2 = node.GetSource(1);
if (src2.Kind == OperandKind.Constant)
{
if (!src2.Type.IsInteger())
{
src2 = AddFloatConstantCopy(constants, nodes, node, src2);
node.SetSource(1, src2);
}
else if (!HasConstSrc2(inst, src2))
{
src2 = AddIntConstantCopy(constants, nodes, node, src2);
node.SetSource(1, src2);
}
}
if (node.SourcesCount < 3 ||
node.Instruction == Instruction.BranchIf ||
node.Instruction == Instruction.Compare ||
node.Instruction == Instruction.VectorInsert ||
node.Instruction == Instruction.VectorInsert16 ||
node.Instruction == Instruction.VectorInsert8)
{
return;
}
for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
{
Operand src = node.GetSource(srcIndex);
if (src.Kind == OperandKind.Constant)
{
if (!src.Type.IsInteger())
{
src = AddFloatConstantCopy(constants, nodes, node, src);
node.SetSource(srcIndex, src);
}
else
{
src = AddIntConstantCopy(constants, nodes, node, src);
node.SetSource(srcIndex, src);
}
}
}
}
private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node)
{
if (node.Destination == default || node.SourcesCount == 0)
{
return;
}
Operand dest = node.Destination;
Operand src1 = node.GetSource(0);
if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
{
bool useNewLocal = false;
for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
{
if (node.GetSource(srcIndex) == dest)
{
useNewLocal = true;
break;
}
}
if (useNewLocal)
{
// Dest is already being used as a source, so we need a new
// local to hold the temporary value; otherwise the value in
// the dest local would be overwritten.
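// For example, "d = op(s1, x, d)" (dest also read as a later source)
// becomes "t = s1; t = op(t, x, d); d = t", keeping the old value of d
// available while the operation executes.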
Operand temp = Local(dest.Type);
nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
node.SetSource(0, temp);
nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
node.Destination = temp;
}
else
{
nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
node.SetSource(0, dest);
}
}
}
private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
{
Operation operation = node;
Operand dest = operation.Destination;
List<Operand> sources = new List<Operand>
{
operation.GetSource(0)
};
int argsCount = operation.SourcesCount - 1;
int intMax = CallingConvention.GetArgumentsOnRegsCount();
int vecMax = CallingConvention.GetArgumentsOnRegsCount();
int intCount = 0;
int vecCount = 0;
int stackOffset = 0;
for (int index = 0; index < argsCount; index++)
{
Operand source = operation.GetSource(index + 1);
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount < intMax;
}
else if (source.Type == OperandType.V128)
{
passOnReg = intCount + 1 < intMax;
}
else
{
passOnReg = vecCount < vecMax;
}
if (source.Type == OperandType.V128 && passOnReg)
{
// V128 is a struct; we pass each half in a GPR when possible.
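// For example, a V128 argument v0 = { lo, hi } is passed as two
// consecutive integer registers: one GPR gets lo, the next one gets hi.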
Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
continue;
}
if (passOnReg)
{
Operand argReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
Operation copyOp = Operation(Instruction.Copy, argReg, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
sources.Add(argReg);
}
else
{
Operand offset = Const(stackOffset);
Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp));
stackOffset += source.Type.GetSizeInBytes();
}
}
if (dest != default)
{
if (dest.Type == OperandType.V128)
{
Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
operation.Destination = default;
}
else
{
Operand retReg = dest.Type.IsInteger()
? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
: Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
Operation copyOp = Operation(Instruction.Copy, dest, retReg);
nodes.AddAfter(node, copyOp);
operation.Destination = retReg;
}
}
operation.SetSources(sources.ToArray());
}
private static void HandleTailcall(
ConstantDict constants,
IntrusiveList<Operation> nodes,
StackAllocator stackAlloc,
Operation node,
Operation operation)
{
List<Operand> sources = new List<Operand>
{
operation.GetSource(0)
};
int argsCount = operation.SourcesCount - 1;
int intMax = CallingConvention.GetArgumentsOnRegsCount();
int vecMax = CallingConvention.GetArgumentsOnRegsCount();
int intCount = 0;
int vecCount = 0;
// Handle arguments passed on registers.
for (int index = 0; index < argsCount; index++)
{
Operand source = operation.GetSource(1 + index);
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount + 1 < intMax;
}
else
{
passOnReg = vecCount < vecMax;
}
if (source.Type == OperandType.V128 && passOnReg)
{
// V128 is a struct; we pass each half in a GPR when possible.
Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
continue;
}
if (passOnReg)
{
Operand argReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
Operation copyOp = Operation(Instruction.Copy, argReg, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
sources.Add(argReg);
}
else
{
throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
}
}
// The target address must be in the return register, since we
// don't return anything and it is guaranteed not to be a
// callee-saved register (which would be trashed by the epilogue).
Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
nodes.AddBefore(node, addrCopyOp);
sources[0] = tcAddress;
operation.SetSources(sources.ToArray());
}
private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
{
Operand expected = node.GetSource(1);
if (expected.Type == OperandType.V128)
{
Operand dest = node.Destination;
Operand expectedLow = Local(OperandType.I64);
Operand expectedHigh = Local(OperandType.I64);
Operand desiredLow = Local(OperandType.I64);
Operand desiredHigh = Local(OperandType.I64);
Operand actualLow = Local(OperandType.I64);
Operand actualHigh = Local(OperandType.I64);
Operand address = node.GetSource(0);
Operand desired = node.GetSource(2);
void SplitOperand(Operand source, Operand low, Operand high)
{
nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
}
SplitOperand(expected, expectedLow, expectedHigh);
SplitOperand(desired, desiredLow, desiredHigh);
Operation operation = node;
// Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
// We also need additional registers that will be used to store temporary information.
operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
// Add some dummy uses of the input operands, as the CAS operation will be a loop,
// so they can't be reused as destination operands.
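// A "src = src" copy gives each input a use after the loop, so the
// allocator will not assign an input's register to one of the
// destinations while the loop still needs the input.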
for (int i = 0; i < operation.SourcesCount; i++)
{
Operand src = operation.GetSource(i);
node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
}
// Assemble the vector with the 64-bit values at the given memory location.
node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
}
else
{
// We need an additional register where the store result will be written.
node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
// Add some dummy uses of the input operands, as the CAS operation will be a loop,
// so they can't be reused as destination operands.
Operation operation = node;
for (int i = 0; i < operation.SourcesCount; i++)
{
Operand src = operation.GetSource(i);
node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
}
}
return node.ListNext;
}
private static void HandleReturn(IntrusiveList<Operation> nodes, Operation node)
{
if (node.SourcesCount == 0)
{
return;
}
Operand source = node.GetSource(0);
if (source.Type == OperandType.V128)
{
Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
}
else
{
Operand retReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
: Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
nodes.AddBefore(node, retCopyOp);
}
}
private static Operation HandleLoadArgument(
CompilerContext cctx,
ref Span<Operation> buffer,
IntrusiveList<Operation> nodes,
Operand[] preservedArgs,
Operation node)
{
Operand source = node.GetSource(0);
Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
int index = source.AsInt32();
int intCount = 0;
int vecCount = 0;
for (int cIndex = 0; cIndex < index; cIndex++)
{
OperandType argType = cctx.FuncArgTypes[cIndex];
if (argType.IsInteger())
{
intCount++;
}
else if (argType == OperandType.V128)
{
intCount += 2;
}
else
{
vecCount++;
}
}
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
}
else if (source.Type == OperandType.V128)
{
passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
}
else
{
passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
}
if (passOnReg)
{
Operand dest = node.Destination;
if (preservedArgs[index] == default)
{
if (dest.Type == OperandType.V128)
{
// V128 is a struct; we pass each half in a GPR when possible.
Operand pArg = Local(OperandType.V128);
Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
cctx.Cfg.Entry.Operations.AddFirst(copyH);
cctx.Cfg.Entry.Operations.AddFirst(copyL);
preservedArgs[index] = pArg;
}
else
{
Operand pArg = Local(dest.Type);
Operand argReg = dest.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
cctx.Cfg.Entry.Operations.AddFirst(copyOp);
preservedArgs[index] = pArg;
}
}
Operation nextNode;
if (dest.AssignmentsCount == 1)
{
// Let's propagate the argument if we can to avoid copies.
Propagate(ref buffer, dest, preservedArgs[index]);
nextNode = node.ListNext;
}
else
{
Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
nextNode = nodes.AddBefore(node, argCopyOp);
}
Delete(nodes, node);
return nextNode;
}
else
{
// TODO: Pass on stack.
return node;
}
}
private static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
{
ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
{
for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
{
Operand useSrc = use.GetSource(srcIndex);
if (useSrc == dest)
{
use.SetSource(srcIndex, value);
}
else if (useSrc.Kind == OperandKind.Memory)
{
MemoryOperand memoryOp = useSrc.GetMemory();
Operand baseAddr = memoryOp.BaseAddress;
Operand index = memoryOp.Index;
bool changed = false;
if (baseAddr == dest)
{
baseAddr = value;
changed = true;
}
if (index == dest)
{
index = value;
changed = true;
}
if (changed)
{
use.SetSource(srcIndex, MemoryOp(
useSrc.Type,
baseAddr,
index,
memoryOp.Scale,
memoryOp.Displacement));
}
}
}
}
}
private static Operand AddFloatConstantCopy(
ConstantDict constants,
IntrusiveList<Operation> nodes,
Operation node,
Operand source)
{
Operand temp = Local(source.Type);
Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
nodes.AddBefore(node, copyOp);
return temp;
}
private static Operand AddIntConstantCopy(
ConstantDict constants,
IntrusiveList<Operation> nodes,
Operation node,
Operand source)
{
if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
{
return temp;
}
temp = Local(source.Type);
Operation copyOp = Operation(Instruction.Copy, temp, source);
nodes.AddBefore(node, copyOp);
constants.Add(source.Value, source.Type, temp);
return temp;
}
private static Operand GetIntConst(Operand value)
{
if (value.Type == OperandType.FP32)
{
return Const(value.AsInt32());
}
else if (value.Type == OperandType.FP64)
{
return Const(value.AsInt64());
}
return value;
}
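// Example: the FP32 constant 1.0f has bit pattern 0x3f800000, so it is
// materialized as that integer in a GPR and then moved into a vector
// register by AddFloatConstantCopy above.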
private static void Delete(IntrusiveList<Operation> nodes, Operation node)
{
node.Destination = default;
for (int index = 0; index < node.SourcesCount; index++)
{
node.SetSource(index, default);
}
nodes.Remove(node);
}
private static Operand Gpr(int register, OperandType type)
{
return Register(register, RegisterType.Integer, type);
}
private static Operand Xmm(int register, OperandType type)
{
return Register(register, RegisterType.Vector, type);
}
private static bool IsSameOperandDestSrc1(Operation operation)
{
switch (operation.Instruction)
{
case Instruction.Extended:
return IsSameOperandDestSrc1(operation.Intrinsic);
case Instruction.VectorInsert:
case Instruction.VectorInsert16:
case Instruction.VectorInsert8:
return true;
}
return false;
}
private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
{
IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
return info.Type == IntrinsicType.ScalarBinaryRd ||
info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
info.Type == IntrinsicType.ScalarTernaryShlRd ||
info.Type == IntrinsicType.ScalarTernaryShrRd ||
info.Type == IntrinsicType.VectorBinaryRd ||
info.Type == IntrinsicType.VectorInsertByElem ||
info.Type == IntrinsicType.VectorTernaryRd ||
info.Type == IntrinsicType.VectorTernaryRdBitwise ||
info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
info.Type == IntrinsicType.VectorTernaryRdByElem ||
info.Type == IntrinsicType.VectorTernaryShlRd ||
info.Type == IntrinsicType.VectorTernaryShrRd;
}
private static bool HasConstSrc1(Operation node, ulong value)
{
switch (node.Instruction)
{
case Instruction.Add:
case Instruction.BranchIf:
case Instruction.Compare:
case Instruction.Subtract:
// The immediate encoding of those instructions does not allow Rn to be
// XZR (it would be SP instead), so we can't allow an Rn constant in this case.
return value == 0 && NotConstOrConst0(node.GetSource(1));
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseNot:
case Instruction.BitwiseOr:
case Instruction.ByteSwap:
case Instruction.CountLeadingZeros:
case Instruction.Multiply:
case Instruction.Negate:
case Instruction.RotateRight:
case Instruction.ShiftLeft:
case Instruction.ShiftRightSI:
case Instruction.ShiftRightUI:
return value == 0;
case Instruction.Copy:
case Instruction.LoadArgument:
case Instruction.Spill:
case Instruction.SpillArg:
return true;
case Instruction.Extended:
return value == 0;
}
return false;
}
private static bool NotConstOrConst0(Operand operand)
{
return operand.Kind != OperandKind.Constant || operand.Value == 0;
}
private static bool HasConstSrc2(Instruction inst, Operand operand)
{
ulong value = operand.Value;
switch (inst)
{
case Instruction.Add:
case Instruction.BranchIf:
case Instruction.Compare:
case Instruction.Subtract:
return ConstFitsOnUImm12Sh(value);
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseOr:
return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
case Instruction.Multiply:
case Instruction.Store:
case Instruction.Store16:
case Instruction.Store8:
return value == 0;
case Instruction.RotateRight:
case Instruction.ShiftLeft:
case Instruction.ShiftRightSI:
case Instruction.ShiftRightUI:
case Instruction.VectorExtract:
case Instruction.VectorExtract16:
case Instruction.VectorExtract8:
return true;
case Instruction.Extended:
// TODO: Check if actual intrinsic is supposed to have consts here?
// Right now we only hit this case for fixed-point int <-> FP conversion instructions.
return true;
}
return false;
}
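// Examples for the bitwise cases: a repeating pattern such as
// 0x00ff00ff00ff00ff encodes as an Arm64 logical immediate, while an
// arbitrary value such as 0x123456789 does not and is materialized
// into a register instead.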
private static bool IsCommutative(Operation operation)
{
switch (operation.Instruction)
{
case Instruction.Add:
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseOr:
case Instruction.Multiply:
return true;
case Instruction.BranchIf:
case Instruction.Compare:
{
Operand comp = operation.GetSource(2);
Debug.Assert(comp.Kind == OperandKind.Constant);
var compType = (Comparison)comp.AsInt32();
return compType == Comparison.Equal || compType == Comparison.NotEqual;
}
}
return false;
}
private static bool ConstFitsOnUImm12Sh(ulong value)
{
return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
}
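// Examples: 0xfff and 0x555000 fit (imm12, optionally shifted left by 12),
// while 0x1234567 does not, since its set bits span both halves.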
private static bool IsIntrinsicWithConst(Operation operation)
{
bool isIntrinsic = IsIntrinsic(operation.Instruction);
if (isIntrinsic)
{
Intrinsic intrinsic = operation.Intrinsic;
IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
// Those have integer inputs that don't support consts.
return info.Type != IntrinsicType.ScalarFPConvGpr &&
info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
info.Type != IntrinsicType.SetRegister;
}
return false;
}
private static bool IsIntrinsic(Instruction inst)
{
return inst == Instruction.Extended;
}
}
}

View file

@@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations
                    }
                    break;
+                case Instruction.Compare:
+                    if (type == OperandType.I32 &&
+                        operation.GetSource(0).Type == type &&
+                        operation.GetSource(1).Type == type)
+                    {
+                        switch ((Comparison)operation.GetSource(2).Value)
+                        {
+                            case Comparison.Equal:
+                                EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+                                break;
+                            case Comparison.NotEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+                                break;
+                            case Comparison.Greater:
+                                EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+                                break;
+                            case Comparison.LessOrEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.LessOrEqualUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterOrEqual:
+                                EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+                                break;
+                            case Comparison.Less:
+                                EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+                                break;
+                            case Comparison.GreaterOrEqualUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+                                break;
+                            case Comparison.LessUI:
+                                EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+                                break;
+                        }
+                    }
+                    break;
+
                case Instruction.Copy:
                    if (type == OperandType.I32)
                    {
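The effect of the new case: a Compare of two I32 constants folds to its boolean result at compile time. In IR-style notation (operand values hypothetical):
// before folding: t0 = Compare Const(5), Const(3), Comparison.Less
// after folding:  t0 = Const(0)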

View file

@@ -44,7 +44,7 @@ namespace ARMeilleure.CodeGen.Optimizations
                    ConstantFolding.RunPass(node);
                    Simplification.RunPass(node);
-                    if (DestIsLocalVar(node))
+                    if (DestIsSingleLocalVar(node))
                    {
                        if (IsPropagableCompare(node))
                        {
@@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations
            while (modified);
        }
-        private static Span<Operation> GetUses(ref Span<Operation> buffer, Operand operand)
-        {
-            ReadOnlySpan<Operation> uses = operand.Uses;
-            if (buffer.Length < uses.Length)
-            {
-                buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
-            }
-            uses.CopyTo(buffer);
-            return buffer.Slice(0, uses.Length);
-        }
        private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
        {
            // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
@@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations
            Comparison compType = (Comparison)comp.AsInt32();
-            Span<Operation> uses = GetUses(ref buffer, dest);
+            Span<Operation> uses = dest.GetUses(ref buffer);
            foreach (Operation use in uses)
            {
@@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations
            Operand dest = copyOp.Destination;
            Operand source = copyOp.GetSource(0);
-            Span<Operation> uses = GetUses(ref buffer, dest);
+            Span<Operation> uses = dest.GetUses(ref buffer);
            foreach (Operation use in uses)
            {
@@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations
        private static bool IsUnused(Operation node)
        {
-            return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+            return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
        }
-        private static bool DestIsLocalVar(Operation node)
+        private static bool DestIsSingleLocalVar(Operation node)
        {
-            return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable;
+            return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
        }
        private static bool HasSideEffects(Operation node)

View file

@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private const int InstructionGap = 2; private const int InstructionGap = 2;
private const int InstructionGapMask = InstructionGap - 1; private const int InstructionGapMask = InstructionGap - 1;
private const int RegistersCount = 16;
private HashSet<int> _blockEdges; private HashSet<int> _blockEdges;
private LiveRange[] _blockRanges; private LiveRange[] _blockRanges;
private BitMap[] _blockLiveIn; private BitMap[] _blockLiveIn;
@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
void PopulateFreePositions(RegisterType type, out int[] positions, out int count) void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
{ {
positions = new int[RegistersCount]; positions = new int[masks.RegistersCount];
count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type)); count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
int mask = masks.GetAvailableRegisters(type); int mask = masks.GetAvailableRegisters(type);
@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
StackAllocator stackAlloc, StackAllocator stackAlloc,
RegisterMasks regMasks) RegisterMasks regMasks)
{ {
NumberLocals(cfg); NumberLocals(cfg, regMasks.RegistersCount);
var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count); var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
@ -134,22 +132,25 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{ {
context.Active.Set(index); context.Active.Set(index);
if (current.Register.Type == RegisterType.Integer) if (current.IsFixedAndUsed)
{ {
context.IntUsedRegisters |= 1 << current.Register.Index; if (current.Register.Type == RegisterType.Integer)
} {
else /* if (interval.Register.Type == RegisterType.Vector) */ context.IntUsedRegisters |= 1 << current.Register.Index;
{ }
context.VecUsedRegisters |= 1 << current.Register.Index; else /* if (interval.Register.Type == RegisterType.Vector) */
{
context.VecUsedRegisters |= 1 << current.Register.Index;
}
} }
continue; continue;
} }
AllocateInterval(context, current, index); AllocateInterval(context, current, index, regMasks.RegistersCount);
} }
for (int index = RegistersCount * 2; index < _intervals.Count; index++) for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
{ {
if (!_intervals[index].IsSpilled) if (!_intervals[index].IsSpilled)
{ {
@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize); return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
} }
private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{ {
// Check active intervals that already ended. // Check active intervals that already ended.
foreach (int iIndex in context.Active) foreach (int iIndex in context.Active)
@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
} }
} }
if (!TryAllocateRegWithoutSpill(context, current, cIndex)) if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
{ {
AllocateRegWithSpill(context, current, cIndex); AllocateRegWithSpill(context, current, cIndex, registersCount);
} }
} }
private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex) private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{ {
RegisterType regType = current.Local.Type.ToRegisterType(); RegisterType regType = current.Local.Type.ToRegisterType();
Span<int> freePositions = stackalloc int[RegistersCount]; Span<int> freePositions = stackalloc int[registersCount];
context.GetFreePositions(regType, freePositions, out int freePositionsCount); context.GetFreePositions(regType, freePositions, out int freePositionsCount);
@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{ {
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
InsertInterval(splitChild); InsertInterval(splitChild, registersCount);
} }
else else
{ {
@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return true; return true;
} }
private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex) private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{ {
RegisterType regType = current.Local.Type.ToRegisterType(); RegisterType regType = current.Local.Type.ToRegisterType();
Span<int> usePositions = stackalloc int[RegistersCount]; Span<int> usePositions = stackalloc int[registersCount];
Span<int> blockedPositions = stackalloc int[RegistersCount]; Span<int> blockedPositions = stackalloc int[registersCount];
context.GetFreePositions(regType, usePositions, out _); context.GetFreePositions(regType, usePositions, out _);
context.GetFreePositions(regType, blockedPositions, out _); context.GetFreePositions(regType, blockedPositions, out _);
@@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
-                InsertInterval(splitChild);
+                InsertInterval(splitChild, registersCount);
                 Spill(context, current);
             }
@@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 // so we only need to split the intervals using the selected register.
                 current.Register = new Register(selectedReg, regType);
-                SplitAndSpillOverlappingIntervals(context, current);
+                SplitAndSpillOverlappingIntervals(context, current, registersCount);
                 context.Active.Set(cIndex);
             }
@@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             {
                 Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
-                InsertInterval(splitChild);
+                InsertInterval(splitChild, registersCount);
             }
             else
             {
                 Spill(context, splitChild);
             }
-            SplitAndSpillOverlappingIntervals(context, current);
+            SplitAndSpillOverlappingIntervals(context, current, registersCount);
             context.Active.Set(cIndex);
         }
@@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return selected;
         }
-        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
         {
             foreach (int iIndex in context.Active)
             {
@@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 if (!interval.IsFixed && interval.Register == current.Register)
                 {
-                    SplitAndSpillOverlappingInterval(context, current, interval);
+                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
                     context.Active.Clear(iIndex);
                 }
@@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                 if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
                 {
-                    SplitAndSpillOverlappingInterval(context, current, interval);
+                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
                     context.Inactive.Clear(iIndex);
                 }
@@ -490,7 +491,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         private void SplitAndSpillOverlappingInterval(
             AllocationContext context,
             LiveInterval current,
-            LiveInterval interval)
+            LiveInterval interval,
+            int registersCount)
         {
             // If there's a next use after the start of the current interval,
             // we need to split the spilled interval twice, and re-insert it
@@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
                     splitChild = right;
                 }
-                InsertInterval(splitChild);
+                InsertInterval(splitChild, registersCount);
             }
             else
             {
@@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             }
         }
-        private void InsertInterval(LiveInterval interval)
+        private void InsertInterval(LiveInterval interval, int registersCount)
        {
             Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
             Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
             Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
-            int startIndex = RegistersCount * 2;
+            int startIndex = registersCount * 2;
             int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
@@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             return _operationNodes[position / InstructionGap];
         }
-        private void NumberLocals(ControlFlowGraph cfg)
+        private void NumberLocals(ControlFlowGraph cfg, int registersCount)
         {
             _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
             _intervals = new List<LiveInterval>();
-            for (int index = 0; index < RegistersCount; index++)
+            for (int index = 0; index < registersCount; index++)
             {
                 _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
                 _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
@@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             {
                 LiveInterval interval = _intervals[GetOperandId(dest)];
+                if (interval.IsFixed)
+                {
+                    interval.IsFixedAndUsed = true;
+                }
                 interval.SetStart(operationPos + 1);
                 interval.AddUsePosition(operationPos + 1);
             }
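
Note: the BinarySearch call above only scans past the fixed intervals because of the layout NumberLocals establishes: two fixed intervals (one integer, one vector) are appended per host register before any locals. A minimal sketch of the invariant the new registersCount parameter preserves (the complement-and-insert step is the standard BinarySearch idiom and is assumed here, not shown in the hunks):

    // The first registersCount * 2 slots hold fixed intervals (integer and
    // vector, interleaved per register), so sorted insertion of a local
    // interval must search only the tail of the list.
    int fixedSlots = registersCount * 2;
    int insertIndex = _intervals.BinarySearch(fixedSlots, _intervals.Count - fixedSlots, interval, null);

    if (insertIndex < 0)
    {
        insertIndex = ~insertIndex; // Complement gives the insertion point on a miss.
    }

    _intervals.Insert(insertIndex, interval);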


@@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             public Register Register;
             public bool IsFixed;
+            public bool IsFixedAndUsed;
         }
         private readonly Data* _data;
@@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         public ref int SpillOffset => ref _data->SpillOffset;
         public bool IsFixed => _data->IsFixed;
+        public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
         public bool IsEmpty => FirstRange == default;
         public bool IsSplit => Children.Count != 0;
         public bool IsSpilled => SpillOffset != -1;
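
Note: IsFixedAndUsed is written in exactly one place in this change, the NumberLocals hunk above, when a fixed interval receives a definition. A plausible consumer, sketched purely as an assumption since the reading side is not part of this excerpt:

    // Hypothetical reader: collect a mask of host registers whose fixed
    // intervals were actually written, so untouched fixed registers can be
    // skipped when deciding what the generated function must preserve.
    int usedFixedMask = 0;

    for (int i = 0; i < registersCount * 2; i++)
    {
        LiveInterval interval = _intervals[i];

        if (interval.IsFixed && interval.IsFixedAndUsed)
        {
            usedFixedMask |= 1 << interval.Register.Index;
        }
    }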


@@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
         public int VecCallerSavedRegisters { get; }
         public int IntCalleeSavedRegisters { get; }
         public int VecCalleeSavedRegisters { get; }
+        public int RegistersCount { get; }
         public RegisterMasks(
             int intAvailableRegisters,
@@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             int intCallerSavedRegisters,
             int vecCallerSavedRegisters,
             int intCalleeSavedRegisters,
-            int vecCalleeSavedRegisters)
+            int vecCalleeSavedRegisters,
+            int registersCount)
         {
             IntAvailableRegisters = intAvailableRegisters;
             VecAvailableRegisters = vecAvailableRegisters;
@@ -26,6 +28,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
             VecCallerSavedRegisters = vecCallerSavedRegisters;
             IntCalleeSavedRegisters = intCalleeSavedRegisters;
             VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+            RegistersCount = registersCount;
         }
         public int GetAvailableRegisters(RegisterType type)


@@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86
 {
     static class CodeGenerator
     {
+        private const int RegistersCount = 16;
         private const int PageSize = 0x1000;
         private const int StackGuardSize = 0x2000;
@@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86
                 CallingConvention.GetIntCallerSavedRegisters(),
                 CallingConvention.GetVecCallerSavedRegisters(),
                 CallingConvention.GetIntCalleeSavedRegisters(),
-                CallingConvention.GetVecCalleeSavedRegisters());
+                CallingConvention.GetVecCalleeSavedRegisters(),
+                RegistersCount);
             AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
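
Note: with the count carried by RegisterMasks, the shared allocator no longer assumes the x86 register file; each backend sizes it explicitly, and x86 pins the constant at 16 above. The Arm64 call site is not part of this excerpt; an assumed sketch of its shape (32 reflecting AArch64's 32 integer and 32 vector registers):

    // Illustrative only: the mask arguments are elided and the Arm64
    // CodeGenerator is not shown in this excerpt.
    RegisterMasks regMasks = new RegisterMasks(
        intAvailableRegisters,
        vecAvailableRegisters,
        intCallerSavedRegisters,
        vecCallerSavedRegisters,
        intCalleeSavedRegisters,
        vecCalleeSavedRegisters,
        registersCount: 32);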


@@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86
 {
     static class IntrinsicTable
     {
-        private const int BadOp = 0;
         private static IntrinsicInfo[] _intrinTable;
         static IntrinsicTable()

File diff suppressed because it is too large.

@@ -2,6 +2,7 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
 using static ARMeilleure.Instructions.InstEmitFlowHelper;
 using static ARMeilleure.Instructions.InstEmitHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper;
@@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
@@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (m) =>
                    {
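
Note: every hunk in this file applies the same mechanical transformation, so it is worth stating once: the AdvSimd host path is tried first, the existing SSE/AVX path becomes an else-if, and the soft-float IR fallback stays last. Reduced to a sketch (the method name is a placeholder; the two emit calls are taken from the Vadd_S hunk below):

    public static void Vop_S(ArmEmitterContext context) // Placeholder name.
    {
        if (Optimizations.FastFP && Optimizations.UseAdvSimd)
        {
            // New fast path: emit the native AArch64 instruction.
            InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
        }
        else if (Optimizations.FastFP && Optimizations.UseSse2)
        {
            // Existing x86 fast path, unchanged.
            EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
        }
        else
        {
            // Soft-float fallback via IR helpers (elided in these hunks).
        }
    }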
@@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions
        public static void Vadd_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
@@ -92,7 +105,11 @@ namespace ARMeilleure.Instructions
        public static void Vadd_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
@@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions
        public static void Vfma_S(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
            }
@@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions
        public static void Vfma_V(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
            }
@@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions
        public static void Vfms_S(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
            }
@@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions
        public static void Vfms_V(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
            }
@@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions
        public static void Vfnma_S(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
            }
@@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions
        public static void Vfnms_S(ArmEmitterContext context) // Fused.
        {
-            if (Optimizations.FastFP && Optimizations.UseFma)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
            }
@@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
@@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
-            if (Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitScalarBinaryOpSimd32(context, (n, m) =>
                {
@@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
            }
@@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
            }
@@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (m) =>
                    {
@@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions
        public static void Vdiv_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
            }
@@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions
        public static void Vmaxnm_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, true, true);
            }
@@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions
        public static void Vmaxnm_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, true, false);
            }
@@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions
        public static void Vminnm_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, false, true);
            }
@@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions
        public static void Vminnm_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse41)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, false, false);
            }
@@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions
        public static void Vmax_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
@@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions
        public static void Vmin_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
@@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions
        public static void Vmla_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
@@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions
        public static void Vmla_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
@@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions
        public static void Vmls_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
            }
@@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions
        public static void Vmls_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
            }
@@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions
        public static void Vmul_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
            }
@@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions
        public static void Vmul_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
            }
@@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions
        public static void Vpadd_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
            }
@@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions
        public static void Vpmax_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
            }
@@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions
        public static void Vpmin_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
            }
@@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions
        {
            int sizeF = op.Size & 1;
-            if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
            {
                EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
            }
@@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions
        public static void Vrecps(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                bool single = (op.Size & 1) == 0;
@@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions
        {
            int sizeF = op.Size & 1;
-            if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
            {
                EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
            }
@@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions
        public static void Vrsqrts(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                bool single = (op.Size & 1) == 0;
@@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions
        public static void Vsqrt_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
            }
@@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions
        public static void Vsub_S(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
            }
@@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions
        public static void Vsub_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
            }


@@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions
        public static void Fcmp_S(ArmEmitterContext context)
        {
-            EmitFcmpOrFcmpe(context, signalNaNs: false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: false);
+            }
        }
        public static void Fcmpe_S(ArmEmitterContext context)
        {
-            EmitFcmpOrFcmpe(context, signalNaNs: true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
+            else
+            {
+                EmitFcmpOrFcmpe(context, signalNaNs: true);
+            }
        }
        private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)


@@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
        public static void Vceq_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseSse2)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
            }
@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
                }
@@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions
        public static void Vcge_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
            {
                EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
            }
@@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                {
                    EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
                }
@@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions
        public static void Vcgt_V(ArmEmitterContext context)
        {
-            if (Optimizations.FastFP && Optimizations.UseAvx)
+            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
+            }
+            else if (Optimizations.FastFP && Optimizations.UseAvx)
            {
                EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
            }
@@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseAvx)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseAvx)
                {
                    EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
                }
@@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
                }
@@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions
            if (op.F)
            {
-                if (Optimizations.FastFP && Optimizations.UseSse2)
+                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
+                }
+                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
                }
@@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions
        public static void Vcmp(ArmEmitterContext context)
        {
-            EmitVcmpOrVcmpe(context, false);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, false);
+            }
        }
        public static void Vcmpe(ArmEmitterContext context)
        {
-            EmitVcmpOrVcmpe(context, true);
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
+            }
+            else
+            {
+                EmitVcmpOrVcmpe(context, true);
+            }
        }
        private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)


@@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtas_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
            }
@@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtas_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
            }
@@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtas_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
            }
@@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtau_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
            }
@@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtau_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
            }
@@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtau_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
            }
@@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions
            int sizeF = op.Size & 1;
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
            {
                Operand n = GetVec(op.Rn);
@@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtms_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
            }
@@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtms_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
            }
@@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtmu_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
            }
@@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions
            int sizeF = op.Size & 1;
-            if (Optimizations.UseSse2 && sizeF == 1)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
+            }
+            else if (Optimizations.UseSse2 && sizeF == 1)
            {
                Operand d = GetVec(op.Rd);
@@ -405,7 +449,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtns_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
            }
@@ -417,7 +465,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtns_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
            }
@@ -429,7 +481,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtns_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
            }
@@ -441,7 +497,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtnu_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
            }
@@ -453,7 +513,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtnu_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
            }
@@ -465,7 +529,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtps_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
            }
@@ -477,7 +545,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtpu_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
            }
@@ -489,7 +561,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzs_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
            }
@@ -501,7 +577,13 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
            }
@@ -513,7 +595,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzs_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
            }
@@ -525,7 +611,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzs_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
            }
@@ -537,7 +627,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
            }
@@ -549,7 +643,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzu_Gp(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
            }
@@ -561,7 +659,13 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
            }
@@ -573,7 +677,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzu_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
            }
@@ -585,7 +693,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzu_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
            }
@@ -597,7 +709,11 @@ namespace ARMeilleure.Instructions
        public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
            }
@@ -609,41 +725,59 @@ namespace ARMeilleure.Instructions
        public static void Scvtf_Gp(ArmEmitterContext context)
        {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
-            {
-                res = context.SignExtend32(OperandType.I64, res);
-            }
-
-            res = EmitFPConvert(context, res, op.Size, signed: true);
-
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
+            }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
+
+                res = EmitFPConvert(context, res, op.Size, signed: true);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
        }
        public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
        {
            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            if (op.RegisterSize == RegisterSize.Int32)
-            {
-                res = context.SignExtend32(OperandType.I64, res);
-            }
-
-            res = EmitFPConvert(context, res, op.Size, signed: true);
-
-            res = EmitI2fFBitsMul(context, res, op.FBits);
-
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
+            }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    res = context.SignExtend32(OperandType.I64, res);
+                }
+
+                res = EmitFPConvert(context, res, op.Size, signed: true);
+
+                res = EmitI2fFBitsMul(context, res, op.FBits);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
        }
        public static void Scvtf_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2ScvtfOp(context, scalar: true);
            }
@@ -655,7 +789,11 @@ namespace ARMeilleure.Instructions
        public static void Scvtf_S_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2ScvtfOp(context, scalar: true);
            }
@@ -667,7 +805,11 @@ namespace ARMeilleure.Instructions
        public static void Scvtf_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2ScvtfOp(context, scalar: false);
            }
@@ -679,7 +821,11 @@ namespace ARMeilleure.Instructions
        public static void Scvtf_V_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2ScvtfOp(context, scalar: false);
            }
@@ -691,31 +837,49 @@ namespace ARMeilleure.Instructions
        public static void Ucvtf_Gp(ArmEmitterContext context)
        {
-            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            res = EmitFPConvert(context, res, op.Size, signed: false);
-
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
+            }
+            else
+            {
+                OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                res = EmitFPConvert(context, res, op.Size, signed: false);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
        }
        public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
        {
            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
-            Operand res = GetIntOrZR(context, op.Rn);
-
-            res = EmitFPConvert(context, res, op.Size, signed: false);
-
-            res = EmitI2fFBitsMul(context, res, op.FBits);
-
-            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
+            }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                res = EmitFPConvert(context, res, op.Size, signed: false);
+
+                res = EmitI2fFBitsMul(context, res, op.FBits);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
        }
        public static void Ucvtf_S(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2UcvtfOp(context, scalar: true);
            }
@@ -727,7 +891,11 @@ namespace ARMeilleure.Instructions
        public static void Ucvtf_S_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2UcvtfOp(context, scalar: true);
            }
@@ -739,7 +907,11 @@ namespace ARMeilleure.Instructions
        public static void Ucvtf_V(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2UcvtfOp(context, scalar: false);
            }
@@ -751,7 +923,11 @@ namespace ARMeilleure.Instructions
        public static void Ucvtf_V_Fixed(ArmEmitterContext context)
        {
-            if (Optimizations.UseSse2)
+            if (Optimizations.UseAdvSimd)
+            {
+                InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
+            }
+            else if (Optimizations.UseSse2)
            {
                EmitSse2UcvtfOp(context, scalar: false);
            }


@@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions
            if (toInteger)
            {
-                if (Optimizations.UseSse41)
+                if (Optimizations.UseAdvSimd)
+                {
+                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
+                }
+                else if (Optimizations.UseSse41)
                {
                    EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
@@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions
            bool unsigned = (op.Opc2 & 1) == 0;
            bool roundWithFpscr = op.Opc != 1;
-            if (!roundWithFpscr && Optimizations.UseSse41)
+            if (!roundWithFpscr && Optimizations.UseAdvSimd)
+            {
+                bool doubleSize = floatSize == OperandType.FP64;
+
+                if (doubleSize)
+                {
+                    Operand m = GetVecA32(op.Vm >> 1);
+
+                    Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
+
+                    Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
+
+                    Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
+
+                    InsertScalar(context, op.Vd, asInteger);
+                }
+                else
+                {
+                    InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
+                }
+            }
+            else if (!roundWithFpscr && Optimizations.UseSse41)
            {
                EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
            }
@@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions
            bool unsigned = op.Opc == 0;
            int rm = op.Opc2 & 3;
-            if (Optimizations.UseSse41)
+            Intrinsic inst;
+
+            if (Optimizations.UseAdvSimd)
+            {
+                if (unsigned)
+                {
+                    inst = rm switch {
+                        0b00 => Intrinsic.Arm64FcvtauS,
+                        0b01 => Intrinsic.Arm64FcvtnuS,
+                        0b10 => Intrinsic.Arm64FcvtpuS,
+                        0b11 => Intrinsic.Arm64FcvtmuS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+                else
+                {
+                    inst = rm switch {
+                        0b00 => Intrinsic.Arm64FcvtasS,
+                        0b01 => Intrinsic.Arm64FcvtnsS,
+                        0b10 => Intrinsic.Arm64FcvtpsS,
+                        0b11 => Intrinsic.Arm64FcvtmsS,
+                        _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                    };
+                }
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
            }
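
Note: the rm field of the A32 encoding selects the rounding mode, and the switches above map it directly onto the corresponding AArch64 convert intrinsics:

    // rm    rounding mode            signed    unsigned
    // 0b00  to nearest, ties away    FCVTAS    FCVTAU
    // 0b01  to nearest, ties even    FCVTNS    FCVTNU
    // 0b10  towards +infinity        FCVTPS    FCVTPU
    // 0b11  towards -infinity        FCVTMS    FCVTMU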
@@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions
            int rm = op.Opc2 & 3;
-            if (Optimizations.UseSse41)
+            if (Optimizations.UseAdvSimd)
+            {
+                Intrinsic inst = rm switch {
+                    0b00 => Intrinsic.Arm64FrintaS,
+                    0b01 => Intrinsic.Arm64FrintnS,
+                    0b10 => Intrinsic.Arm64FrintpS,
+                    0b11 => Intrinsic.Arm64FrintmS,
+                    _ => throw new ArgumentOutOfRangeException(nameof(rm))
+                };
+
+                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
+            }
+            else if (Optimizations.UseSse41)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
@@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions
 // VRINTA (vector).
 public static void Vrinta_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse41)
+    if (Optimizations.UseAdvSimd)
     {
-        EmitVectorUnaryOpSimd32(context, (m) =>
-        {
-            return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
-        });
+        InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
     }
     else
     {
@@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions
 // VRINTM (vector).
 public static void Vrintm_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorUnaryOpSimd32(context, (m) =>
         {

@@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions
 // VRINTN (vector).
 public static void Vrintn_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorUnaryOpSimd32(context, (m) =>
         {

@@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions
 // VRINTP (vector).
 public static void Vrintp_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorUnaryOpSimd32(context, (m) =>
         {

@@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions
 {
     OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitScalarUnaryOpSimd32(context, (m) =>
         {

View file

@@ -0,0 +1,366 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
using Func1I = Func<Operand, Operand>;
using Func2I = Func<Operand, Operand, Operand>;
using Func3I = Func<Operand, Operand, Operand, Operand>;
static class InstEmitSimdHelper32Arm64
{
// Intrinsic Helpers
public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
{
Debug.Assert(input.Type == OperandType.V128);
int originalSide = originalV & 1;
int targetSide = targetV & 1;
if (originalSide == targetSide)
{
return input;
}
Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
if (targetSide == 1)
{
return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
}
}
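// Worked example: with the AArch32 mapping used throughout this class, D<v> is
// doubleword (v & 1) of Q<v >> 1>. For originalV = 2 (low half of Q1) and
// targetV = 5 (high half of Q2) the sides differ, so the low-to-high branch above
// emits a DUP of doubleword element 0, leaving the data in the high half where the
// caller expects it.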
public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
{
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
int targetSide = targetV & 1;
Operand idx = Const(targetSide);
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
}
public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
// Insert from index 0 in value to index in target.
int index = reg & (doubleWidth ? 1 : 3);
if (doubleWidth)
{
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
}
}
public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
{
int index = reg & (doubleWidth ? 1 : 3);
if (index == 0) return target; // Element is already at index 0, so just return the vector directly.
if (doubleWidth)
{
return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
}
}
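// Index math above: a 128-bit Q register holds two doubles or four singles, so the
// element index is reg & 1 for doublewords and reg & 3 for words. E.g. S6 maps to
// word element 6 & 3 = 2 of Q1, and the DUP brings that element down to lane 0 so a
// scalar op can consume it.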
// Vector Operand Templates
public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(m);
if (!op.Q) // Register insert.
{
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
}
public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (side == -1)
{
side = op.Vd;
}
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
}
Operand res = vectorFunc(n, m);
if (!op.Q) // Register insert.
{
if (side != op.Vd)
{
res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
}
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
Operand initialD = d;
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(d, n, m);
if (!op.Q) // Register insert.
{
res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
}
context.Copy(initialD, res);
}
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
}
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand res = scalarFunc(m);
// Insert scalar into vector.
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
}
public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vn >> shift);
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
n = EmitExtractScalar(context, n, op.Vn, doubleSize);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand res = scalarFunc(n, m);
// Insert scalar into vector.
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vn >> shift);
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
Operand initialD = d;
n = EmitExtractScalar(context, n, op.Vn, doubleSize);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
d = EmitExtractScalar(context, d, op.Vd, doubleSize);
Operand res = scalarFunc(d, n, m);
// Insert scalar into vector.
res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
context.Copy(initialD, res);
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
}
// Pairwise
public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
}
public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool cmpWithZero = (op.Opc & 2) != 0;
Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vd >> shift);
Operand m = GetVecA32(op.Vm >> shift);
n = EmitExtractScalar(context, n, op.Vd, doubleSize);
m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand nzcv = context.AddIntrinsicInt(inst, n, m);
Operand one = Const(1);
SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
}
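// The AddIntrinsicInt above yields the raw AArch64 NZCV value: N in bit 31, Z in
// bit 30, C in bit 29, V in bit 28. Each flag is peeled off with a shift and a mask,
// e.g. (nzcv >> 30) & 1 recovers Z, and the results land in the emulated FPSCR flags.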
public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
int sizeF = op.Size & 1;
Intrinsic inst;
if (zero)
{
inst = cond switch
{
CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
_ => throw new InvalidOperationException()
};
}
else
{
inst = cond switch
{
CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
_ => throw new InvalidOperationException()
};
}
inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
if (zero)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
return context.AddIntrinsic(inst, m);
});
}
else
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(inst, n, m);
});
}
}
}
}

View file

@@ -0,0 +1,720 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
static class InstEmitSimdHelperArm64
{
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand n = GetIntOrZR(context, op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (inst, n)
: context.AddIntrinsicLong(inst, n));
}
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand a = GetVec(op.Ra);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
}
public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
}
public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
}
public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
context.SetPendingQcFlagSync();
}
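// The saturating op sets the host's FPSR.QC bit; SetPendingQcFlagSync only marks the
// guest Q flag as needing a sync rather than reading FPSR here, so runs of saturating
// ops pay for a single flag read when the flag is consumed (or before a call, which
// may observe it).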
public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, n, m);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, d, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetIntOrZR(context, op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (inst, n, Const(fBits))
: context.AddIntrinsicLong(inst, n, Const(fBits)));
}
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
}
public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
}
public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n, m);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, d, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
Operand[] operands = new Operand[op.Size + 1];
operands[op.Size] = GetVec(op.Rm);
for (int index = 0; index < op.Size; index++)
{
operands[index] = GetVec((op.Rn + index) & 0x1F);
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
}
public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
Operand n = GetVec(op.Rn);
Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
Operand nzcv = context.AddIntrinsicInt(inst, n, m);
Operand one = Const(1);
SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
}
}
}
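
One detail worth noting in the helper above: element size and vector width are folded
into the Intrinsic value itself, so a single table entry per instruction covers every
arrangement. The float helpers OR in Arm64VDouble and Arm64V128, while the integer
helpers shift op.Size into a dedicated bit-field through Arm64VSizeShift. A reduced
sketch of that packing scheme, using illustrative bit positions rather than
ARMeilleure's actual enum values:

    using System;

    [Flags]
    enum Insn : uint
    {
        AddV      = 0x001,      // opcode id in the low bits
        SizeShift = 24,         // bits 24..25 hold log2 of the element size
        V128      = 1u << 26,   // "Q" bit: operate on the full 128-bit vector
    }

    class Demo
    {
        static void Main()
        {
            int opSize = 2; // 0: 8-bit, 1: 16-bit, 2: 32-bit, 3: 64-bit elements

            Insn inst = Insn.AddV | (Insn)((uint)opSize << (int)Insn.SizeShift) | Insn.V128;

            Console.WriteLine($"0x{(uint)inst:X8}"); // 0x06000001
        }
    }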

View file

@@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions
 {
     public static void And_V(ArmEmitterContext context)
     {
-        if (Optimizations.UseSse2)
+        if (Optimizations.UseAdvSimd)
+        {
+            InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
+        }
+        else if (Optimizations.UseSse2)
         {
             OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

@@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
 public static void Bic_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
+    }
+    else if (Optimizations.UseSse2)
     {
         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

@@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions
 public static void Bif_V(ArmEmitterContext context)
 {
-    EmitBifBit(context, notRm: true);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
+    }
+    else
+    {
+        EmitBifBit(context, notRm: true);
+    }
 }

 public static void Bit_V(ArmEmitterContext context)
 {
-    EmitBifBit(context, notRm: false);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
+    }
+    else
+    {
+        EmitBifBit(context, notRm: false);
+    }
 }

 private static void EmitBifBit(ArmEmitterContext context, bool notRm)

@@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions
 public static void Bsl_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
+    }
+    else if (Optimizations.UseSse2)
     {
         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

@@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions
 public static void Eor_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
+    }
+    else if (Optimizations.UseSse2)
     {
         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

@@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions
 public static void Orn_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
+    }
+    else if (Optimizations.UseSse2)
     {
         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

@@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions
 public static void Orr_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
+    }
+    else if (Optimizations.UseSse2)
     {
         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
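
Bsl, Bit and Bif are one boolean identity apart: they differ only in which register
supplies the select mask and which way the mask selects, which is why all three map
onto a single ternary AdvSimd op on Rd while the SSE fallback composes them from
AND/XOR in EmitBifBit. The identities, checked in plain C#:

    using System;

    ulong d = 0b1100, n = 0b1010, m = 0b0101;

    ulong bsl = (d & n) | (~d & m); // BSL: d is the selector; n where d is 1, m where d is 0.
    ulong bit = (d & ~m) | (n & m); // BIT: m is the mask; insert n into d where m is 1.
    ulong bif = (d & m) | (n & ~m); // BIF: m is the mask; insert n into d where m is 0.

    Console.WriteLine(Convert.ToString((long)(bsl & 0xF), 2).PadLeft(4, '0')); // 1001
    Console.WriteLine(Convert.ToString((long)(bit & 0xF), 2).PadLeft(4, '0')); // 1000
    Console.WriteLine(Convert.ToString((long)(bif & 0xF), 2).PadLeft(4, '0')); // 1110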

View file

@@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions
 {
     public static void Vand_I(ArmEmitterContext context)
     {
-        if (Optimizations.UseSse2)
+        if (Optimizations.UseAdvSimd)
+        {
+            InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
+        }
+        else if (Optimizations.UseSse2)
         {
             EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
         }

@@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions
 public static void Vbic_I(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
     }

@@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions
 public static void Vbif(ArmEmitterContext context)
 {
-    EmitBifBit(context, true);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
+    }
+    else
+    {
+        EmitBifBit(context, true);
+    }
 }

 public static void Vbit(ArmEmitterContext context)
 {
-    EmitBifBit(context, false);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
+    }
+    else
+    {
+        EmitBifBit(context, false);
+    }
 }

 public static void Vbsl(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorTernaryOpSimd32(context, (d, n, m) =>
         {

@@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions
 public static void Veor_I(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
     }

@@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions
 public static void Vorn_I(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
+    }
+    else if (Optimizations.UseSse2)
     {
         Operand mask = context.VectorOne();

@@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions
 public static void Vorr_I(ArmEmitterContext context)
 {
-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
     }

View file

@@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions
 {
     OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

-    if (Optimizations.UseSse2)
+    if (Optimizations.UseAdvSimd)
+    {
+        EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
+    }
+    else if (Optimizations.UseSse2)
     {
         EmitVectorShuffleOpSimd32(context, (m, d) =>
         {

@@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions
 {
     OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

-    if (Optimizations.UseSsse3)
+    if (Optimizations.UseAdvSimd)
+    {
+        EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
+    }
+    else if (Optimizations.UseSsse3)
     {
         EmitVectorShuffleOpSimd32(context, (m, d) =>
         {

@@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions
     }
 }

+private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
+{
+    OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
+
+    bool overlap = op.Qm == op.Qd;
+
+    Operand d = GetVecA32(op.Qd);
+    Operand m = GetVecA32(op.Qm);
+
+    Operand dPart = d;
+    Operand mPart = m;
+
+    if (!op.Q) // Register swap: move relevant doubleword to destination side.
+    {
+        dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
+        mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
+    }
+
+    Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
+
+    vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
+
+    Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
+    Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
+
+    if (!op.Q) // Register insert.
+    {
+        resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
+
+        if (overlap)
+        {
+            resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
+        }
+        else
+        {
+            resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
+        }
+    }
+
+    context.Copy(d, resD);
+
+    if (!overlap)
+    {
+        context.Copy(m, resM);
+    }
+}
+
 private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
 {
     OpCode32Simd op = (OpCode32Simd)context.CurrOp;
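
Zip1/Zip2 and Uzp1/Uzp2 each produce half of the full interleave or de-interleave,
which is why the helper issues the (inst1, inst2) pair to recreate the two result
registers of the Arm32 VZIP/VUZP. On four 32-bit elements:

    using System;

    int[] n = { 0, 1, 2, 3 };
    int[] m = { 4, 5, 6, 7 };

    // zip1/zip2 interleave the low halves, then the high halves:
    int[] zip1 = { n[0], m[0], n[1], m[1] }; // 0 4 1 5
    int[] zip2 = { n[2], m[2], n[3], m[3] }; // 2 6 3 7

    // uzp1/uzp2 de-interleave even-indexed, then odd-indexed elements:
    int[] uzp1 = { n[0], n[2], m[0], m[2] }; // 0 2 4 6
    int[] uzp2 = { n[1], n[3], m[1], m[3] }; // 1 3 5 7

    Console.WriteLine(string.Join(" ", zip1) + " | " + string.Join(" ", zip2));
    Console.WriteLine(string.Join(" ", uzp1) + " | " + string.Join(" ", uzp2));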

View file

@@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions
 public static void Rshrn_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSsse3)
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+        int shift = GetImmShr(op);
+
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
+    }
+    else if (Optimizations.UseSsse3)
     {
         OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

@@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions
 int shift = GetImmShl(op);

-EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+if (Optimizations.UseAdvSimd)
+{
+    InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
+}
+else
+{
+    EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
+}
 }

 public static void Shl_V(ArmEmitterContext context)

@@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions
 int shift = GetImmShl(op);
 int eSize = 8 << op.Size;

-if (shift >= eSize)
+if (Optimizations.UseAdvSimd)
+{
+    InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
+}
+else if (shift >= eSize)
 {
     if ((op.RegisterSize == RegisterSize.Simd64))
     {

@@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions
 int shift = 8 << op.Size;

-if (Optimizations.UseSse41)
+if (Optimizations.UseAdvSimd)
+{
+    InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
+}
+else if (Optimizations.UseSse41)
 {
     Operand n = GetVec(op.Rn);

@@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions
 public static void Shrn_V(ArmEmitterContext context)
 {
-    if (Optimizations.UseSsse3)
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+        int shift = GetImmShr(op);
+
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
+    }
+    else if (Optimizations.UseSsse3)
     {
         OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
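
Every AdvSimd branch in this file first recovers the actual shift amount from the
instruction's immh:immb immediate. Assuming the standard A64 SIMD shift encoding
(which is what GetImmShl/GetImmShr decode here), left shifts store imm - esize and
right shifts store 2 * esize - imm:

    using System;

    // Hypothetical decode helpers mirroring the standard A64 encoding; names and
    // signatures are illustrative, not ARMeilleure's.
    static int DecodeShl(int imm, int esize) => imm - esize;
    static int DecodeShr(int imm, int esize) => 2 * esize - imm;

    // 32-bit elements (esize = 32): imm 35 encodes "shl #3", imm 40 encodes "shr #24".
    Console.WriteLine(DecodeShl(35, 32)); // 3
    Console.WriteLine(DecodeShr(40, 32)); // 24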
@@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions
 public static void Sli_S(ArmEmitterContext context)
 {
-    EmitSli(context, scalar: true);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShl(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
+    }
+    else
+    {
+        EmitSli(context, scalar: true);
+    }
 }

 public static void Sli_V(ArmEmitterContext context)
 {
-    EmitSli(context, scalar: false);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShl(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
+    }
+    else
+    {
+        EmitSli(context, scalar: false);
+    }
 }

 public static void Sqrshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
+    }
 }

 public static void Sqrshrn_S(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+    }
 }

 public static void Sqrshrn_V(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+    }
 }

 public static void Sqrshrun_S(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+    }
 }

 public static void Sqrshrun_V(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+    }
 }

 public static void Sqshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
+    }
 }

 public static void Sqshrn_S(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+    }
 }

 public static void Sqshrn_V(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+    }
 }

 public static void Sqshrun_S(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+    }
 }

 public static void Sqshrun_V(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+    }
 }

 public static void Sri_S(ArmEmitterContext context)
 {
-    EmitSri(context, scalar: true);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
+    }
+    else
+    {
+        EmitSri(context, scalar: true);
+    }
 }

 public static void Sri_V(ArmEmitterContext context)
 {
-    EmitSri(context, scalar: false);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
+    }
+    else
+    {
+        EmitSri(context, scalar: false);
+    }
 }

 public static void Srshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
+    }
 }

 public static void Srshr_S(ArmEmitterContext context)
 {
-    EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
+    }
+    else
+    {
+        EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+    }
 }

 public static void Srshr_V(ArmEmitterContext context)
 {
     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

-    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+    if (Optimizations.UseAdvSimd)
+    {
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
+    }
+    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
     {
         int shift = GetImmShr(op);
         int eSize = 8 << op.Size;
@@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions
 public static void Srsra_S(ArmEmitterContext context)
 {
-    EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
+    }
+    else
+    {
+        EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+    }
 }

 public static void Srsra_V(ArmEmitterContext context)
 {
     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

-    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+    if (Optimizations.UseAdvSimd)
+    {
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
+    }
+    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
     {
         int shift = GetImmShr(op);
         int eSize = 8 << op.Size;
@@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions
 public static void Sshl_S(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
+    }
 }

 public static void Sshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Signed);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Signed);
+    }
 }

 public static void Sshll_V(ArmEmitterContext context)

@@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions
 int shift = GetImmShl(op);

-if (Optimizations.UseSse41)
+if (Optimizations.UseAdvSimd)
+{
+    InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
+}
+else if (Optimizations.UseSse41)
 {
     Operand n = GetVec(op.Rn);

@@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions
 public static void Sshr_S(ArmEmitterContext context)
 {
-    EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+        int shift = GetImmShr(op);
+
+        InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
+    }
+    else
+    {
+        EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+    }
 }

 public static void Sshr_V(ArmEmitterContext context)

@@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions
 int shift = GetImmShr(op);

-if (Optimizations.UseGfni && op.Size == 0)
+if (Optimizations.UseAdvSimd)
+{
+    InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
+}
+else if (Optimizations.UseGfni && op.Size == 0)
 {
     Operand n = GetVec(op.Rn);

@@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions
 public static void Ssra_S(ArmEmitterContext context)
 {
-    EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+        int shift = GetImmShr(op);
+
+        InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
+    }
+    else
+    {
+        EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+    }
 }

 public static void Ssra_V(ArmEmitterContext context)
 {
     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

-    if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
+    if (Optimizations.UseAdvSimd)
+    {
+        int shift = GetImmShr(op);
+
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
+    }
+    else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
     {
         int shift = GetImmShr(op);
@@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions
 public static void Uqrshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
+    }
 }

 public static void Uqrshrn_S(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+    }
 }

 public static void Uqrshrn_V(ArmEmitterContext context)
 {
-    EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
+    }
+    else
+    {
+        EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+    }
 }

 public static void Uqshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Saturating);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Saturating);
+    }
 }

 public static void Uqshrn_S(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+    }
 }

 public static void Uqshrn_V(ArmEmitterContext context)
 {
-    EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
+    }
+    else
+    {
+        EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+    }
 }

 public static void Urshl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Round);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Round);
+    }
 }

 public static void Urshr_S(ArmEmitterContext context)
 {
-    EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
+    }
+    else
+    {
+        EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+    }
 }

 public static void Urshr_V(ArmEmitterContext context)
 {
     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

-    if (Optimizations.UseSse2 && op.Size > 0)
+    if (Optimizations.UseAdvSimd)
+    {
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
+    }
+    else if (Optimizations.UseSse2 && op.Size > 0)
     {
         int shift = GetImmShr(op);
         int eSize = 8 << op.Size;

@@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions
 public static void Ursra_S(ArmEmitterContext context)
 {
-    EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+    if (Optimizations.UseAdvSimd)
+    {
+        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
+    }
+    else
+    {
+        EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+    }
 }

 public static void Ursra_V(ArmEmitterContext context)
 {
     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

-    if (Optimizations.UseSse2 && op.Size > 0)
+    if (Optimizations.UseAdvSimd)
+    {
+        int shift = GetImmShr(op);
+        InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
+    }
+    else if (Optimizations.UseSse2 && op.Size > 0)
     {
         int shift = GetImmShr(op);
         int eSize = 8 << op.Size;

@@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions
 public static void Ushl_S(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.Scalar);
+    if (Optimizations.UseAdvSimd)
+    {
+        InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
+    }
+    else
+    {
+        EmitShlRegOp(context, ShlRegFlags.Scalar);
+    }
 }

 public static void Ushl_V(ArmEmitterContext context)
 {
-    EmitShlRegOp(context, ShlRegFlags.None);
+    if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.None);
}
} }
public static void Ushll_V(ArmEmitterContext context) public static void Ushll_V(ArmEmitterContext context)
@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions
int shift = GetImmShl(op); int shift = GetImmShl(op);
if (Optimizations.UseSse41) if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
}
else if (Optimizations.UseSse41)
{ {
Operand n = GetVec(op.Rn); Operand n = GetVec(op.Rn);
@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions
public static void Ushr_S(ArmEmitterContext context) public static void Ushr_S(ArmEmitterContext context)
{ {
EmitShrImmOp(context, ShrImmFlags.ScalarZx); if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
}
else
{
EmitShrImmOp(context, ShrImmFlags.ScalarZx);
}
} }
public static void Ushr_V(ArmEmitterContext context) public static void Ushr_V(ArmEmitterContext context)
{ {
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0) if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{ {
int shift = GetImmShr(op); int shift = GetImmShr(op);
@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions
public static void Usra_S(ArmEmitterContext context) public static void Usra_S(ArmEmitterContext context)
{ {
EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
}
else
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
}
} }
public static void Usra_V(ArmEmitterContext context) public static void Usra_V(ArmEmitterContext context)
{ {
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0) if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{ {
int shift = GetImmShr(op); int shift = GetImmShr(op);
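Note: every handler changed above follows the same shape. When the host supports Advanced SIMD, the guest instruction is forwarded to the matching Arm64 intrinsic; otherwise the existing SSE/software path is kept. A minimal sketch of that pattern (Example_V and EmitSoftwareFallback are illustrative names, not part of this commit):

public static void Example_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (Optimizations.UseAdvSimd)
    {
        // Host is Arm64: emit the equivalent native shift directly.
        InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, GetImmShr(op));
    }
    else
    {
        // Any other host: fall back to the pre-existing emitters.
        EmitSoftwareFallback(context, op);
    }
}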
@@ -150,6 +150,8 @@ namespace ARMeilleure.Instructions
        {
            OpCodeSystem op = (OpCodeSystem)context.CurrOp;

+           context.SyncQcFlag();
+
            Operand fpsr = Const(0);

            for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
@@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions
        {
            OpCodeSystem op = (OpCodeSystem)context.CurrOp;

+           context.ClearQcFlagIfModified();
+
            Operand fpsr = GetIntOrZR(context, op.Rt);
            fpsr = context.ConvertI64ToI32(fpsr);
@@ -2,6 +2,8 @@ namespace ARMeilleure.IntermediateRepresentation
{
    enum Intrinsic : ushort
    {
+       // X86 (SSE and AVX)
        X86Addpd,
        X86Addps,
        X86Addsd,
@@ -172,6 +174,458 @@ namespace ARMeilleure.IntermediateRepresentation
        X86Vfnmsub231sd,
        X86Vfnmsub231ss,
        X86Xorpd,
-       X86Xorps
+       X86Xorps,
+
+       // Arm64 (FP and Advanced SIMD)
Arm64AbsS,
Arm64AbsV,
Arm64AddhnV,
Arm64AddpS,
Arm64AddpV,
Arm64AddvV,
Arm64AddS,
Arm64AddV,
Arm64AesdV,
Arm64AeseV,
Arm64AesimcV,
Arm64AesmcV,
Arm64AndV,
Arm64BicVi,
Arm64BicV,
Arm64BifV,
Arm64BitV,
Arm64BslV,
Arm64ClsV,
Arm64ClzV,
Arm64CmeqS,
Arm64CmeqV,
Arm64CmeqSz,
Arm64CmeqVz,
Arm64CmgeS,
Arm64CmgeV,
Arm64CmgeSz,
Arm64CmgeVz,
Arm64CmgtS,
Arm64CmgtV,
Arm64CmgtSz,
Arm64CmgtVz,
Arm64CmhiS,
Arm64CmhiV,
Arm64CmhsS,
Arm64CmhsV,
Arm64CmleSz,
Arm64CmleVz,
Arm64CmltSz,
Arm64CmltVz,
Arm64CmtstS,
Arm64CmtstV,
Arm64CntV,
Arm64DupSe,
Arm64DupVe,
Arm64DupGp,
Arm64EorV,
Arm64ExtV,
Arm64FabdS,
Arm64FabdV,
Arm64FabsV,
Arm64FabsS,
Arm64FacgeS,
Arm64FacgeV,
Arm64FacgtS,
Arm64FacgtV,
Arm64FaddpS,
Arm64FaddpV,
Arm64FaddV,
Arm64FaddS,
Arm64FccmpeS,
Arm64FccmpS,
Arm64FcmeqS,
Arm64FcmeqV,
Arm64FcmeqSz,
Arm64FcmeqVz,
Arm64FcmgeS,
Arm64FcmgeV,
Arm64FcmgeSz,
Arm64FcmgeVz,
Arm64FcmgtS,
Arm64FcmgtV,
Arm64FcmgtSz,
Arm64FcmgtVz,
Arm64FcmleSz,
Arm64FcmleVz,
Arm64FcmltSz,
Arm64FcmltVz,
Arm64FcmpeS,
Arm64FcmpS,
Arm64FcselS,
Arm64FcvtasS,
Arm64FcvtasV,
Arm64FcvtasGp,
Arm64FcvtauS,
Arm64FcvtauV,
Arm64FcvtauGp,
Arm64FcvtlV,
Arm64FcvtmsS,
Arm64FcvtmsV,
Arm64FcvtmsGp,
Arm64FcvtmuS,
Arm64FcvtmuV,
Arm64FcvtmuGp,
Arm64FcvtnsS,
Arm64FcvtnsV,
Arm64FcvtnsGp,
Arm64FcvtnuS,
Arm64FcvtnuV,
Arm64FcvtnuGp,
Arm64FcvtnV,
Arm64FcvtpsS,
Arm64FcvtpsV,
Arm64FcvtpsGp,
Arm64FcvtpuS,
Arm64FcvtpuV,
Arm64FcvtpuGp,
Arm64FcvtxnS,
Arm64FcvtxnV,
Arm64FcvtzsSFixed,
Arm64FcvtzsVFixed,
Arm64FcvtzsS,
Arm64FcvtzsV,
Arm64FcvtzsGpFixed,
Arm64FcvtzsGp,
Arm64FcvtzuSFixed,
Arm64FcvtzuVFixed,
Arm64FcvtzuS,
Arm64FcvtzuV,
Arm64FcvtzuGpFixed,
Arm64FcvtzuGp,
Arm64FcvtS,
Arm64FdivV,
Arm64FdivS,
Arm64FmaddS,
Arm64FmaxnmpS,
Arm64FmaxnmpV,
Arm64FmaxnmvV,
Arm64FmaxnmV,
Arm64FmaxnmS,
Arm64FmaxpS,
Arm64FmaxpV,
Arm64FmaxvV,
Arm64FmaxV,
Arm64FmaxS,
Arm64FminnmpS,
Arm64FminnmpV,
Arm64FminnmvV,
Arm64FminnmV,
Arm64FminnmS,
Arm64FminpS,
Arm64FminpV,
Arm64FminvV,
Arm64FminV,
Arm64FminS,
Arm64FmlaSe,
Arm64FmlaVe,
Arm64FmlaV,
Arm64FmlsSe,
Arm64FmlsVe,
Arm64FmlsV,
Arm64FmovVi,
Arm64FmovS,
Arm64FmovGp,
Arm64FmovSi,
Arm64FmsubS,
Arm64FmulxSe,
Arm64FmulxVe,
Arm64FmulxS,
Arm64FmulxV,
Arm64FmulSe,
Arm64FmulVe,
Arm64FmulV,
Arm64FmulS,
Arm64FnegV,
Arm64FnegS,
Arm64FnmaddS,
Arm64FnmsubS,
Arm64FnmulS,
Arm64FrecpeS,
Arm64FrecpeV,
Arm64FrecpsS,
Arm64FrecpsV,
Arm64FrecpxS,
Arm64FrintaV,
Arm64FrintaS,
Arm64FrintiV,
Arm64FrintiS,
Arm64FrintmV,
Arm64FrintmS,
Arm64FrintnV,
Arm64FrintnS,
Arm64FrintpV,
Arm64FrintpS,
Arm64FrintxV,
Arm64FrintxS,
Arm64FrintzV,
Arm64FrintzS,
Arm64FrsqrteS,
Arm64FrsqrteV,
Arm64FrsqrtsS,
Arm64FrsqrtsV,
Arm64FsqrtV,
Arm64FsqrtS,
Arm64FsubV,
Arm64FsubS,
Arm64InsVe,
Arm64InsGp,
Arm64Ld1rV,
Arm64Ld1Vms,
Arm64Ld1Vss,
Arm64Ld2rV,
Arm64Ld2Vms,
Arm64Ld2Vss,
Arm64Ld3rV,
Arm64Ld3Vms,
Arm64Ld3Vss,
Arm64Ld4rV,
Arm64Ld4Vms,
Arm64Ld4Vss,
Arm64MlaVe,
Arm64MlaV,
Arm64MlsVe,
Arm64MlsV,
Arm64MoviV,
Arm64MrsFpsr,
Arm64MsrFpsr,
Arm64MulVe,
Arm64MulV,
Arm64MvniV,
Arm64NegS,
Arm64NegV,
Arm64NotV,
Arm64OrnV,
Arm64OrrVi,
Arm64OrrV,
Arm64PmullV,
Arm64PmulV,
Arm64RaddhnV,
Arm64RbitV,
Arm64Rev16V,
Arm64Rev32V,
Arm64Rev64V,
Arm64RshrnV,
Arm64RsubhnV,
Arm64SabalV,
Arm64SabaV,
Arm64SabdlV,
Arm64SabdV,
Arm64SadalpV,
Arm64SaddlpV,
Arm64SaddlvV,
Arm64SaddlV,
Arm64SaddwV,
Arm64ScvtfSFixed,
Arm64ScvtfVFixed,
Arm64ScvtfS,
Arm64ScvtfV,
Arm64ScvtfGpFixed,
Arm64ScvtfGp,
Arm64Sha1cV,
Arm64Sha1hV,
Arm64Sha1mV,
Arm64Sha1pV,
Arm64Sha1su0V,
Arm64Sha1su1V,
Arm64Sha256h2V,
Arm64Sha256hV,
Arm64Sha256su0V,
Arm64Sha256su1V,
Arm64ShaddV,
Arm64ShllV,
Arm64ShlS,
Arm64ShlV,
Arm64ShrnV,
Arm64ShsubV,
Arm64SliS,
Arm64SliV,
Arm64SmaxpV,
Arm64SmaxvV,
Arm64SmaxV,
Arm64SminpV,
Arm64SminvV,
Arm64SminV,
Arm64SmlalVe,
Arm64SmlalV,
Arm64SmlslVe,
Arm64SmlslV,
Arm64SmovV,
Arm64SmullVe,
Arm64SmullV,
Arm64SqabsS,
Arm64SqabsV,
Arm64SqaddS,
Arm64SqaddV,
Arm64SqdmlalSe,
Arm64SqdmlalVe,
Arm64SqdmlalS,
Arm64SqdmlalV,
Arm64SqdmlslSe,
Arm64SqdmlslVe,
Arm64SqdmlslS,
Arm64SqdmlslV,
Arm64SqdmulhSe,
Arm64SqdmulhVe,
Arm64SqdmulhS,
Arm64SqdmulhV,
Arm64SqdmullSe,
Arm64SqdmullVe,
Arm64SqdmullS,
Arm64SqdmullV,
Arm64SqnegS,
Arm64SqnegV,
Arm64SqrdmulhSe,
Arm64SqrdmulhVe,
Arm64SqrdmulhS,
Arm64SqrdmulhV,
Arm64SqrshlS,
Arm64SqrshlV,
Arm64SqrshrnS,
Arm64SqrshrnV,
Arm64SqrshrunS,
Arm64SqrshrunV,
Arm64SqshluS,
Arm64SqshluV,
Arm64SqshlSi,
Arm64SqshlVi,
Arm64SqshlS,
Arm64SqshlV,
Arm64SqshrnS,
Arm64SqshrnV,
Arm64SqshrunS,
Arm64SqshrunV,
Arm64SqsubS,
Arm64SqsubV,
Arm64SqxtnS,
Arm64SqxtnV,
Arm64SqxtunS,
Arm64SqxtunV,
Arm64SrhaddV,
Arm64SriS,
Arm64SriV,
Arm64SrshlS,
Arm64SrshlV,
Arm64SrshrS,
Arm64SrshrV,
Arm64SrsraS,
Arm64SrsraV,
Arm64SshllV,
Arm64SshlS,
Arm64SshlV,
Arm64SshrS,
Arm64SshrV,
Arm64SsraS,
Arm64SsraV,
Arm64SsublV,
Arm64SsubwV,
Arm64St1Vms,
Arm64St1Vss,
Arm64St2Vms,
Arm64St2Vss,
Arm64St3Vms,
Arm64St3Vss,
Arm64St4Vms,
Arm64St4Vss,
Arm64SubhnV,
Arm64SubS,
Arm64SubV,
Arm64SuqaddS,
Arm64SuqaddV,
Arm64TblV,
Arm64TbxV,
Arm64Trn1V,
Arm64Trn2V,
Arm64UabalV,
Arm64UabaV,
Arm64UabdlV,
Arm64UabdV,
Arm64UadalpV,
Arm64UaddlpV,
Arm64UaddlvV,
Arm64UaddlV,
Arm64UaddwV,
Arm64UcvtfSFixed,
Arm64UcvtfVFixed,
Arm64UcvtfS,
Arm64UcvtfV,
Arm64UcvtfGpFixed,
Arm64UcvtfGp,
Arm64UhaddV,
Arm64UhsubV,
Arm64UmaxpV,
Arm64UmaxvV,
Arm64UmaxV,
Arm64UminpV,
Arm64UminvV,
Arm64UminV,
Arm64UmlalVe,
Arm64UmlalV,
Arm64UmlslVe,
Arm64UmlslV,
Arm64UmovV,
Arm64UmullVe,
Arm64UmullV,
Arm64UqaddS,
Arm64UqaddV,
Arm64UqrshlS,
Arm64UqrshlV,
Arm64UqrshrnS,
Arm64UqrshrnV,
Arm64UqshlSi,
Arm64UqshlVi,
Arm64UqshlS,
Arm64UqshlV,
Arm64UqshrnS,
Arm64UqshrnV,
Arm64UqsubS,
Arm64UqsubV,
Arm64UqxtnS,
Arm64UqxtnV,
Arm64UrecpeV,
Arm64UrhaddV,
Arm64UrshlS,
Arm64UrshlV,
Arm64UrshrS,
Arm64UrshrV,
Arm64UrsqrteV,
Arm64UrsraS,
Arm64UrsraV,
Arm64UshllV,
Arm64UshlS,
Arm64UshlV,
Arm64UshrS,
Arm64UshrV,
Arm64UsqaddS,
Arm64UsqaddV,
Arm64UsraS,
Arm64UsraV,
Arm64UsublV,
Arm64UsubwV,
Arm64Uzp1V,
Arm64Uzp2V,
Arm64XtnV,
Arm64Zip1V,
Arm64Zip2V,
Arm64VTypeShift = 13,
Arm64VTypeMask = 1 << Arm64VTypeShift,
Arm64V64 = 0 << Arm64VTypeShift,
Arm64V128 = 1 << Arm64VTypeShift,
Arm64VSizeShift = 14,
Arm64VSizeMask = 3 << Arm64VSizeShift,
Arm64VFloat = 0 << Arm64VSizeShift,
Arm64VDouble = 1 << Arm64VSizeShift,
Arm64VByte = 0 << Arm64VSizeShift,
Arm64VHWord = 1 << Arm64VSizeShift,
Arm64VWord = 2 << Arm64VSizeShift,
Arm64VDWord = 3 << Arm64VSizeShift
    }
}
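The trailing Arm64VType*/Arm64VSize* members are not opcodes; they are modifier bits packed above the opcode index (shift 13 selects the 64- vs 128-bit vector form, shifts 14-15 carry the element size or float width). A sketch of how a consumer of this enum might compose and decompose them (variable names are illustrative):

Intrinsic inst = Intrinsic.Arm64FaddV | Intrinsic.Arm64V128 | Intrinsic.Arm64VFloat;

bool q = (inst & Intrinsic.Arm64VTypeMask) == Intrinsic.Arm64V128;  // 128-bit "Q" form
Intrinsic size = inst & Intrinsic.Arm64VSizeMask;                   // element size / float width
Intrinsic baseOp = inst & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask);

Since the enum is backed by ushort, this layout leaves the low 13 bits (8192 values) for the opcode index itself.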
@@ -5,6 +5,7 @@ namespace ARMeilleure.IntermediateRepresentation
        x1 = 0,
        x2 = 1,
        x4 = 2,
-       x8 = 3
+       x8 = 3,
+       x16 = 4
    }
}
@@ -259,6 +259,20 @@ namespace ARMeilleure.IntermediateRepresentation
            }
        }

+       public Span<Operation> GetUses(ref Span<Operation> buffer)
+       {
+           ReadOnlySpan<Operation> uses = Uses;
+
+           if (buffer.Length < uses.Length)
+           {
+               buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
+           }
+
+           uses.CopyTo(buffer);
+
+           return buffer.Slice(0, uses.Length);
+       }
+
        private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged
        {
            count = 0;
@@ -47,5 +47,19 @@ namespace ARMeilleure.IntermediateRepresentation
            throw new InvalidOperationException($"Invalid operand type \"{type}\".");
        }

+       public static int GetSizeInBytesLog2(this OperandType type)
+       {
+           switch (type)
+           {
+               case OperandType.FP32: return 2;
+               case OperandType.FP64: return 3;
+               case OperandType.I32:  return 2;
+               case OperandType.I64:  return 3;
+               case OperandType.V128: return 4;
+           }
+
+           throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+       }
    }
}
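GetSizeInBytesLog2 gives the shift amount needed for scaled addressing on Arm64, where an I64 access can be encoded as "ldr x0, [x1, x2, lsl #3]". A usage sketch (the offset variable is illustrative):

int scale = OperandType.I64.GetSizeInBytesLog2();         // 3, i.e. an 8-byte access
bool aligned = (offset & ((1L << scale) - 1)) == 0;       // required for the scaled form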
@@ -4,7 +4,7 @@ namespace ARMeilleure.Memory
{
    class ReservedRegion
    {
-       private const int DefaultGranularity = 65536; // Mapping granularity in Windows.
+       public const int DefaultGranularity = 65536; // Mapping granularity in Windows.

        public IJitMemoryBlock Block { get; }
@@ -0,0 +1,13 @@
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;

namespace ARMeilleure.Native
{
    [SupportedOSPlatform("macos")]
    public static partial class JitSupportDarwin
    {
        [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
        public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
    }
}
@@ -0,0 +1,8 @@
NAME = libarmeilleure-jitsupport.dylib

all: ${NAME}

${NAME}:
	clang -O3 -dynamiclib support.c -o ${NAME}

clean:
	rm -f ${NAME}
@@ -0,0 +1,14 @@
#include <stddef.h>
#include <string.h>
#include <pthread.h>
#include <libkern/OSCacheControl.h>

void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) {
    pthread_jit_write_protect_np(0);
    memcpy(dst, src, n);
    pthread_jit_write_protect_np(1);

    // Ensure that the instruction cache for this range is invalidated.
    sys_icache_invalidate(dst, n);
}
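Taken together with JitSupportDarwin above, this is the Apple Silicon write path: pthread_jit_write_protect_np() flips the calling thread between the writable and executable views of a MAP_JIT mapping, and sys_icache_invalidate() discards stale instruction cache lines for the copied range. A hypothetical call site (target and GetGeneratedCode are placeholders; JitCache.Map below does the real equivalent):

byte[] code = GetGeneratedCode(); // illustrative placeholder

unsafe
{
    fixed (byte* codePtr = code)
    {
        // The native helper toggles W^X around its memcpy and then
        // invalidates the icache, so no managed reprotect calls are needed.
        JitSupportDarwin.Copy(target, (IntPtr)codePtr, (ulong)code.Length);
    }
}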
@@ -1,4 +1,5 @@
 using ARMeilleure.CodeGen.X86;
+using System.Runtime.Intrinsics.Arm;

 namespace ARMeilleure
 {
@@ -9,6 +10,8 @@ namespace ARMeilleure
        public static bool AllowLcqInFunctionTable { get; set; } = true;
        public static bool UseUnmanagedDispatchLoop { get; set; } = true;

+       public static bool UseAdvSimdIfAvailable { get; set; } = true;
+
        public static bool UseSseIfAvailable { get; set; } = true;
        public static bool UseSse2IfAvailable { get; set; } = true;
        public static bool UseSse3IfAvailable { get; set; } = true;
@@ -30,6 +33,8 @@ namespace ARMeilleure
            set => HardwareCapabilities.ForceLegacySse = value;
        }

+       internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported;
+
        internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
        internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
        internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
@@ -1,5 +1,7 @@
 using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
 using ARMeilleure.Translation;
+using ARMeilleure.Translation.Cache;
 using System;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@@ -69,8 +71,8 @@ namespace ARMeilleure.Signal
        private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005;

-       private const ulong PageSize = 0x1000;
-       private const ulong PageMask = PageSize - 1;
+       private static ulong _pageSize = GetPageSize();
+       private static ulong _pageMask = _pageSize - 1;

        private static IntPtr _handlerConfig;
        private static IntPtr _signalHandlerPtr;
@@ -79,6 +81,19 @@ namespace ARMeilleure.Signal
        private static readonly object _lock = new object();
        private static bool _initialized;

+       private static ulong GetPageSize()
+       {
+           // TODO: This needs to be based on the current memory manager configuration.
+           if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+           {
+               return 1UL << 14;
+           }
+           else
+           {
+               return 1UL << 12;
+           }
+       }
+
        static NativeSignalHandler()
        {
            _handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>());
@@ -87,7 +102,12 @@ namespace ARMeilleure.Signal
            config = new SignalHandlerConfig();
        }

-       public static void InitializeSignalHandler()
+       public static void InitializeJitCache(IJitMemoryAllocator allocator)
+       {
+           JitCache.Initialize(allocator);
+       }
+
+       public static void InitializeSignalHandler(Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null)
        {
            if (_initialized) return;
@@ -95,10 +115,9 @@ namespace ARMeilleure.Signal
            {
                if (_initialized) return;

-               bool unix = OperatingSystem.IsLinux() || OperatingSystem.IsMacOS();
                ref SignalHandlerConfig config = ref GetConfigRef();

-               if (unix)
+               if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
                {
                    // Unix siginfo struct locations.
                    // NOTE: These are incredibly likely to be different between kernel version and architectures.
@@ -108,7 +127,13 @@ namespace ARMeilleure.Signal
                    _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig));

+                   if (customSignalHandlerFactory != null)
+                   {
+                       _signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr);
+                   }
+
-                   SigAction old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
+                   var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
                    config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
                    config.UnixOldSigaction3Arg = old.sa_flags & 4;
                }
@@ -119,6 +144,11 @@ namespace ARMeilleure.Signal
                    _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig));

+                   if (customSignalHandlerFactory != null)
+                   {
+                       _signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr);
+                   }
+
                    _signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
                }
@@ -197,7 +227,7 @@ namespace ARMeilleure.Signal
                // Only call tracking if in range.
                context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold);

-               Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~PageMask));
+               Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask));

                // Call the tracking action, with the pointer's relative offset to the base address.
                Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20));
@@ -208,7 +238,7 @@ namespace ARMeilleure.Signal
                // Tracking action should be non-null to call it, otherwise assume false return.
                context.BranchIfFalse(skipActionLabel, trackingActionPtr);

-               Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(PageSize), isWrite, Const(0));
+               Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite, Const(0));
                context.Copy(inRegionLocal, result);

                context.MarkLabel(skipActionLabel);
@@ -278,7 +308,7 @@ namespace ARMeilleure.Signal
            OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<UnixExceptionHandler>();
+           return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>();
        }

        private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr)
@@ -332,7 +362,7 @@ namespace ARMeilleure.Signal
            OperandType[] argTypes = new OperandType[] { OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<VectoredExceptionHandler>();
+           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>();
        }
    }
}
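The switch from PageSize/PageMask constants to runtime values matters because Apple Silicon uses 16 KiB pages while the old constants assumed 4 KiB, so fault addresses must be rounded down with the runtime mask. For example, with _pageSize = 1UL << 14:

ulong faultAddress = 0x7F1234567B21UL;
ulong pageStart = faultAddress & ~_pageMask; // 0x7F1234564000, a 16 KiB boundary
// With the old 4 KiB mask this would have been 0x7F1234567000.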
@@ -1,7 +1,7 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 using System;
+using System.Runtime.InteropServices;
 using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

 namespace ARMeilleure.Signal
@@ -32,7 +32,7 @@ namespace ARMeilleure.Signal
            OperandType[] argTypes = new OperandType[] { OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugPartialUnmap>();
+           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>();
        }

        public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr)
@@ -49,7 +49,7 @@ namespace ARMeilleure.Signal
            OperandType[] argTypes = new OperandType[] { OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugThreadLocalMapGetOrReserve>();
+           return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>();
        }

        public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop()
@@ -78,7 +78,7 @@ namespace ARMeilleure.Signal
            OperandType[] argTypes = new OperandType[] { OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<DebugNativeWriteLoop>();
+           return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>();
        }
    }
}
@@ -3,23 +3,23 @@ using System.Runtime.InteropServices;
 namespace ARMeilleure.Signal
 {
-   [StructLayout(LayoutKind.Sequential, Pack = 1)]
-   unsafe struct SigSet
-   {
-       fixed long sa_mask[16];
-   }
-
-   [StructLayout(LayoutKind.Sequential, Pack = 1)]
-   struct SigAction
-   {
-       public IntPtr sa_handler;
-       public SigSet sa_mask;
-       public int sa_flags;
-       public IntPtr sa_restorer;
-   }
-
    static partial class UnixSignalHandlerRegistration
    {
+       [StructLayout(LayoutKind.Sequential, Pack = 1)]
+       public unsafe struct SigSet
+       {
+           fixed long sa_mask[16];
+       }
+
+       [StructLayout(LayoutKind.Sequential, Pack = 1)]
+       public struct SigAction
+       {
+           public IntPtr sa_handler;
+           public SigSet sa_mask;
+           public int sa_flags;
+           public IntPtr sa_restorer;
+       }
+
        private const int SIGSEGV = 11;
        private const int SIGBUS = 10;
        private const int SA_SIGINFO = 0x00000004;
@@ -27,9 +27,24 @@ namespace ARMeilleure.Signal
        [LibraryImport("libc", SetLastError = true)]
        private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);

+       [LibraryImport("libc", SetLastError = true)]
+       private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction);
+
        [LibraryImport("libc", SetLastError = true)]
        private static partial int sigemptyset(ref SigSet set);

+       public static SigAction GetSegfaultExceptionHandler()
+       {
+           int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old);
+
+           if (result != 0)
+           {
+               throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}");
+           }
+
+           return old;
+       }
+
        public static SigAction RegisterExceptionHandler(IntPtr action)
        {
            SigAction sig = new SigAction
@@ -49,7 +64,7 @@ namespace ARMeilleure.Signal
            if (OperatingSystem.IsMacOS())
            {
-               result = sigaction(SIGBUS, ref sig, out SigAction oldb);
+               result = sigaction(SIGBUS, ref sig, out _);

                if (result != 0)
                {
@@ -39,6 +39,8 @@ namespace ARMeilleure.Translation
            }
        }

+       private bool _pendingQcFlagSync;
+
        public OpCode CurrOp { get; set; }

        public IMemoryManager Memory { get; }
@@ -81,6 +83,8 @@ namespace ARMeilleure.Translation
        public override Operand Call(MethodInfo info, params Operand[] callArgs)
        {
+           SyncQcFlag();
+
            if (!HasPtc)
            {
                return base.Call(info, callArgs);
@@ -139,6 +143,51 @@ namespace ARMeilleure.Translation
            _optOpLastFlagSet = null;
        }

+       public void SetPendingQcFlagSync()
+       {
+           _pendingQcFlagSync = true;
+       }
+
+       public void SyncQcFlag()
+       {
+           if (_pendingQcFlagSync)
+           {
+               if (Optimizations.UseAdvSimd)
+               {
+                   Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr);
+
+                   uint qcFlagMask = (uint)FPSR.Qc;
+
+                   Operand qcClearLabel = Label();
+
+                   BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask)));
+
+                   AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+                   InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1));
+
+                   MarkLabel(qcClearLabel);
+               }
+
+               _pendingQcFlagSync = false;
+           }
+       }
+
+       public void ClearQcFlag()
+       {
+           if (Optimizations.UseAdvSimd)
+           {
+               AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+           }
+       }
+
+       public void ClearQcFlagIfModified()
+       {
+           if (_pendingQcFlagSync && Optimizations.UseAdvSimd)
+           {
+               AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
+           }
+       }
+
        public Operand TryGetComparisonResult(Condition condition)
        {
            if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
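The idea behind the deferred sync: saturating AdvSimd instructions set the host FPSR.QC bit for free, so emitters mark the flag as pending instead of updating guest state after every instruction, and SyncQcFlag() folds it in only at observation points (managed calls, block ends, MRS FPSR). A sketch of the expected emitter contract (the handler below is hypothetical, and the helper is assumed to mark the flag pending):

public static void ExampleSaturating_V(ArmEmitterContext context)
{
    if (Optimizations.UseAdvSimd)
    {
        // Emits the native saturating op; the helper is expected to call
        // context.SetPendingQcFlagSync() so host FPSR.QC is reconciled lazily.
        InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV);
    }
}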
@@ -1,6 +1,7 @@
 using ARMeilleure.CodeGen;
 using ARMeilleure.CodeGen.Unwinding;
 using ARMeilleure.Memory;
+using ARMeilleure.Native;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
@@ -17,6 +18,7 @@ namespace ARMeilleure.Translation.Cache
        private const int CacheSize = 2047 * 1024 * 1024;

        private static ReservedRegion _jitRegion;
+       private static JitCacheInvalidation _jitCacheInvalidator;

        private static CacheMemoryAllocator _cacheAllocator;
@@ -25,8 +27,6 @@ namespace ARMeilleure.Translation.Cache
        private static readonly object _lock = new object();
        private static bool _initialized;

-       public static IntPtr Base => _jitRegion.Pointer;
-
        public static void Initialize(IJitMemoryAllocator allocator)
        {
            if (_initialized) return;
@@ -36,6 +36,7 @@ namespace ARMeilleure.Translation.Cache
                if (_initialized) return;

                _jitRegion = new ReservedRegion(allocator, CacheSize);
+               _jitCacheInvalidator = new JitCacheInvalidation(allocator);

                _cacheAllocator = new CacheMemoryAllocator(CacheSize);
@@ -60,11 +61,24 @@ namespace ARMeilleure.Translation.Cache
                IntPtr funcPtr = _jitRegion.Pointer + funcOffset;

-               ReprotectAsWritable(funcOffset, code.Length);
-
-               Marshal.Copy(code, 0, funcPtr, code.Length);
-
-               ReprotectAsExecutable(funcOffset, code.Length);
+               if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+               {
+                   unsafe
+                   {
+                       fixed (byte* codePtr = code)
+                       {
+                           JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
+                       }
+                   }
+               }
+               else
+               {
+                   ReprotectAsWritable(funcOffset, code.Length);
+                   Marshal.Copy(code, 0, funcPtr, code.Length);
+                   ReprotectAsExecutable(funcOffset, code.Length);
+
+                   _jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length);
+               }

                Add(funcOffset, code.Length, func.UnwindInfo);
@@ -0,0 +1,79 @@
using ARMeilleure.Memory;
using System;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
class JitCacheInvalidation
{
private static int[] _invalidationCode = new int[]
{
unchecked((int)0xd53b0022), // mrs x2, ctr_el0
unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
unchecked((int)0x52800083), // mov w3, #0x4
unchecked((int)0x12000c45), // and w5, w2, #0xf
unchecked((int)0x1ac42064), // lsl w4, w3, w4
unchecked((int)0x51000482), // sub w2, w4, #0x1
unchecked((int)0x8a220002), // bic x2, x0, x2
unchecked((int)0x1ac52063), // lsl w3, w3, w5
unchecked((int)0xeb01005f), // cmp x2, x1
unchecked((int)0x93407c84), // sxtw x4, w4
unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
unchecked((int)0xd50b7b22), // dc cvau, x2
unchecked((int)0x8b040042), // add x2, x2, x4
unchecked((int)0xeb02003f), // cmp x1, x2
unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
unchecked((int)0xd5033b9f), // dsb ish
unchecked((int)0x51000462), // sub w2, w3, #0x1
unchecked((int)0x93407c63), // sxtw x3, w3
unchecked((int)0x8a220000), // bic x0, x0, x2
unchecked((int)0xeb00003f), // cmp x1, x0
unchecked((int)0x540000a9), // b.ls 64 <exit>
unchecked((int)0xd50b7520), // ic ivau, x0
unchecked((int)0x8b030000), // add x0, x0, x3
unchecked((int)0xeb00003f), // cmp x1, x0
unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
unchecked((int)0xd5033b9f), // dsb ish
unchecked((int)0xd5033fdf), // isb
unchecked((int)0xd65f03c0), // ret
};
private delegate void InvalidateCache(ulong start, ulong end);
private InvalidateCache _invalidateCache;
private ReservedRegion _invalidateCacheCodeRegion;
private readonly bool _needsInvalidation;
public JitCacheInvalidation(IJitMemoryAllocator allocator)
{
// On macOS, a different path is used to write to the JIT cache, which does the invalidation.
if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
ulong size = (ulong)_invalidationCode.Length * sizeof(int);
ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1;
size = (size + mask) & ~mask;
_invalidateCacheCodeRegion = new ReservedRegion(allocator, size);
_invalidateCacheCodeRegion.ExpandIfNeeded(size);
Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length);
_invalidateCacheCodeRegion.Block.MapAsRx(0, size);
_invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);
_needsInvalidation = true;
}
}
public void Invalidate(IntPtr basePointer, ulong size)
{
if (_needsInvalidation)
{
_invalidateCache((ulong)basePointer, (ulong)basePointer + size);
}
}
}
}
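The stub sizes its loops from CTR_EL0: DminLine (bits 19:16) and IminLine (bits 3:0) are log2 line sizes in 4-byte words, which is where the "mov w3, #0x4" and the two lsl instructions come from. The same decoding, sketched in C# (ReadCtrEl0 is illustrative; the real read happens in the stub via "mrs x2, ctr_el0"):

ulong ctr = ReadCtrEl0();
int dcLineSize = 4 << (int)((ctr >> 16) & 0xF); // stride for the "dc cvau" loop
int icLineSize = 4 << (int)(ctr & 0xF);         // stride for the "ic ivau" loop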
@@ -1,8 +1,9 @@
 using ARMeilleure.CodeGen;
 using ARMeilleure.CodeGen.Optimizations;
-using ARMeilleure.CodeGen.X86;
 using ARMeilleure.Diagnostics;
 using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;

 namespace ARMeilleure.Translation
 {
@@ -12,7 +13,8 @@ namespace ARMeilleure.Translation
            ControlFlowGraph cfg,
            OperandType[] argTypes,
            OperandType retType,
-           CompilerOptions options)
+           CompilerOptions options,
+           Architecture target)
        {
            CompilerContext cctx = new(cfg, argTypes, retType, options);
@@ -49,7 +51,18 @@ namespace ARMeilleure.Translation
                Logger.EndPass(PassName.RegisterToLocal, cfg);
            }

-           return CodeGenerator.Generate(cctx);
+           if (target == Architecture.X64)
+           {
+               return CodeGen.X86.CodeGenerator.Generate(cctx);
+           }
+           else if (target == Architecture.Arm64)
+           {
+               return CodeGen.Arm64.CodeGenerator.Generate(cctx);
+           }
+           else
+           {
+               throw new NotImplementedException(target.ToString());
+           }
        }
    }
}
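With the target passed explicitly, call sites stay host-agnostic; as the diffs below show, they all forward RuntimeInformation.ProcessArchitecture. A representative call (cfg and argTypes as in the surrounding code):

var func = Compiler.Compile(cfg, argTypes, OperandType.I64, CompilerOptions.HighCq,
    RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();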
@@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
        private const string OuterHeaderMagicString = "PTCohd\0\0";
        private const string InnerHeaderMagicString = "PTCihd\0\0";

-       private const uint InternalVersion = 4159; //! To be incremented manually for each change to the ARMeilleure project.
+       private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.

        private const string ActualDir = "0";
        private const string BackupDir = "1";
@@ -14,6 +14,7 @@ using System;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Runtime.InteropServices;
 using System.Threading;
 using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
@@ -282,7 +283,7 @@ namespace ARMeilleure.Translation
                options |= CompilerOptions.Relocatable;
            }

-           CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options);
+           CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture);

            if (context.HasPtc && !singleStep)
            {
@@ -359,9 +360,14 @@ namespace ARMeilleure.Translation
                    }
                }

-               if (block.Address == context.EntryAddress && !context.HighCq)
+               if (block.Address == context.EntryAddress)
                {
-                   EmitRejitCheck(context, out counter);
+                   if (!context.HighCq)
+                   {
+                       EmitRejitCheck(context, out counter);
+                   }
+
+                   context.ClearQcFlag();
                }

                context.CurrBlock = block;
@@ -386,9 +392,14 @@ namespace ARMeilleure.Translation
                    bool isLastOp = opcIndex == block.OpCodes.Count - 1;

-                   if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                   if (isLastOp)
                    {
-                       EmitSynchronization(context);
+                       context.SyncQcFlag();
+
+                       if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                       {
+                           EmitSynchronization(context);
+                       }
                    }

                    Operand lblPredicateSkip = default;
@@ -171,7 +171,7 @@ namespace ARMeilleure.Translation
            var retType = OperandType.I64;
            var argTypes = new[] { OperandType.I64 };

-           var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
+           var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();

            return Marshal.GetFunctionPointerForDelegate(func);
        }
@@ -197,7 +197,7 @@ namespace ARMeilleure.Translation
            var retType = OperandType.I64;
            var argTypes = new[] { OperandType.I64 };

-           var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
+           var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();

            return Marshal.GetFunctionPointerForDelegate(func);
        }
@@ -235,7 +235,7 @@ namespace ARMeilleure.Translation
            var retType = OperandType.None;
            var argTypes = new[] { OperandType.I64, OperandType.I64 };

-           return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<DispatcherFunction>();
+           return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
        }
    }
}
@@ -6,6 +6,6 @@ namespace Ryujinx.Cpu.Jit
    public class JitMemoryAllocator : IJitMemoryAllocator
    {
        public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None);
-       public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve);
+       public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit);
    }
}
@@ -35,6 +35,18 @@ namespace Ryujinx.Memory
        /// Indicates that the memory block should support mapping views of a mirrorable memory block.
        /// The block that is to have their views mapped should be created with the <see cref="Mirrorable"/> flag.
        /// </summary>
-       ViewCompatible = 1 << 3
+       ViewCompatible = 1 << 3,
+
+       /// <summary>
+       /// If used with the <see cref="Mirrorable"/> flag, indicates that the memory block will only be used as
+       /// backing storage and will never be accessed directly, so the memory for the block will not be mapped.
+       /// </summary>
+       NoMap = 1 << 4,
+
+       /// <summary>
+       /// Indicates that the memory will be used to store JIT generated code.
+       /// On some platforms, this requires special flags to be passed that will allow the memory to be executable.
+       /// </summary>
+       Jit = 1 << 5
    }
}
@@ -1,6 +1,6 @@
 using System;
-using System.Collections.Concurrent;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Threading;

 namespace Ryujinx.Memory
@@ -13,10 +13,9 @@ namespace Ryujinx.Memory
        private readonly bool _usesSharedMemory;
        private readonly bool _isMirror;
        private readonly bool _viewCompatible;
+       private readonly bool _forJit;
        private IntPtr _sharedMemory;
        private IntPtr _pointer;
-       private ConcurrentDictionary<MemoryBlock, byte> _viewStorages;
-       private int _viewCount;

        /// <summary>
        /// Pointer to the memory block data.
@@ -40,24 +39,27 @@ namespace Ryujinx.Memory
            if (flags.HasFlag(MemoryAllocationFlags.Mirrorable))
            {
                _sharedMemory = MemoryManagement.CreateSharedMemory(size, flags.HasFlag(MemoryAllocationFlags.Reserve));
-               _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size);
+
+               if (!flags.HasFlag(MemoryAllocationFlags.NoMap))
+               {
+                   _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size);
+               }
+
                _usesSharedMemory = true;
            }
            else if (flags.HasFlag(MemoryAllocationFlags.Reserve))
            {
                _viewCompatible = flags.HasFlag(MemoryAllocationFlags.ViewCompatible);
-               _pointer = MemoryManagement.Reserve(size, _viewCompatible);
+               _forJit = flags.HasFlag(MemoryAllocationFlags.Jit);
+               _pointer = MemoryManagement.Reserve(size, _forJit, _viewCompatible);
            }
            else
            {
-               _pointer = MemoryManagement.Allocate(size);
+               _forJit = flags.HasFlag(MemoryAllocationFlags.Jit);
+               _pointer = MemoryManagement.Allocate(size, _forJit);
            }

            Size = size;
-
-           _viewStorages = new ConcurrentDictionary<MemoryBlock, byte>();
-           _viewStorages.TryAdd(this, 0);
-           _viewCount = 1;
        }

        /// <summary>
@@ -104,7 +106,7 @@ namespace Ryujinx.Memory
        /// <exception cref="InvalidMemoryRegionException">Throw when either <paramref name="offset"/> or <paramref name="size"/> are out of range</exception>
        public bool Commit(ulong offset, ulong size)
        {
-           return MemoryManagement.Commit(GetPointerInternal(offset, size), size);
+           return MemoryManagement.Commit(GetPointerInternal(offset, size), size, _forJit);
        }

        /// <summary>
@@ -138,11 +140,6 @@ namespace Ryujinx.Memory
                throw new ArgumentException("The source memory block is not mirrorable, and thus cannot be mapped on the current block.");
            }

-           if (_viewStorages.TryAdd(srcBlock, 0))
-           {
-               srcBlock.IncrementViewCount();
-           }
-
            MemoryManagement.MapView(srcBlock._sharedMemory, srcOffset, GetPointerInternal(dstOffset, size), size, this);
        }
@@ -403,33 +400,16 @@ namespace Ryujinx.Memory
            {
                MemoryManagement.Free(ptr, Size);
            }

-           foreach (MemoryBlock viewStorage in _viewStorages.Keys)
-           {
-               viewStorage.DecrementViewCount();
-           }
-
-           _viewStorages.Clear();
-       }
-
-       /// <summary>
-       /// Increments the number of views that uses this memory block as storage.
-       /// </summary>
-       private void IncrementViewCount()
-       {
-           Interlocked.Increment(ref _viewCount);
-       }
-
-       /// <summary>
-       /// Decrements the number of views that uses this memory block as storage.
-       /// </summary>
-       private void DecrementViewCount()
-       {
-           if (Interlocked.Decrement(ref _viewCount) == 0 && _sharedMemory != IntPtr.Zero && !_isMirror)
-           {
-               MemoryManagement.DestroySharedMemory(_sharedMemory);
-               _sharedMemory = IntPtr.Zero;
-           }
+           if (!_isMirror)
+           {
+               IntPtr sharedMemory = Interlocked.Exchange(ref _sharedMemory, IntPtr.Zero);
+
+               if (sharedMemory != IntPtr.Zero)
+               {
+                   MemoryManagement.DestroySharedMemory(sharedMemory);
+               }
            }
        }
@@ -453,6 +433,16 @@ namespace Ryujinx.Memory
            return true;
        }

+       public static ulong GetPageSize()
+       {
+           if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+           {
+               return 1UL << 14;
+           }
+
+           return 1UL << 12;
+       }
+
        private static void ThrowInvalidMemoryRegionException() => throw new InvalidMemoryRegionException();
    }
}
@@ -4,7 +4,7 @@ namespace Ryujinx.Memory
{
    public static class MemoryManagement
    {
-       public static IntPtr Allocate(ulong size)
+       public static IntPtr Allocate(ulong size, bool forJit)
        {
            if (OperatingSystem.IsWindows())
            {
@@ -12,7 +12,7 @@ namespace Ryujinx.Memory
            }
            else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
            {
-               return MemoryManagementUnix.Allocate(size);
+               return MemoryManagementUnix.Allocate(size, forJit);
            }
            else
            {
@@ -20,7 +20,7 @@ namespace Ryujinx.Memory
            }
        }

-       public static IntPtr Reserve(ulong size, bool viewCompatible)
+       public static IntPtr Reserve(ulong size, bool forJit, bool viewCompatible)
        {
            if (OperatingSystem.IsWindows())
            {
@@ -28,7 +28,7 @@ namespace Ryujinx.Memory
            }
            else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
            {
-               return MemoryManagementUnix.Reserve(size);
+               return MemoryManagementUnix.Reserve(size, forJit);
            }
            else
            {
@@ -36,7 +36,7 @@ namespace Ryujinx.Memory
            }
        }

-       public static bool Commit(IntPtr address, ulong size)
+       public static bool Commit(IntPtr address, ulong size, bool forJit)
        {
            if (OperatingSystem.IsWindows())
            {
@@ -44,7 +44,7 @@ namespace Ryujinx.Memory
            }
            else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
            {
-               return MemoryManagementUnix.Commit(address, size);
+               return MemoryManagementUnix.Commit(address, size, forJit);
            }
            else
            {
@@ -13,17 +13,17 @@ namespace Ryujinx.Memory
    {
        private static readonly ConcurrentDictionary<IntPtr, ulong> _allocations = new ConcurrentDictionary<IntPtr, ulong>();

-       public static IntPtr Allocate(ulong size)
+       public static IntPtr Allocate(ulong size, bool forJit)
        {
-           return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE);
+           return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE, forJit);
        }

-       public static IntPtr Reserve(ulong size)
+       public static IntPtr Reserve(ulong size, bool forJit)
        {
-           return AllocateInternal(size, MmapProts.PROT_NONE);
+           return AllocateInternal(size, MmapProts.PROT_NONE, forJit);
        }

-       private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool shared = false)
+       private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool forJit, bool shared = false)
        {
            MmapFlags flags = MmapFlags.MAP_ANONYMOUS;
@@ -41,6 +41,16 @@ namespace Ryujinx.Memory
                flags |= MmapFlags.MAP_NORESERVE;
            }

+           if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit)
+           {
+               flags |= MmapFlags.MAP_JIT_DARWIN;
+
+               if (prot == (MmapProts.PROT_READ | MmapProts.PROT_WRITE))
+               {
+                   prot |= MmapProts.PROT_EXEC;
+               }
+           }
+
            IntPtr ptr = mmap(IntPtr.Zero, size, prot, flags, -1, 0);

            if (ptr == new IntPtr(-1L))
@@ -57,9 +67,16 @@ namespace Ryujinx.Memory
            return ptr;
        }

-       public static bool Commit(IntPtr address, ulong size)
+       public static bool Commit(IntPtr address, ulong size, bool forJit)
        {
-           return mprotect(address, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE) == 0;
+           MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+           if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit)
+           {
+               prot |= MmapProts.PROT_EXEC;
+           }
+
+           return mprotect(address, size, prot) == 0;
        }

        public static bool Decommit(IntPtr address, ulong size)
MAP_ANONYMOUS = 4, MAP_ANONYMOUS = 4,
MAP_NORESERVE = 8, MAP_NORESERVE = 8,
MAP_FIXED = 16, MAP_FIXED = 16,
MAP_UNLOCKED = 32 MAP_UNLOCKED = 32,
MAP_JIT_DARWIN = 0x800
} }
[Flags] [Flags]
@ -45,7 +46,6 @@ namespace Ryujinx.Memory
private const int MAP_UNLOCKED_LINUX_GENERIC = 0x80000; private const int MAP_UNLOCKED_LINUX_GENERIC = 0x80000;
private const int MAP_NORESERVE_DARWIN = 0x40; private const int MAP_NORESERVE_DARWIN = 0x40;
private const int MAP_JIT_DARWIN = 0x800;
private const int MAP_ANONYMOUS_DARWIN = 0x1000; private const int MAP_ANONYMOUS_DARWIN = 0x1000;
public const int MADV_DONTNEED = 4; public const int MADV_DONTNEED = 4;
@ -151,10 +151,9 @@ namespace Ryujinx.Memory
} }
} }
if (OperatingSystem.IsMacOSVersionAtLeast(10, 14)) if (flags.HasFlag(MmapFlags.MAP_JIT_DARWIN) && OperatingSystem.IsMacOSVersionAtLeast(10, 14))
{ {
// Only to be used with the Hardened Runtime. result |= (int)MmapFlags.MAP_JIT_DARWIN;
// result |= MAP_JIT_DARWIN;
} }
return result; return result;
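End-to-end, these pieces compose as follows for a JIT region on macOS 10.14 and later (the sizes below are illustrative):

bool forJit = true;

// Reserve adds MAP_JIT on macOS, which the hardened runtime requires
// before a mapping may ever be both writable and executable.
IntPtr region = MemoryManagement.Reserve(1UL << 28, forJit, viewCompatible: false);

// Commit requests PROT_EXEC alongside read/write for JIT pages;
// pthread_jit_write_protect_np() later selects which side each thread sees.
MemoryManagement.Commit(region, 1UL << 14, forJit);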