Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths). (#1630)

Squashed commit message:
* Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths).
* Ptc.InternalVersion = 1630
* Nits.
* Address comments.
* Update Ptc.cs
* Address comment.

parent 668720b088
commit 567ea726e1

14 changed files with 221 additions and 27 deletions
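As a quick reference before the hunks (not part of the diff itself), these are the MXCSR bits the change manipulates and the two masks the generated code applies on entry to and exit from a fast path. This is a standalone C# sketch that mirrors the Mxcsr enum added by this commit; the class and method names are only illustrative.

using System;

[Flags]
enum Mxcsr
{
    Ftz = 1 << 15, // Flush To Zero.
    Um  = 1 << 11, // Underflow Mask.
    Dm  = 1 << 8,  // Denormal Mask.
    Daz = 1 << 6   // Denormals Are Zero.
}

static class MxcsrMasks
{
    static void Main()
    {
        // Bits OR'ed into MXCSR when the guest FPCR.FZ bit is set (X86Mxcsrmb).
        Mxcsr enterMask = Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Daz;

        // Bits cleared again after the operation (X86Mxcsrub).
        Mxcsr exitMask = Mxcsr.Ftz | Mxcsr.Daz;

        Console.WriteLine($"enter: 0x{(int)enterMask:X4}"); // 0x8940
        Console.WriteLine($"exit:  0x{(int)exitMask:X4}");  // 0x8040
    }
}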
@@ -131,6 +131,7 @@ namespace ARMeilleure.CodeGen.X86
 Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
 Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
 Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
 Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
 Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
 Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));

@@ -257,6 +258,7 @@ namespace ARMeilleure.CodeGen.X86
 Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
 Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
 Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
 Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
 Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
 Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));

@@ -526,6 +528,11 @@ namespace ARMeilleure.CodeGen.X86
 WriteInstruction(dest, null, OperandType.None, X86Instruction.Jmp);
 }
 
+public void Ldmxcsr(Operand dest)
+{
+WriteInstruction(dest, null, OperandType.I32, X86Instruction.Ldmxcsr);
+}
+
 public void Lea(Operand dest, Operand source, OperandType type)
 {
 WriteInstruction(dest, source, type, X86Instruction.Lea);

@@ -796,6 +803,11 @@ namespace ARMeilleure.CodeGen.X86
 WriteOpCode(dest, null, null, OperandType.None, info.Flags, info.OpRRM | (int)condition);
 }
 
+public void Stmxcsr(Operand dest)
+{
+WriteInstruction(dest, null, OperandType.I32, X86Instruction.Stmxcsr);
+}
+
 public void Sub(Operand dest, Operand source, OperandType type)
 {
 WriteInstruction(dest, source, type, X86Instruction.Sub);

@@ -250,6 +250,40 @@ namespace ARMeilleure.CodeGen.X86
 break;
 }
 
+case IntrinsicType.Mxcsr:
+{
+Operand offset = operation.GetSource(0);
+Operand bits = operation.GetSource(1);
+
+Debug.Assert(offset.Kind == OperandKind.Constant && bits.Kind == OperandKind.Constant);
+Debug.Assert(offset.Type == OperandType.I32 && bits.Type == OperandType.I32);
+
+int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+Operand rsp = Register(X86Register.Rsp);
+
+MemoryOperand memOp = MemoryOp(OperandType.I32, rsp, null, Multiplier.x1, offs);
+
+Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
+
+context.Assembler.Stmxcsr(memOp);
+
+if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb)
+{
+context.Assembler.Or(memOp, bits, OperandType.I32);
+}
+else /* if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) */
+{
+Operand notBits = Const(~bits.AsInt32());
+
+context.Assembler.And(memOp, notBits, OperandType.I32);
+}
+
+context.Assembler.Ldmxcsr(memOp);
+
+break;
+}
+
 case IntrinsicType.PopCount:
 {
 Operand dest = operation.Destination;
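The IntrinsicType.Mxcsr case above lowers both MXCSR intrinsics to a read-modify-write through a reserved stack slot: Stmxcsr stores the current MXCSR there, the constant is OR'ed in (X86Mxcsrmb) or cleared with an AND of its complement (X86Mxcsrub), and Ldmxcsr reloads the result. A rough standalone sketch of that logic follows; the int stand-in is hypothetical (the real value lives only in the MXCSR register) and 0x1F80 is the architectural MXCSR reset value with all exceptions masked.

static class MxcsrReadModifyWrite
{
    // Stand-in for the 32-bit stack slot the generated code uses.
    static int SetMxcsrBits(int mxcsr, int bits, bool mask)
    {
        // Stmxcsr: store the current MXCSR to the stack slot (here: a parameter).
        int slot = mxcsr;

        if (mask)
        {
            slot |= bits;  // X86Mxcsrmb: OR the requested bits in.
        }
        else
        {
            slot &= ~bits; // X86Mxcsrub: AND with the complement to clear them.
        }

        // Ldmxcsr: load the modified value back into MXCSR.
        return slot;
    }

    static void Main()
    {
        int mxcsr = 0x1F80;                         // Architectural reset value.
        mxcsr = SetMxcsrBits(mxcsr, 0x8940, true);  // Enter FTZ/DAZ: 0x9FC0.
        mxcsr = SetMxcsrBits(mxcsr, 0x8040, false); // Exit FTZ/DAZ:  0x1F80.
        System.Console.WriteLine($"0x{mxcsr:X4}");
    }
}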
@@ -76,6 +76,8 @@ namespace ARMeilleure.CodeGen.X86
 Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
 Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
 Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
+Add(Intrinsic.X86Mxcsrmb, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Mask bits.
+Add(Intrinsic.X86Mxcsrub, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Unmask bits.
 Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
 Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
 Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));

@@ -3,6 +3,7 @@ namespace ARMeilleure.CodeGen.X86
 enum IntrinsicType
 {
 Comis_,
+Mxcsr,
 PopCount,
 Unary,
 UnaryToGpr,

@@ -114,6 +114,16 @@ namespace ARMeilleure.CodeGen.X86
 node = HandleVectorInsert8(block.Operations, node, operation);
 }
 break;
+
+case Instruction.Extended:
+IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb || intrinOp.Intrinsic == Intrinsic.X86Mxcsrub)
+{
+int stackOffset = stackAlloc.Allocate(OperandType.I32);
+operation.SetSources(new Operand[] { Const(stackOffset), operation.GetSource(0) });
+}
+break;
 }
 }
 }

@@ -2,6 +2,7 @@ namespace ARMeilleure.CodeGen.X86
 {
 enum X86Instruction
 {
+None,
 Add,
 Addpd,
 Addps,

@@ -60,6 +61,7 @@ namespace ARMeilleure.CodeGen.X86
 Imul128,
 Insertps,
 Jmp,
+Ldmxcsr,
 Lea,
 Maxpd,
 Maxps,

@@ -186,6 +188,7 @@ namespace ARMeilleure.CodeGen.X86
 Sqrtps,
 Sqrtsd,
 Sqrtss,
+Stmxcsr,
 Sub,
 Subpd,
 Subps,

@@ -380,15 +380,21 @@ namespace ARMeilleure.Instructions
 
 public static void Faddp_V(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
 
 Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
 
 return context.AddIntrinsic(addInst, op1, op2);
+}, scalar: false, op1, op2);
+}, scalar: false, op1, op2);
 });
 }
 else

@@ -478,8 +484,11 @@ namespace ARMeilleure.Instructions
 if (Optimizations.FastFP && Optimizations.UseSse41)
 {
 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: true, op1, op2);
 }, scalar: true);
 }
 else

@@ -496,8 +505,11 @@ namespace ARMeilleure.Instructions
 if (Optimizations.FastFP && Optimizations.UseSse41)
 {
 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: false, op1, op2);
 }, scalar: false);
 }
 else

@@ -582,9 +594,12 @@ namespace ARMeilleure.Instructions
 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
 {
 return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
 }, scalar: false, op1, op2);
+}, scalar: false, op1, op2);
 });
 }
 else

@@ -603,9 +618,12 @@ namespace ARMeilleure.Instructions
 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
 {
 return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
 }, scalar: false, op1, op2);
+}, scalar: false, op1, op2);
 });
 }
 else

@@ -622,8 +640,11 @@ namespace ARMeilleure.Instructions
 if (Optimizations.FastFP && Optimizations.UseSse41)
 {
 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: true, op1, op2);
 }, scalar: true);
 }
 else

@@ -640,8 +661,11 @@ namespace ARMeilleure.Instructions
 if (Optimizations.FastFP && Optimizations.UseSse41)
 {
 EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: false, op1, op2);
 }, scalar: false);
 }
 else

@@ -726,9 +750,12 @@ namespace ARMeilleure.Instructions
 EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
 {
 return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
 }, scalar: false, op1, op2);
+}, scalar: false, op1, op2);
 });
 }
 else

@@ -747,9 +774,12 @@ namespace ARMeilleure.Instructions
 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
 {
 return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
 }, scalar: false, op1, op2);
+}, scalar: false, op1, op2);
 });
 }
 else
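In each rewritten emitter above (Faddp_V and the Fmax/Fmin scalar, vector, pairwise, and across-vector variants), the new EmitSseOrAvxHandleFzModeOpF call is threaded between the existing EmitSse41ProcessNaNsOpF wrapper and the actual SSE operation. A tiny standalone sketch of that call nesting, using string-based delegates as stand-ins rather than ARMeilleure types:

using System;

static class EmitNestingSketch
{
    // Stand-in for ARMeilleure's Func2I delegate; strings replace Operand here,
    // purely to show the order in which the wrappers compose.
    delegate string Func2I(string op1, string op2);

    static string ProcessNaNs(Func2I emit, string n, string m) => $"NaN({emit(n, m)})";
    static string HandleFzMode(Func2I emit, string n, string m) => $"Fz({emit(n, m)})";

    static void Main()
    {
        // Mirrors Faddp_V after this change: NaN handling wraps the FZ-mode
        // handler, which wraps the actual Addps/Addpd intrinsic.
        string result = ProcessNaNs(
            (a, b) => HandleFzMode((x, y) => $"Addps({x}, {y})", a, b),
            "n", "m");

        Console.WriteLine(result); // NaN(Fz(Addps(n, m)))
    }
}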
@@ -3360,6 +3390,53 @@ namespace ARMeilleure.Instructions
 }
 }
 
+public static Operand EmitSseOrAvxHandleFzModeOpF(
+ArmEmitterContext context,
+Func2I emit,
+bool scalar,
+Operand n = null,
+Operand m = null)
+{
+Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn));
+Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm));
+
+EmitSseOrAvxEnterFtzAndDazModesOpF(context, out Operand isTrue);
+
+Operand res = emit(nCopy, mCopy);
+
+EmitSseOrAvxExitFtzAndDazModesOpF(context, isTrue);
+
+if (n != null || m != null)
+{
+return res;
+}
+
+int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+if (sizeF == 0)
+{
+if (scalar)
+{
+res = context.VectorZeroUpper96(res);
+}
+else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+{
+res = context.VectorZeroUpper64(res);
+}
+}
+else /* if (sizeF == 1) */
+{
+if (scalar)
+{
+res = context.VectorZeroUpper64(res);
+}
+}
+
+context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+return null;
+}
+
 private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
 {
 IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

@@ -3418,8 +3495,11 @@ namespace ARMeilleure.Instructions
 mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
 
 Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+}, scalar: scalar, op1, op2);
 }, scalar: scalar, nCopy, mCopy);
 
 if (n != null || m != null)

@@ -3453,8 +3533,11 @@ namespace ARMeilleure.Instructions
 mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
 
 Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
 {
 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+}, scalar: scalar, op1, op2);
 }, scalar: scalar, nCopy, mCopy);
 
 if (n != null || m != null)

@@ -1189,6 +1189,39 @@ namespace ARMeilleure.Instructions
 }
 }
 
+[Flags]
+public enum Mxcsr
+{
+Ftz = 1 << 15, // Flush To Zero.
+Um = 1 << 11, // Underflow Mask.
+Dm = 1 << 8, // Denormal Mask.
+Daz = 1 << 6 // Denormals Are Zero.
+}
+
+public static void EmitSseOrAvxEnterFtzAndDazModesOpF(ArmEmitterContext context, out Operand isTrue)
+{
+isTrue = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpcrFz)));
+
+Operand lblTrue = Label();
+context.BranchIfFalse(lblTrue, isTrue);
+
+context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrmb, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Daz)));
+
+context.MarkLabel(lblTrue);
+}
+
+public static void EmitSseOrAvxExitFtzAndDazModesOpF(ArmEmitterContext context, Operand isTrue = null)
+{
+isTrue ??= context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpcrFz)));
+
+Operand lblTrue = Label();
+context.BranchIfFalse(lblTrue, isTrue);
+
+context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrub, Const((int)(Mxcsr.Ftz | Mxcsr.Daz)));
+
+context.MarkLabel(lblTrue);
+}
+
 public enum CmpCondition
 {
 // Legacy Sse.
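Note that the enter helper above also sets Um and Dm alongside Ftz and Daz: on x86, FTZ is only honored while the underflow exception is masked, and Dm masks the denormal-operand exception, so the flush behavior applies without faulting; the exit helper clears only Ftz and Daz and leaves the exception masks set. Both helpers emit the same guard shape around the wrapped operation; a minimal C# sketch of that control flow, with hypothetical delegate parameters standing in for the NativeInterface call and the two MXCSR intrinsics:

using System;

static class FzFastPathGuard
{
    static void RunWithGuestFz(Func<bool> getFpcrFz, Action maskFtzDaz, Action unmaskFtzDaz, Action op)
    {
        bool fz = getFpcrFz();

        if (fz)
        {
            maskFtzDaz();   // X86Mxcsrmb with Ftz | Um | Dm | Daz.
        }

        op();               // The wrapped SSE/AVX operation.

        if (fz)
        {
            unmaskFtzDaz(); // X86Mxcsrub with Ftz | Daz.
        }
    }

    static void Main()
    {
        RunWithGuestFz(
            () => true,
            () => Console.WriteLine("enter FTZ/DAZ"),
            () => Console.WriteLine("exit FTZ/DAZ"),
            () => Console.WriteLine("op"));
    }
}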
@@ -78,6 +78,11 @@ namespace ARMeilleure.Instructions
 return (ulong)GetContext().Fpcr;
 }
 
+public static bool GetFpcrFz()
+{
+return (GetContext().Fpcr & FPCR.Fz) != 0;
+}
+
 public static ulong GetFpsr()
 {
 return (ulong)GetContext().Fpsr;

@@ -85,7 +90,7 @@ namespace ARMeilleure.Instructions
 
 public static uint GetFpscr()
 {
-var context = GetContext();
+ExecutionContext context = GetContext();
 
 return (uint)(context.Fpsr & FPSR.A32Mask & ~FPSR.Nzcv) |
 (uint)(context.Fpcr & FPCR.A32Mask);

@@ -143,7 +148,7 @@ namespace ARMeilleure.Instructions
 
 public static void SetFpscr(uint fpscr)
 {
-var context = GetContext();
+ExecutionContext context = GetContext();
 
 context.Fpsr = FPSR.A32Mask & (FPSR)fpscr;
 context.Fpcr = FPCR.A32Mask & (FPCR)fpscr;

@@ -250,7 +255,7 @@ namespace ARMeilleure.Instructions
 {
 Statistics.PauseTimer();
 
-var context = GetContext();
+ExecutionContext context = GetContext();
 
 context.CheckInterrupt();
 

@@ -65,6 +65,8 @@ namespace ARMeilleure.IntermediateRepresentation
 X86Mulps,
 X86Mulsd,
 X86Mulss,
+X86Mxcsrmb,
+X86Mxcsrub,
 X86Paddb,
 X86Paddd,
 X86Paddq,

@@ -109,6 +109,7 @@ namespace ARMeilleure.Translation
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0)));
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0)));
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpcr)));
+SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpcrFz)));
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpscr))); // A32 only.
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpsr)));
 SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)));

@@ -573,6 +573,11 @@ namespace ARMeilleure.Translation
 return Add(intrin, Local(OperandType.I64), args);
 }
 
+public void AddIntrinsicNoRet(Intrinsic intrin, params Operand[] args)
+{
+Add(intrin, null, args);
+}
+
 private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
 {
 NewNextBlockIfNeeded();

@@ -21,7 +21,7 @@ namespace ARMeilleure.Translation.PTC
 {
 private const string HeaderMagic = "PTChd";
 
-private const int InternalVersion = 1758; //! To be incremented manually for each change to the ARMeilleure project.
+private const int InternalVersion = 1631; //! To be incremented manually for each change to the ARMeilleure project.
 
 private const string ActualDir = "0";
 private const string BackupDir = "1";

@@ -17,8 +17,10 @@ namespace Ryujinx.Tests.Cpu
 protected const ulong CodeBaseAddress = 0x1000;
 protected const ulong DataBaseAddress = CodeBaseAddress + Size;
 
-private const bool Ignore_FpcrFz_FpcrDn = false;
-private const bool IgnoreAllExcept_FpsrQc = false;
+private static bool Ignore_FpcrFz = false;
+private static bool Ignore_FpcrDn = false;
+
+private static bool IgnoreAllExcept_FpsrQc = false;
 
 private ulong _currAddress;
 

@@ -205,11 +207,14 @@ namespace Ryujinx.Tests.Cpu
 int fpsr = 0,
 bool runUnicorn = true)
 {
-if (Ignore_FpcrFz_FpcrDn)
+if (Ignore_FpcrFz)
 {
-#pragma warning disable CS0162
-fpcr &= ~((1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn));
-#pragma warning restore CS0162
+fpcr &= ~(1 << (int)Fpcr.Fz);
+}
+
+if (Ignore_FpcrDn)
+{
+fpcr &= ~(1 << (int)Fpcr.Dn);
 }
 
 Opcode(opcode);

@@ -323,9 +328,7 @@ namespace Ryujinx.Tests.Cpu
 
 if (IgnoreAllExcept_FpsrQc)
 {
-#pragma warning disable CS0162
 fpsrMask &= Fpsr.Qc;
-#pragma warning restore CS0162
 }
 
 if (fpSkips != FpSkips.None)