Generalize tail continues (#1298)

* Generalize tail continues

* Fix DecodeBasicBlock

`Next` and `Branch` would be null, which is not the state the branch
instructions expect: they end up branching or falling through into a block
that is never populated by the `Translator`, which causes an assert to fire
when building the CFG.
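
As a sketch of the fix (the `Exit` flag and the assignments mirror the `Decoder` and `Block` diffs below; the surrounding scenario is illustrative only):

    // Before: decoding could bail out and leave Next/Branch pointing at nothing.
    // After: the same bail-out conditions produce an empty block flagged as an
    // exit, so the CFG builder never sees an unpopulated branch target.
    Block exitBlock = new Block(blkAddress); // blkAddress: successor we chose not to decode

    exitBlock.Exit = true;             // The Translator emits a tail continue here
    exitBlock.EndAddress = blkAddress; // instead of decoding any instructions.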

* Clean up Decode overloads

* Do not synchronize when branching into exit block

If we're branching into an exit block, that exit block will tail continue
into another translation, which already performs its own synchronization.
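
The Translator diff below encodes this rule in a single condition; restated here:

    // A backwards branch still gets a synchronization check, but not when the
    // target is an exit block: that block tail continues into another
    // translation, which synchronizes on entry, so checking here is redundant.
    if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
    {
        EmitSynchronization(context);
    }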

* Remove A32 predicate tail continue

If `block` is not an exit block, then `block.Next` must exist (as implied by
the last instruction of `block`).

* Throw if decoded 0 blocks

Address gdkchan's feedback

* Rebuild block list instead of setting to null

Address gdkchan's feedback
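
With the overloads merged, both decoding modes go through a single entry point. A usage sketch (the first call appears in the Translator diff below; the `singleBlock: true` call is an assumed caller shown only to illustrate the new parameter, its call site is not part of this diff):

    // Full-function translation: follow branches, then run the tail call remover.
    Block[] function = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);

    // Single-block decode: successors of the first block become exit blocks
    // instead of being decoded, and the tail call remover is skipped.
    Block[] single = Decoder.Decode(memory, address, mode, highCq: false, singleBlock: true);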
Ficture Seven, 2020-06-18 07:37:21 +04:00 (committed by GitHub)
commit 2421186d97, parent 5e724cf24e
9 changed files with 178 additions and 240 deletions

ARMeilleure/Decoders/Block.cs

@@ -12,6 +12,7 @@ namespace ARMeilleure.Decoders
         public Block Branch { get; set; }
         public bool TailCall { get; set; }
+        public bool Exit { get; set; }
 
         public List<OpCode> OpCodes { get; private set; }
@@ -29,7 +30,7 @@ namespace ARMeilleure.Decoders
         {
             int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
 
-            if ((ulong)OpCodes[splitIndex].Address < rightBlock.Address)
+            if (OpCodes[splitIndex].Address < rightBlock.Address)
             {
                 splitIndex++;
             }

ARMeilleure/Decoders/Decoder.cs

@@ -17,16 +17,7 @@ namespace ARMeilleure.Decoders
         // For lower code quality translation, we set a lower limit since we're blocking execution.
         private const int MaxInstsPerFunctionLowCq = 500;
 
-        public static Block[] DecodeBasicBlock(IMemoryManager memory, ulong address, ExecutionMode mode)
-        {
-            Block block = new Block(address);
-
-            FillBlock(memory, mode, block, ulong.MaxValue);
-
-            return new Block[] { block };
-        }
-
-        public static Block[] DecodeFunction(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq)
+        public static Block[] Decode(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq, bool singleBlock)
         {
             List<Block> blocks = new List<Block>();
@@ -42,13 +33,14 @@ namespace ARMeilleure.Decoders
             {
                 if (!visited.TryGetValue(blkAddress, out Block block))
                 {
-                    if (opsCount > instructionLimit || !memory.IsMapped(blkAddress))
-                    {
-                        return null;
-                    }
-
                     block = new Block(blkAddress);
 
+                    if ((singleBlock && visited.Count >= 1) || opsCount > instructionLimit || !memory.IsMapped(blkAddress))
+                    {
+                        block.Exit = true;
+                        block.EndAddress = blkAddress;
+                    }
+
                     workQueue.Enqueue(block);
                     visited.Add(blkAddress, block);
@@ -71,6 +63,8 @@ namespace ARMeilleure.Decoders
                         throw new InvalidOperationException("Found duplicate block address on the list.");
                     }
 
+                    currBlock.Exit = false;
+
                     nBlock.Split(currBlock);
                     blocks.Insert(nBlkIndex + 1, currBlock);
@@ -78,6 +72,8 @@ namespace ARMeilleure.Decoders
                     continue;
                 }
 
+                if (!currBlock.Exit)
+                {
                     // If we have a block after the current one, set the limit address.
                     ulong limitAddress = ulong.MaxValue;
@@ -121,6 +117,7 @@ namespace ARMeilleure.Decoders
                         currBlock.Next = GetBlock(currBlock.EndAddress);
                     }
                 }
+                }
 
                 // Insert the new block on the list (sorted by address).
                 if (blocks.Count != 0)
@@ -135,7 +132,15 @@ namespace ARMeilleure.Decoders
                 }
             }
 
-            TailCallRemover.RunPass(address, blocks);
+            if (blocks.Count == 0)
+            {
+                throw new InvalidOperationException($"Decoded 0 blocks. Entry point = 0x{address:X}.");
+            }
+
+            if (!singleBlock)
+            {
+                return TailCallRemover.RunPass(address, blocks);
+            }
 
             return blocks.ToArray();
         }

ARMeilleure/Decoders/Optimizations/TailCallRemover.cs

@@ -1,16 +1,15 @@
-using ARMeilleure.Decoders;
 using System;
 using System.Collections.Generic;
 
 namespace ARMeilleure.Decoders.Optimizations
 {
     static class TailCallRemover
     {
-        public static void RunPass(ulong entryAddress, List<Block> blocks)
+        public static Block[] RunPass(ulong entryAddress, List<Block> blocks)
         {
             // Detect tail calls:
             // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address.
-            // - Unconditional jump to an area outside this contiguous region will be treated as a tail call.
+            // - A jump to an area outside this contiguous region will be treated as an exit block.
             // - Include a small allowance for jumps outside the contiguous range.
 
             if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId))
@@ -19,15 +18,19 @@ namespace ARMeilleure.Decoders.Optimizations
             }
 
             const ulong allowance = 4;
             Block entryBlock = blocks[entryBlockId];
 
-            int startBlockIndex = entryBlockId;
             Block startBlock = entryBlock;
-
-            int endBlockIndex = entryBlockId;
             Block endBlock = entryBlock;
 
+            int startBlockIndex = entryBlockId;
+            int endBlockIndex = entryBlockId;
+
             for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards.
             {
                 Block block = blocks[i];
 
                 if (endBlock.EndAddress < block.Address - allowance)
                 {
                     break; // End of contiguous function.
@@ -40,6 +43,7 @@ namespace ARMeilleure.Decoders.Optimizations
             for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards.
             {
                 Block block = blocks[i];
+
                 if (startBlock.Address > block.EndAddress + allowance)
                 {
                     break; // End of contiguous function.
@@ -51,25 +55,35 @@ namespace ARMeilleure.Decoders.Optimizations
             if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1)
             {
-                return; // Nothing to do here.
+                return blocks.ToArray(); // Nothing to do here.
             }
 
-            // Replace all branches to blocks outside the range with null, and force a tail call.
+            // Mark branches outside of contiguous region as exit blocks.
             for (int i = startBlockIndex; i <= endBlockIndex; i++)
             {
                 Block block = blocks[i];
 
                 if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address))
                 {
-                    block.Branch = null;
-                    block.TailCall = true;
+                    block.Branch.Exit = true;
+                    block.Branch.TailCall = true;
                 }
             }
 
-            // Finally, delete all blocks outside the contiguous range.
-            blocks.RemoveRange(endBlockIndex + 1, (blocks.Count - endBlockIndex) - 1);
-            blocks.RemoveRange(0, startBlockIndex);
+            var newBlocks = new List<Block>(blocks.Count);
+
+            // Finally, rebuild decoded block list, ignoring blocks outside the contiguous range.
+            for (int i = 0; i < blocks.Count; i++)
+            {
+                Block block = blocks[i];
+
+                if (block.Exit || (i >= startBlockIndex && i <= endBlockIndex))
+                {
+                    newBlocks.Add(block);
+                }
+            }
+
+            return newBlocks.ToArray();
         }
     }
 }
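
A worked micro-example of the keep predicate used by the rebuild loop above (the block count and exit layout are invented for illustration; runnable inside any method with `using System;`):

    // Blocks 0..1 form the contiguous range around the entry point; block 2 is a
    // far branch target that the scan above marked as an exit block. The old pass
    // deleted it and nulled the branch; the rebuild now keeps all three blocks.
    bool[] exit = { false, false, true };
    int startBlockIndex = 0, endBlockIndex = 1;

    for (int i = 0; i < exit.Length; i++)
    {
        bool keep = exit[i] || (i >= startBlockIndex && i <= endBlockIndex);
        Console.WriteLine($"block {i}: keep = {keep}"); // keep = True for all three
    }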

ARMeilleure/Instructions/InstEmitException.cs

@@ -27,11 +27,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
 
         public static void Und(ArmEmitterContext context)
@@ -45,11 +40,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
     }
 }

ARMeilleure/Instructions/InstEmitException32.cs

@@ -27,11 +27,6 @@ namespace ARMeilleure.Instructions
             context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id));
 
             context.LoadFromContext();
-
-            if (context.CurrBlock.Next == null)
-            {
-                EmitTailContinue(context, Const(op.Address + 4));
-            }
         }
     }
 }

ARMeilleure/Instructions/InstEmitFlow.cs

@@ -15,15 +15,8 @@ namespace ARMeilleure.Instructions
         {
             OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                context.Branch(context.GetLabel((ulong)op.Immediate));
-            }
-            else
-            {
-                EmitTailContinue(context, Const(op.Immediate), context.CurrBlock.TailCall);
-            }
+            context.Branch(context.GetLabel((ulong)op.Immediate));
         }
 
         public static void B_Cond(ArmEmitterContext context)
         {
@@ -92,35 +85,13 @@ namespace ARMeilleure.Instructions
         {
             OpCodeBImm op = (OpCodeBImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
-
-                if (context.CurrBlock.Next == null)
-                {
-                    EmitTailContinue(context, Const(op.Address + 4));
-                }
-            }
-            else
-            {
-                Operand lblTaken = Label();
-
-                EmitCondBranch(context, lblTaken, cond);
-
-                EmitTailContinue(context, Const(op.Address + 4));
-
-                context.MarkLabel(lblTaken);
-
-                EmitTailContinue(context, Const(op.Immediate));
-            }
+            EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond);
         }
 
         private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
         {
             OpCodeBImm op = (OpCodeBImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                Operand lblTarget = context.GetLabel((ulong)op.Immediate);
+            Operand lblTarget = context.GetLabel((ulong)op.Immediate);
 
             if (onNotZero)
@@ -131,31 +102,6 @@ namespace ARMeilleure.Instructions
             {
                 context.BranchIfFalse(lblTarget, value);
             }
-
-                if (context.CurrBlock.Next == null)
-                {
-                    EmitTailContinue(context, Const(op.Address + 4));
-                }
-            }
-            else
-            {
-                Operand lblTaken = Label();
-
-                if (onNotZero)
-                {
-                    context.BranchIfTrue(lblTaken, value);
-                }
-                else
-                {
-                    context.BranchIfFalse(lblTaken, value);
-                }
-
-                EmitTailContinue(context, Const(op.Address + 4));
-
-                context.MarkLabel(lblTaken);
-
-                EmitTailContinue(context, Const(op.Immediate));
-            }
         }
     }
 }

ARMeilleure/Instructions/InstEmitFlow32.cs

@@ -15,15 +15,8 @@ namespace ARMeilleure.Instructions
         {
             IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
 
-            if (context.CurrBlock.Branch != null)
-            {
-                context.Branch(context.GetLabel((ulong)op.Immediate));
-            }
-            else
-            {
-                EmitTailContinue(context, Const(op.Immediate));
-            }
+            context.Branch(context.GetLabel((ulong)op.Immediate));
         }
 
         public static void Bl(ArmEmitterContext context)
         {

ARMeilleure/Instructions/InstEmitFlowHelper.cs

@@ -150,17 +150,32 @@ namespace ARMeilleure.Instructions
         private static void EmitNativeCall(ArmEmitterContext context, Operand nativeContextPtr, Operand funcAddr, bool isJump = false)
         {
             context.StoreToContext();
 
-            Operand returnAddress;
-
             if (isJump)
             {
                 context.Tailcall(funcAddr, nativeContextPtr);
             }
             else
             {
-                returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr);
+                OpCode op = context.CurrOp;
+
+                Operand returnAddress = context.Call(funcAddr, OperandType.I64, nativeContextPtr);
+
                 context.LoadFromContext();
 
-                EmitContinueOrReturnCheck(context, returnAddress);
+                // Note: The return value of a translated function is always an Int64 with the
+                // address execution has returned to. We expect this address to be immediately after the
+                // current instruction, if it isn't we keep returning until we reach the dispatcher.
+                Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes);
+
+                // Try to continue within this block.
+                // If the return address isn't to our next instruction, we need to return so the JIT can figure out what to do.
+                Operand lblContinue = context.GetLabel(nextAddr.Value);
+
+                // We need to clear out the call flag for the return address before comparing it.
+                context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr));
+
+                context.Return(returnAddress);
             }
         }
@@ -191,46 +206,18 @@ namespace ARMeilleure.Instructions
             }
         }
 
-        private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand returnAddress)
-        {
-            // Note: The return value of a translated function is always an Int64 with the
-            // address execution has returned to. We expect this address to be immediately after the
-            // current instruction, if it isn't we keep returning until we reach the dispatcher.
-            Operand nextAddr = Const(GetNextOpAddress(context.CurrOp));
-
-            // Try to continue within this block.
-            // If the return address isn't to our next instruction, we need to return so the JIT can figure out what to do.
-            Operand lblContinue = Label();
-
-            // We need to clear out the call flag for the return address before comparing it.
-            context.BranchIfTrue(lblContinue, context.ICompareEqual(context.BitwiseAnd(returnAddress, Const(~CallFlag)), nextAddr));
-
-            context.Return(returnAddress);
-
-            context.MarkLabel(lblContinue);
-
-            if (context.CurrBlock.Next == null)
-            {
-                // No code following this instruction, try and find the next block and jump to it.
-                EmitTailContinue(context, nextAddr);
-            }
-        }
-
-        private static ulong GetNextOpAddress(OpCode op)
-        {
-            return op.Address + (ulong)op.OpCodeSizeInBytes;
-        }
-
         public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit = false)
         {
-            bool useTailContinue = true; // Left option here as it may be useful if we need to return to managed rather than tail call in future. (eg. for debug)
+            // Left option here as it may be useful if we need to return to managed rather than tail call in future.
+            // (eg. for debug)
+            bool useTailContinue = true;
 
             if (useTailContinue)
             {
                 if (context.HighCq)
                 {
-                    // If we're doing a tail continue in HighCq, reserve a space in the jump table to avoid calling back to the translator.
-                    // This will always try to get a HighCq version of our continue target as well.
+                    // If we're doing a tail continue in HighCq, reserve a space in the jump table to avoid calling back
+                    // to the translator. This will always try to get a HighCq version of our continue target as well.
                     EmitJumpTableBranch(context, address, true);
                 }
                 else
@@ -263,6 +250,7 @@ namespace ARMeilleure.Instructions
         {
             address = context.BitwiseOr(address, Const(address.Type, (long)CallFlag)); // Set call flag.
             Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address);
+
             EmitNativeCall(context, fallbackAddr, isJump);
         }
@@ -273,39 +261,48 @@ namespace ARMeilleure.Instructions
             Operand endLabel = Label();
             Operand fallbackLabel = Label();
 
-            Action<Operand> emitTableEntry = (Operand entrySkipLabel) =>
+            void EmitTableEntry(Operand entrySkipLabel)
             {
                 // Try to take this entry in the table if its guest address equals 0.
                 Operand gotResult = context.CompareAndSwap(tableAddress, Const(0L), address);
 
                 // Is the address ours? (either taken via CompareAndSwap (0), or what was already here)
-                context.BranchIfFalse(entrySkipLabel, context.BitwiseOr(context.ICompareEqual(gotResult, address), context.ICompareEqual(gotResult, Const(0L))));
+                context.BranchIfFalse(entrySkipLabel,
+                    context.BitwiseOr(
+                        context.ICompareEqual(gotResult, address),
+                        context.ICompareEqual(gotResult, Const(0L)))
+                );
 
                 // It's ours, so what function is it pointing to?
                 Operand targetFunctionPtr = context.Add(tableAddress, Const(8L));
                 Operand targetFunction = context.Load(OperandType.I64, targetFunctionPtr);
 
                 // Call the function.
-                // We pass in the entry address as the guest address, as the entry may need to be updated by the indirect call stub.
+                // We pass in the entry address as the guest address, as the entry may need to be updated by the
+                // indirect call stub.
                 EmitNativeCallWithGuestAddress(context, targetFunction, tableAddress, isJump);
 
                 context.Branch(endLabel);
-            };
+            }
 
             // Currently this uses a size of 1, as higher values inflate code size for no real benefit.
             for (int i = 0; i < JumpTable.DynamicTableElems; i++)
             {
                 if (i == JumpTable.DynamicTableElems - 1)
                 {
-                    emitTableEntry(fallbackLabel); // If this is the last entry, avoid emitting the additional label and add.
+                    // If this is the last entry, avoid emitting the additional label and add.
+                    EmitTableEntry(fallbackLabel);
                 }
                 else
                 {
                     Operand nextLabel = Label();
 
-                    emitTableEntry(nextLabel);
+                    EmitTableEntry(nextLabel);
 
                     context.MarkLabel(nextLabel);
 
-                    tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride)); // Move to the next table entry.
+                    // Move to the next table entry.
+                    tableAddress = context.Add(tableAddress, Const((long)JumpTable.JumpTableStride));
                 }
             }
@@ -323,16 +320,15 @@ namespace ARMeilleure.Instructions
                 address = context.ZeroExtend32(OperandType.I64, address);
             }
 
-            // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to avoid them when possible.
+            // TODO: Constant folding. Indirect calls are slower in the best case and emit more code so we want to
+            // avoid them when possible.
             bool isConst = address.Kind == OperandKind.Constant;
             long constAddr = (long)address.Value;
 
             if (!context.HighCq)
             {
-                // Don't emit indirect calls or jumps if we're compiling in lowCq mode.
-                // This avoids wasting space on the jump and indirect tables.
-                // Just ask the translator for the function address.
+                // Don't emit indirect calls or jumps if we're compiling in lowCq mode. This avoids wasting space on the
+                // jump and indirect tables. Just ask the translator for the function address.
                 EmitBranchFallback(context, address, isJump);
             }
             else if (!isConst)
@@ -376,7 +372,8 @@ namespace ARMeilleure.Instructions
                 Operand funcAddr = context.Load(OperandType.I64, tableEntryPtr);
 
-                EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump); // Call the function directly. If it's not present yet, this will call the direct call stub.
+                // Call the function directly. If it's not present yet, this will call the direct call stub.
+                EmitNativeCallWithGuestAddress(context, funcAddr, address, isJump);
             }
         }
     }
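
For reference, the dynamic table entry layout these offsets imply (field names are invented; the pairing follows from the CompareAndSwap on the entry base address and the function pointer load at entry + 8 above):

    // Hypothetical view of one dynamic jump table slot.
    struct JumpTableEntrySketch
    {
        public long GuestAddress; // offset 0: free when 0, claimed via CompareAndSwap
        public long HostFunction; // offset 8: host function pointer, loaded and called
    }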

ARMeilleure/Translation/Translator.cs

@@ -183,7 +183,7 @@ namespace ARMeilleure.Translation
             Logger.StartPass(PassName.Decoding);
 
-            Block[] blocks = Decoder.DecodeFunction(memory, address, mode, highCq);
+            Block[] blocks = Decoder.Decode(memory, address, mode, highCq, singleBlock: false);
 
             Logger.EndPass(PassName.Decoding);
@@ -242,6 +242,12 @@ namespace ARMeilleure.Translation
                 context.MarkLabel(context.GetLabel(block.Address));
 
+                if (block.Exit)
+                {
+                    InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address), block.TailCall);
+                }
+                else
+                {
                     for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
                     {
                         OpCode opCode = block.OpCodes[opcIndex];
@@ -250,7 +256,7 @@ namespace ARMeilleure.Translation
                         bool isLastOp = opcIndex == block.OpCodes.Count - 1;
 
-                        if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address)
+                        if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
                         {
                             EmitSynchronization(context);
                         }
@@ -276,15 +282,6 @@ namespace ARMeilleure.Translation
                         if (lblPredicateSkip != null)
                         {
                             context.MarkLabel(lblPredicateSkip);
-
-                            // If this is the last op on the block, and there's no "next" block
-                            // after this one, then we have to return right now, with the address
-                            // of the next instruction to be executed (in the case that the condition
-                            // is false, and the branch was not taken, as all basic blocks should end
-                            // with some kind of branch).
-                            if (isLastOp && block.Next == null)
-                            {
-                                InstEmitFlowHelper.EmitTailContinue(context, Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
-                            }
                         }
                     }
                 }