From 98ac020097e98a6630e6cfc355883a4556a2d747 Mon Sep 17 00:00:00 2001 From: FICTURE7 Date: Fri, 9 Apr 2021 20:49:15 +0400 Subject: [PATCH] Add on translation call counting --- .../Instructions/InstEmitFlowHelper.cs | 7 +- ARMeilleure/Instructions/NativeInterface.cs | 19 +-- .../OperandHelper.cs | 6 + ARMeilleure/Translation/ArmEmitterContext.cs | 19 ++- ARMeilleure/Translation/Delegates.cs | 2 +- ARMeilleure/Translation/PTC/Ptc.cs | 5 +- ARMeilleure/Translation/Translator.cs | 136 ++++++++++++++---- 7 files changed, 141 insertions(+), 53 deletions(-) diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs index 296e20a5e..a0ed5dd4c 100644 --- a/ARMeilleure/Instructions/InstEmitFlowHelper.cs +++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -200,7 +200,7 @@ namespace ARMeilleure.Instructions } } - public static void EmitTailContinue(ArmEmitterContext context, Operand address, bool allowRejit) + public static void EmitTailContinue(ArmEmitterContext context, Operand address) { // Left option here as it may be useful if we need to return to managed rather than tail call in future. // (eg. for debug) @@ -218,9 +218,8 @@ namespace ARMeilleure.Instructions { context.StoreToContext(); - Operand fallbackAddr = context.Call(typeof(NativeInterface).GetMethod(allowRejit - ? nameof(NativeInterface.GetFunctionAddress) - : nameof(NativeInterface.GetFunctionAddressWithoutRejit)), address); + Operand fallbackAddr = context.Call( + typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), address); EmitNativeCall(context, fallbackAddr, isJump: true); } diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index b8b7ff0e8..fa17d3349 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -220,6 +220,11 @@ namespace ARMeilleure.Instructions } #endregion + public static void EnqueueForRejit(ulong address) + { + Context.Translator.EnqueueForRejit(address, GetContext().ExecutionMode); + } + public static void SignalMemoryTracking(ulong address, ulong size, bool write) { GetMemoryManager().SignalMemoryTracking(address, size, write); @@ -232,24 +237,14 @@ namespace ARMeilleure.Instructions public static ulong GetFunctionAddress(ulong address) { - return GetFunctionAddressWithHint(address, true); - } - - public static ulong GetFunctionAddressWithoutRejit(ulong address) - { - return GetFunctionAddressWithHint(address, false); - } - - private static ulong GetFunctionAddressWithHint(ulong address, bool hintRejit) - { - TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode, hintRejit); + TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); return (ulong)function.FuncPtr.ToInt64(); } public static ulong GetIndirectFunctionAddress(ulong address, ulong entryAddress) { - TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode, hintRejit: true); + TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); ulong ptr = (ulong)function.FuncPtr.ToInt64(); diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs index 26d664783..6e58fa612 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs @@ -1,4 +1,5 @@ using ARMeilleure.Common; +using System.Runtime.CompilerServices; namespace ARMeilleure.IntermediateRepresentation { @@ -34,6 +35,11 @@ namespace ARMeilleure.IntermediateRepresentation return Operand().With(value); } + public static unsafe Operand Const(ref T reference) + { + return Operand().With((ulong)Unsafe.AsPointer(ref reference)); + } + public static Operand ConstF(float value) { return Operand().With(value); diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index 8f1531922..ac24a45c9 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Decoders; using ARMeilleure.Instructions; using ARMeilleure.IntermediateRepresentation; @@ -41,18 +42,26 @@ namespace ARMeilleure.Translation public IMemoryManager Memory { get; } public JumpTable JumpTable { get; } + public EntryTable CountTable { get; } public ulong EntryAddress { get; } public bool HighCq { get; } public Aarch32Mode Mode { get; } - public ArmEmitterContext(IMemoryManager memory, JumpTable jumpTable, ulong entryAddress, bool highCq, Aarch32Mode mode) + public ArmEmitterContext( + IMemoryManager memory, + JumpTable jumpTable, + EntryTable countTable, + ulong entryAddress, + bool highCq, + Aarch32Mode mode) { - Memory = memory; - JumpTable = jumpTable; + Memory = memory; + JumpTable = jumpTable; + CountTable = countTable; EntryAddress = entryAddress; - HighCq = highCq; - Mode = mode; + HighCq = highCq; + Mode = mode; _labels = new Dictionary(); } diff --git a/ARMeilleure/Translation/Delegates.cs b/ARMeilleure/Translation/Delegates.cs index 5ad718724..a561d2653 100644 --- a/ARMeilleure/Translation/Delegates.cs +++ b/ARMeilleure/Translation/Delegates.cs @@ -103,6 +103,7 @@ namespace ARMeilleure.Translation SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Break))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0))); @@ -113,7 +114,6 @@ namespace ARMeilleure.Translation SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpscr))); // A32 only. SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFpsr))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress))); - SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddressWithoutRejit))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetIndirectFunctionAddress))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr))); SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetTpidr32))); // A32 only. diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 32e0e7e8c..1e8908bbc 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -1,6 +1,7 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.CodeGen.X86; +using ARMeilleure.Common; using ARMeilleure.Memory; using ARMeilleure.Translation.Cache; using Ryujinx.Common; @@ -771,7 +772,7 @@ namespace ARMeilleure.Translation.PTC } } - internal static void MakeAndSaveTranslations(ConcurrentDictionary funcs, IMemoryManager memory, JumpTable jumpTable) + internal static void MakeAndSaveTranslations(ConcurrentDictionary funcs, IMemoryManager memory, JumpTable jumpTable, EntryTable countTable) { var profiledFuncsToTranslate = PtcProfiler.GetProfiledFuncsToTranslate(funcs); @@ -813,7 +814,7 @@ namespace ARMeilleure.Translation.PTC Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address)); - TranslatedFunction func = Translator.Translate(memory, jumpTable, address, item.mode, item.highCq); + TranslatedFunction func = Translator.Translate(memory, jumpTable, countTable, address, item.mode, item.highCq); bool isAddressUnique = funcs.TryAdd(address, func); diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 73a321fd2..7eeeccc5d 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -1,3 +1,4 @@ +using ARMeilleure.Common; using ARMeilleure.Decoders; using ARMeilleure.Diagnostics; using ARMeilleure.Instructions; @@ -10,7 +11,6 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using System.Runtime; using System.Threading; @@ -22,36 +22,45 @@ namespace ARMeilleure.Translation { public class Translator { + private long _nextUpdate; + private readonly IJitMemoryAllocator _allocator; private readonly IMemoryManager _memory; private readonly ConcurrentDictionary _funcs; private readonly ConcurrentQueue> _oldFuncs; + private readonly ConcurrentDictionary _backgroundSet; private readonly ConcurrentStack _backgroundStack; private readonly AutoResetEvent _backgroundTranslatorEvent; private readonly ReaderWriterLock _backgroundTranslatorLock; private JumpTable _jumpTable; internal JumpTable JumpTable => _jumpTable; + internal EntryTable CountTable { get; } private volatile int _threadCount; // FIXME: Remove this once the init logic of the emulator will be redone. - public static ManualResetEvent IsReadyForTranslation = new ManualResetEvent(false); + public static readonly ManualResetEvent IsReadyForTranslation = new(false); public Translator(IJitMemoryAllocator allocator, IMemoryManager memory) { + _nextUpdate = Stopwatch.GetTimestamp(); + _allocator = allocator; _memory = memory; _funcs = new ConcurrentDictionary(); _oldFuncs = new ConcurrentQueue>(); + _backgroundSet = new ConcurrentDictionary(); _backgroundStack = new ConcurrentStack(); _backgroundTranslatorEvent = new AutoResetEvent(false); _backgroundTranslatorLock = new ReaderWriterLock(); + CountTable = new EntryTable(capacity: 16 * 1024 * 1024); + JitCache.Initialize(allocator); DirectCallStubs.InitializeStubs(); @@ -63,9 +72,16 @@ namespace ARMeilleure.Translation { _backgroundTranslatorLock.AcquireReaderLock(Timeout.Infinite); - if (_backgroundStack.TryPop(out RejitRequest request)) + if (_backgroundStack.TryPop(out RejitRequest request) && + _backgroundSet.TryRemove(request.Address, out _)) { - TranslatedFunction func = Translate(_memory, _jumpTable, request.Address, request.Mode, highCq: true); + TranslatedFunction func = Translate( + _memory, + _jumpTable, + CountTable, + request.Address, + request.Mode, + highCq: true); _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => { @@ -80,6 +96,26 @@ namespace ARMeilleure.Translation PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true); } + var nextUpdate = Interlocked.Exchange(ref _nextUpdate, 0); + + if (nextUpdate != 0) + { + var now = Stopwatch.GetTimestamp(); + + if (now < nextUpdate) + { + _nextUpdate = nextUpdate; + } + else + { + Ryujinx.Common.Logging.Logger.Info?.Print( + Ryujinx.Common.Logging.LogClass.Cpu, + $"{_backgroundStack.Count} rejit requests remaining"); + + _nextUpdate = now + Stopwatch.Frequency * 30; + } + } + _backgroundTranslatorLock.ReleaseReaderLock(); } else @@ -89,7 +125,8 @@ namespace ARMeilleure.Translation } } - _backgroundTranslatorEvent.Set(); // Wake up any other background translator threads, to encourage them to exit. + // Wake up any other background translator threads, to encourage them to exit. + _backgroundTranslatorEvent.Set(); } public void Execute(State.ExecutionContext context, ulong address) @@ -105,17 +142,20 @@ namespace ARMeilleure.Translation { Debug.Assert(_funcs.Count == 0); Ptc.LoadTranslations(_funcs, _memory, _jumpTable); - Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable); + Ptc.MakeAndSaveTranslations(_funcs, _memory, _jumpTable, CountTable); } PtcProfiler.Start(); Ptc.Disable(); - // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core+ht etc). - // All threads are normal priority except from the last, which just fills as much of the last core as the os lets it with a low priority. - // If we only have one rejit thread, it should be normal priority as highCq code is performance critical. - // TODO: Use physical cores rather than logical. This only really makes sense for processors with hyperthreading. Requires OS specific code. + // Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core + ht + // etc). All threads are normal priority except from the last, which just fills as much of the last core + // as the os lets it with a low priority. If we only have one rejit thread, it should be normal priority + // as highCq code is performance critical. + // + // TODO: Use physical cores rather than logical. This only really makes sense for processors with + // hyperthreading. Requires OS specific code. int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3); int threadCount = Math.Min(4, unboundedThreadCount); @@ -173,11 +213,11 @@ namespace ARMeilleure.Translation return nextAddr; } - internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode, bool hintRejit = false) + internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) { if (!_funcs.TryGetValue(address, out TranslatedFunction func)) { - func = Translate(_memory, _jumpTable, address, mode, highCq: false); + func = Translate(_memory, _jumpTable, CountTable, address, mode, highCq: false); TranslatedFunction getFunc = _funcs.GetOrAdd(address, func); @@ -193,18 +233,18 @@ namespace ARMeilleure.Translation } } - if (hintRejit && func.ShouldRejit()) - { - _backgroundStack.Push(new RejitRequest(address, mode)); - _backgroundTranslatorEvent.Set(); - } - return func; } - internal static TranslatedFunction Translate(IMemoryManager memory, JumpTable jumpTable, ulong address, ExecutionMode mode, bool highCq) + internal static TranslatedFunction Translate( + IMemoryManager memory, + JumpTable jumpTable, + EntryTable countTable, + ulong address, + ExecutionMode mode, + bool highCq) { - ArmEmitterContext context = new ArmEmitterContext(memory, jumpTable, address, highCq, Aarch32Mode.User); + var context = new ArmEmitterContext(memory, jumpTable, countTable, address, highCq, Aarch32Mode.User); Logger.StartPass(PassName.Decoding); @@ -216,6 +256,11 @@ namespace ARMeilleure.Translation Logger.StartPass(PassName.Translation); + if (!context.HighCq) + { + EmitRejitCheck(context); + } + EmitSynchronization(context); if (blocks[0].Address != address) @@ -320,7 +365,7 @@ namespace ARMeilleure.Translation if (block.Exit) { - InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address), block.TailCall); + InstEmitFlowHelper.EmitTailContinue(context, Const(block.Address)); } else { @@ -368,29 +413,51 @@ namespace ARMeilleure.Translation return context.GetControlFlowGraph(); } + internal static void EmitRejitCheck(ArmEmitterContext context) + { + if (!context.CountTable.TryAllocate(out int index)) + { + return; + } + + Operand lblRejit = Label(); + Operand lblAdd = Label(); + Operand lblEnd = Label(); + + // TODO: PPTC. + Operand address = Const(ref context.CountTable.GetValue(index)); + Operand count = context.Load8(address); + context.BranchIf(lblAdd, count, Const(100), Comparison.LessUI); + context.BranchIf(lblRejit, count, Const(100), Comparison.Equal); + context.Branch(lblEnd); + + context.MarkLabel(lblRejit, BasicBlockFrequency.Cold); + context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit)), Const(context.EntryAddress)); + + context.MarkLabel(lblAdd, BasicBlockFrequency.Cold); + context.Store8(address, context.Add(count, Const(1))); + + context.MarkLabel(lblEnd); + } + internal static void EmitSynchronization(EmitterContext context) { long countOffs = NativeContext.GetCounterOffset(); - Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs)); - - Operand count = context.Load(OperandType.I32, countAddr); - Operand lblNonZero = Label(); - Operand lblExit = Label(); + Operand lblExit = Label(); + Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs)); + Operand count = context.Load(OperandType.I32, countAddr); context.BranchIfTrue(lblNonZero, count, BasicBlockFrequency.Cold); Operand running = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization))); - context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold); context.Return(Const(0L)); context.MarkLabel(lblNonZero); - count = context.Subtract(count, Const(1)); - context.Store(countAddr, count); context.MarkLabel(lblExit); @@ -404,6 +471,15 @@ namespace ARMeilleure.Translation // TODO: Completely remove functions overlapping the specified range from the cache. } + internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode) + { + if (_backgroundSet.TryAdd(guestAddress, null)) + { + _backgroundStack.Push(new RejitRequest(guestAddress, mode)); + _backgroundTranslatorEvent.Set(); + } + } + private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func) { _oldFuncs.Enqueue(new KeyValuePair(guestAddress, func.FuncPtr)); @@ -439,6 +515,8 @@ namespace ARMeilleure.Translation { func.ResetCallCount(); } + + _backgroundSet.TryRemove(request.Address, out _); } } else