WIP sparse stuff

This commit is contained in:
riperiperi 2024-06-23 18:59:45 +01:00
parent acff1d8fa8
commit a3199f0b54
8 changed files with 303 additions and 73 deletions

View file

@ -1,7 +1,10 @@
using ARMeilleure.Diagnostics; using ARMeilleure.Diagnostics;
using Ryujinx.Memory;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Threading;
namespace ARMeilleure.Common namespace ARMeilleure.Common
{ {
@ -11,6 +14,12 @@ namespace ARMeilleure.Common
/// <typeparam name="TEntry">Type of the value</typeparam> /// <typeparam name="TEntry">Type of the value</typeparam>
public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
{ {
/// <summary>
/// Tells callers whether the table should be built with the sparse 2-level layout.
/// True selects the sparse layout to improve performance; false is meant for platforms that
/// do not properly support it, or that would be negatively impacted by it.
/// </summary>
public static bool UseSparseTable
{
    get { return true; }
}
/// <summary> /// <summary>
/// Represents a level in an <see cref="AddressTable{TEntry}"/>. /// Represents a level in an <see cref="AddressTable{TEntry}"/>.
/// </summary> /// </summary>
@ -53,12 +62,33 @@ namespace ARMeilleure.Common
} }
} }
/// <summary>
/// Bookkeeping record for one page handed out by the table.
/// </summary>
private readonly struct AddressTablePage
{
    /// <summary>
    /// True when the page lives inside a sparse reservation; false when it came from the native allocator.
    /// </summary>
    public readonly bool IsSparse;

    /// <summary>
    /// Base address of the page.
    /// </summary>
    public readonly IntPtr Address;

    /// <summary>
    /// Creates a page record.
    /// </summary>
    /// <param name="isSparse">Whether the page belongs to a sparse reservation</param>
    /// <param name="address">Base address of the page</param>
    public AddressTablePage(bool isSparse, IntPtr address)
    {
        (IsSparse, Address) = (isSparse, address);
    }
}
private bool _disposed; private bool _disposed;
private TEntry** _table; private TEntry** _table;
private readonly List<IntPtr> _pages; private readonly List<AddressTablePage> _pages;
private readonly TEntry* _fallbackTable;
private TEntry _fill; private TEntry _fill;
private readonly bool _sparse;
private readonly MemoryBlock _sparseFill;
private readonly SparseMemoryBlock _fillBottomLevel;
private readonly TEntry* _fillBottomLevelPtr;
private readonly List<SparseMemoryBlock> _sparseReserved;
private readonly ulong _sparseBlockSize;
private readonly ReaderWriterLockSlim _sparseLock;
private ulong _sparseReservedOffset;
/// <summary> /// <summary>
/// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance. /// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance.
/// </summary> /// </summary>
@ -80,8 +110,7 @@ namespace ARMeilleure.Common
} }
set set
{ {
*_fallbackTable = value; UpdateFill(value);
_fill = value;
} }
} }
@ -102,26 +131,15 @@ namespace ARMeilleure.Common
} }
} }
/// <summary>
/// Gets a pointer to a single entry table containing only the leaf fill value.
/// </summary>
public IntPtr Fallback
{
get
{
ObjectDisposedException.ThrowIf(_disposed, this);
return (IntPtr)_fallbackTable;
}
}
/// <summary> /// <summary>
/// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of /// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of
/// <see cref="Level"/>. /// <see cref="Level"/>.
/// </summary> /// </summary>
/// <param name="levels">Levels for the address table</param>
/// <param name="sparse">True if the bottom page should be sparsely mapped</param>
/// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception> /// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception>
/// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception> /// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
public AddressTable(Level[] levels) public AddressTable(Level[] levels, bool sparse)
{ {
ArgumentNullException.ThrowIfNull(levels); ArgumentNullException.ThrowIfNull(levels);
@ -130,7 +148,7 @@ namespace ARMeilleure.Common
throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels)); throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels));
} }
_pages = new List<IntPtr>(capacity: 16); _pages = new List<AddressTablePage>(capacity: 16);
Levels = levels; Levels = levels;
Mask = 0; Mask = 0;
@ -140,7 +158,35 @@ namespace ARMeilleure.Common
Mask |= level.Mask; Mask |= level.Mask;
} }
_fallbackTable = (TEntry*)NativeAllocator.Instance.Allocate((ulong)sizeof(TEntry)); _sparse = sparse;
if (sparse)
{
// If the address table is sparse, allocate a fill block
_sparseFill = new MemoryBlock(65536, MemoryAllocationFlags.Mirrorable);
ulong bottomLevelSize = (1ul << levels.Last().Length) * (ulong)sizeof(TEntry);
_fillBottomLevel = new SparseMemoryBlock(bottomLevelSize, null, _sparseFill);
_fillBottomLevelPtr = (TEntry*)_fillBottomLevel.Block.Pointer;
_sparseReserved = new List<SparseMemoryBlock>();
_sparseLock = new ReaderWriterLockSlim();
_sparseBlockSize = bottomLevelSize << 3;
}
}
/// <summary>
/// Stores a new leaf fill value and, when a sparse fill block exists, rewrites that whole block with it.
/// </summary>
/// <remarks>
/// Only <c>_sparseFill</c> and <c>_fill</c> are written here; pages that were already initialised
/// through <c>InitLeafPage</c> or filled at allocation time are not revisited by this method.
/// </remarks>
/// <param name="fillValue">Value that unpopulated leaf entries should read as</param>
private void UpdateFill(TEntry fillValue)
{
// _sparseFill is only allocated by the constructor when the table is sparse.
if (_sparseFill != null)
{
// View the raw fill block as TEntry elements and overwrite every one of them.
Span<byte> span = _sparseFill.GetSpan(0, (int)_sparseFill.Size);
MemoryMarshal.Cast<byte, TEntry>(span).Fill(fillValue);
}
// Remembered so that later page initialisation (InitLeafPage) uses the same value.
_fill = fillValue;
} }
/// <summary> /// <summary>
@ -172,7 +218,13 @@ namespace ARMeilleure.Common
lock (_pages) lock (_pages)
{ {
return ref GetPage(address)[Levels[^1].GetValue(address)]; TEntry* page = GetPage(address);
int index = Levels[^1].GetValue(address);
EnsureMapped((IntPtr)(page + index));
return ref page[index];
} }
} }
@ -190,13 +242,18 @@ namespace ARMeilleure.Common
ref Level level = ref Levels[i]; ref Level level = ref Levels[i];
ref TEntry* nextPage = ref page[level.GetValue(address)]; ref TEntry* nextPage = ref page[level.GetValue(address)];
if (nextPage == null) if (nextPage == null || nextPage == _fillBottomLevelPtr)
{ {
ref Level nextLevel = ref Levels[i + 1]; ref Level nextLevel = ref Levels[i + 1];
nextPage = i == Levels.Length - 2 ? if (i == Levels.Length - 2)
(TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) : {
(TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false); nextPage = (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true);
}
else
{
nextPage = (TEntry*)Allocate(1 << nextLevel.Length, GetFillValue(i), leaf: false);
}
} }
page = (TEntry**)nextPage; page = (TEntry**)nextPage;
@ -205,6 +262,46 @@ namespace ARMeilleure.Common
return (TEntry*)page; return (TEntry*)page;
} }
/// <summary>
/// Makes sure the page containing <paramref name="ptr"/> is committed when the pointer falls inside
/// one of the sparse reservations. Does nothing for non-sparse tables or for pointers outside every
/// reservation.
/// </summary>
/// <param name="ptr">Address of the entry that is about to be accessed</param>
private void EnsureMapped(IntPtr ptr)
{
    if (!_sparse)
    {
        return;
    }

    _sparseLock.EnterReadLock();

    try
    {
        foreach (SparseMemoryBlock reserved in _sparseReserved)
        {
            IntPtr start = reserved.Block.Pointer;
            IntPtr end = start + (IntPtr)reserved.Block.Size;

            if (ptr < start || ptr >= end)
            {
                continue;
            }

            // Reservations do not overlap, so the first hit is the only one worth handling.
            reserved.EnsureMapped((ulong)(ptr - start));

            break;
        }
    }
    finally
    {
        _sparseLock.ExitReadLock();
    }
}
/// <summary>
/// Gets the value that fresh entries of a non-leaf page at <paramref name="level"/> start out with.
/// </summary>
/// <param name="level">Index of the level whose page is being created</param>
/// <returns>
/// The shared fill leaf page pointer when a fill leaf exists and <paramref name="level"/> is the
/// second-to-last level; otherwise <see cref="IntPtr.Zero"/>
/// </returns>
private IntPtr GetFillValue(int level)
{
    bool usesFillLeaf = _fillBottomLevel != null && level == Levels.Length - 2;

    return usesFillLeaf ? (IntPtr)_fillBottomLevelPtr : IntPtr.Zero;
}
/// <summary> /// <summary>
/// Lazily initialize and get the root page of the <see cref="AddressTable{TEntry}"/>. /// Lazily initialize and get the root page of the <see cref="AddressTable{TEntry}"/>.
/// </summary> /// </summary>
@ -213,12 +310,17 @@ namespace ARMeilleure.Common
{ {
if (_table == null) if (_table == null)
{ {
_table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false); _table = (TEntry**)Allocate(1 << Levels[0].Length, GetFillValue(0), leaf: false);
} }
return _table; return _table;
} }
/// <summary>
/// Page initialiser for sparse leaf reservations: writes the current fill value into every
/// entry of the given page.
/// </summary>
/// <param name="page">Raw bytes of the page being initialised</param>
private void InitLeafPage(Span<byte> page)
{
    Span<TEntry> entries = MemoryMarshal.Cast<byte, TEntry>(page);

    entries.Fill(_fill);
}
/// <summary> /// <summary>
/// Allocates a block of memory of the specified type and length. /// Allocates a block of memory of the specified type and length.
/// </summary> /// </summary>
@ -230,16 +332,42 @@ namespace ARMeilleure.Common
private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged
{ {
// Byte size of the requested page: one T per entry.
var size = sizeof(T) * length; var size = sizeof(T) * length;
var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
var span = new Span<T>((void*)page, length);
AddressTablePage page;
// Leaf pages of a sparse table are carved out of a shared reservation instead of being
// allocated from the native allocator.
if (_sparse && leaf)
{
// NOTE(review): the write lock is not released through try/finally. If the SparseMemoryBlock
// constructor below throws, the lock stays held — consider wrapping this section.
_sparseLock.EnterWriteLock();
// Open a new reservation when none exists yet or the current one has been handed out completely.
if (_sparseReserved.Count == 0 || _sparseReservedOffset == _sparseBlockSize)
{
_sparseReserved.Add(new SparseMemoryBlock(_sparseBlockSize, InitLeafPage, _sparseFill));
_sparseReservedOffset = 0;
}
// Hand out the next unused slice of the most recent reservation.
SparseMemoryBlock block = _sparseReserved.Last();
page = new AddressTablePage(true, block.Block.Pointer + (IntPtr)_sparseReservedOffset);
_sparseReservedOffset += (ulong)size;
_sparseLock.ExitWriteLock();
// The `fill` argument is not written on this path; the reservation is created with
// InitLeafPage and _sparseFill instead.
}
else
{
// Native path: allocate the page and fill every entry eagerly.
var address = (IntPtr)NativeAllocator.Instance.Allocate((uint)size);
page = new AddressTablePage(false, address);
var span = new Span<T>((void*)page.Address, length);
span.Fill(fill); span.Fill(fill);
}
// Track the page (Dispose walks _pages) and report the allocation.
_pages.Add(page); _pages.Add(page);
TranslatorEventSource.Log.AddressTableAllocated(size, leaf); TranslatorEventSource.Log.AddressTableAllocated(size, leaf);
return page; return page.Address;
} }
/// <summary> /// <summary>
@ -262,10 +390,23 @@ namespace ARMeilleure.Common
{ {
foreach (var page in _pages) foreach (var page in _pages)
{ {
Marshal.FreeHGlobal(page); if (!page.IsSparse)
{
Marshal.FreeHGlobal(page.Address);
}
} }
Marshal.FreeHGlobal((IntPtr)_fallbackTable); if (_sparse)
{
foreach (SparseMemoryBlock block in _sparseReserved)
{
block.Dispose();
}
_fillBottomLevel.Dispose();
_sparseFill.Dispose();
_sparseLock.Dispose();
}
_disposed = true; _disposed = true;
} }

View file

@ -231,18 +231,7 @@ namespace ARMeilleure.Instructions
Const(3) Const(3)
); );
// TODO: could possibly make a fallback page that level 1 is filled with that contains dispatch stub on all pages hostAddress = context.Load(OperandType.I64, context.Add(page, index2));
// Would save this load and the comparisons
// 16MB of the same value is a bit wasteful so it could replicate with remapping.
Operand fallback = !context.HasPtc ?
Const((long)context.FunctionTable.Fallback) :
Const((long)context.FunctionTable.Fallback, Ptc.DispatchFallbackSymbol);
Operand pageIsZero = context.ICompareEqual(page, Const(0L));
// Small trick to keep this branchless - if the page is zero, load a fallback table entry that always contains the dispatch stub.
hostAddress = context.Load(OperandType.I64, context.ConditionalSelect(pageIsZero, fallback, context.Add(page, index2)));
} }
else else
{ {

View file

@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 26950; //! To be incremented manually for each change to the ARMeilleure project. private const uint InternalVersion = 26957; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";
@ -41,7 +41,6 @@ namespace ARMeilleure.Translation.PTC
public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2); public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3); public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4); public static readonly Symbol FunctionTableSymbol = new(SymbolType.Special, 4);
public static readonly Symbol DispatchFallbackSymbol = new(SymbolType.Special, 5);
private const byte FillingByte = 0x00; private const byte FillingByte = 0x00;
private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
@ -711,10 +710,6 @@ namespace ARMeilleure.Translation.PTC
{ {
imm = translator.FunctionTable.Base; imm = translator.FunctionTable.Base;
} }
else if (symbol == DispatchFallbackSymbol)
{
imm = translator.FunctionTable.Fallback;
}
if (imm == null) if (imm == null)
{ {

View file

@ -22,8 +22,6 @@ namespace ARMeilleure.Translation
{ {
public class Translator public class Translator
{ {
private const bool UseSparseTable = true;
private static readonly AddressTable<ulong>.Level[] _levels64Bit = private static readonly AddressTable<ulong>.Level[] _levels64Bit =
new AddressTable<ulong>.Level[] new AddressTable<ulong>.Level[]
{ {
@ -88,7 +86,9 @@ namespace ARMeilleure.Translation
AddressTable<ulong>.Level[] levels; AddressTable<ulong>.Level[] levels;
if (UseSparseTable) bool useSparseTable = AddressTable<ulong>.UseSparseTable;
if (useSparseTable)
{ {
levels = for64Bits ? _levels64BitSparse : _levels32BitSparse; levels = for64Bits ? _levels64BitSparse : _levels32BitSparse;
} }
@ -99,7 +99,7 @@ namespace ARMeilleure.Translation
CountTable = new EntryTable<uint>(); CountTable = new EntryTable<uint>();
Functions = new TranslatorCache<TranslatedFunction>(); Functions = new TranslatorCache<TranslatedFunction>();
FunctionTable = new AddressTable<ulong>(levels); FunctionTable = new AddressTable<ulong>(levels, useSparseTable);
Stubs = new TranslatorStubs(FunctionTable); Stubs = new TranslatorStubs(FunctionTable);
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;

View file

@ -214,18 +214,9 @@ namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length); asm.Ubfx(indexReg, guestAddress, level1.Index, level1.Length);
asm.Lsl(indexReg, indexReg, Const(3)); asm.Lsl(indexReg, indexReg, Const(3));
// Is the page address zero? Make sure to use the fallback if it is.
asm.Tst(rn, rn);
// Index into the page. // Index into the page.
asm.Add(rn, rn, indexReg); asm.Add(rn, rn, indexReg);
// Reuse the index register for the fallback
ulong fallback = (ulong)funcTable.Fallback;
asm.Mov(indexReg, fallback);
asm.Csel(rn, indexReg, rn, ArmCondition.Eq);
// Load the final branch address // Load the final branch address
asm.LdrRiUn(rn, rn, 0); asm.LdrRiUn(rn, rn, 0);

View file

@ -385,12 +385,6 @@ namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
// Index into the page. // Index into the page.
asm.Add(rn, rn, indexReg); asm.Add(rn, rn, indexReg);
// Reuse the index register for the fallback
ulong fallback = (ulong)funcTable.Fallback;
asm.Mov(indexReg, fallback);
asm.Csel(rn, indexReg, rn, ArmCondition.Eq);
// Load the final branch address // Load the final branch address
asm.LdrRiUn(rn, rn, 0); asm.LdrRiUn(rn, rn, 0);

View file

@ -16,8 +16,6 @@ namespace Ryujinx.Cpu.LightningJit
{ {
class Translator : IDisposable class Translator : IDisposable
{ {
private const bool UseSparseTable = true;
// Should be enabled on platforms that enforce W^X. // Should be enabled on platforms that enforce W^X.
private static bool IsNoWxPlatform => false; private static bool IsNoWxPlatform => false;
@ -78,9 +76,11 @@ namespace Ryujinx.Cpu.LightningJit
JitCache.Initialize(new JitMemoryAllocator(forJit: true)); JitCache.Initialize(new JitMemoryAllocator(forJit: true));
} }
bool useSparseTable = AddressTable<ulong>.UseSparseTable;
AddressTable<ulong>.Level[] levels; AddressTable<ulong>.Level[] levels;
if (UseSparseTable) if (useSparseTable)
{ {
levels = for64Bits ? _levels64BitSparse : _levels32BitSparse; levels = for64Bits ? _levels64BitSparse : _levels32BitSparse;
} }
@ -90,7 +90,7 @@ namespace Ryujinx.Cpu.LightningJit
} }
Functions = new TranslatorCache<TranslatedFunction>(); Functions = new TranslatorCache<TranslatedFunction>();
FunctionTable = new AddressTable<ulong>(levels); FunctionTable = new AddressTable<ulong>(levels, useSparseTable);
Stubs = new TranslatorStubs(FunctionTable, _noWxCache); Stubs = new TranslatorStubs(FunctionTable, _noWxCache);
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;

View file

@ -0,0 +1,120 @@
using Ryujinx.Common;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Ryujinx.Memory
{
    /// <summary>
    /// Callback that initialises a page the first time it is committed inside a <see cref="SparseMemoryBlock"/>.
    /// </summary>
    /// <param name="page">Writable view over the page that was just mapped</param>
    public delegate void PageInitDelegate(Span<byte> page);

    /// <summary>
    /// A reserved address range whose pages are only given private backing memory when
    /// <see cref="EnsureMapped"/> is called for them. Optionally, the whole range starts out as
    /// repeated views of a shared fill block.
    /// </summary>
    public class SparseMemoryBlock : IDisposable
    {
        /// <summary>
        /// Size of each backing block that committed pages are taken from.
        /// </summary>
        private const ulong MapGranularity = 1UL << 17;

        private readonly PageInitDelegate _pageInit;

        private readonly object _lock = new object();
        private readonly ulong _size;
        private readonly ulong _pageSize;
        private readonly MemoryBlock _reservedBlock;
        private readonly List<MemoryBlock> _mappedBlocks;
        private ulong _mappedBlockUsage;

        // One bit per page of the reservation; a set bit means the page has been committed.
        private readonly ulong[] _mappedPageBitmap;

        /// <summary>
        /// Gets the reserved block that spans the whole sparse range.
        /// </summary>
        public MemoryBlock Block => _reservedBlock;

        /// <summary>
        /// Reserves <paramref name="size"/> bytes and, if requested, pre-populates the range with
        /// views of <paramref name="fill"/>.
        /// </summary>
        /// <param name="size">Size of the reservation</param>
        /// <param name="pageInit">
        /// Callback run on each page when it is first committed. May be null, in which case newly
        /// committed pages are not initialised by this class
        /// </param>
        /// <param name="fill">Optional block whose contents are mapped repeatedly over the range</param>
        /// <exception cref="ArgumentException">Size of <paramref name="fill"/> is not a multiple of the page size</exception>
        public SparseMemoryBlock(ulong size, PageInitDelegate pageInit, MemoryBlock fill)
        {
            _size = size;
            _pageSize = MemoryBlock.GetPageSize();
            _reservedBlock = new MemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.ViewCompatible);
            _mappedBlocks = new List<MemoryBlock>();
            _pageInit = pageInit;

            int pages = (int)BitUtils.DivRoundUp(size, _pageSize);
            int bitmapEntries = BitUtils.DivRoundUp(pages, 64);
            _mappedPageBitmap = new ulong[bitmapEntries];

            if (fill != null)
            {
                // Fill the block with mappings from the fill block.

                if (fill.Size % _pageSize != 0)
                {
                    throw new ArgumentException("Fill memory block should be page aligned.", nameof(fill));
                }

                int repeats = (int)BitUtils.DivRoundUp(size, fill.Size);

                ulong offset = 0;
                for (int i = 0; i < repeats; i++)
                {
                    // The last view is clamped so that it does not run past the reservation.
                    _reservedBlock.MapView(fill, 0, offset, Math.Min(fill.Size, size - offset));
                    offset += fill.Size;
                }
            }

            // If a fill block isn't provided, the pages that aren't EnsureMapped are unmapped.
            // The caller can rely on signal handler to fill empty pages instead.
        }

        /// <summary>
        /// Commits one page of the reservation by mapping a page of private backing memory over it,
        /// then runs the page initialiser if there is one. Must be called with <c>_lock</c> held.
        /// </summary>
        /// <param name="pageOffset">Page-aligned offset of the page inside the reservation</param>
        private void MapPage(ulong pageOffset)
        {
            // Take a page from the latest mapped block.
            MemoryBlock block = _mappedBlocks.Count > 0 ? _mappedBlocks[^1] : null;

            // A new backing block is needed when there is none yet or the current one cannot hold
            // another full page. (Equivalent to an equality test whenever the page size divides
            // MapGranularity, but never hands out a slice that runs past the backing block.)
            if (block == null || _mappedBlockUsage + _pageSize > MapGranularity)
            {
                // Need to map some more memory.
                block = new MemoryBlock(MapGranularity, MemoryAllocationFlags.Mirrorable | MemoryAllocationFlags.NoMap);

                _mappedBlocks.Add(block);

                _mappedBlockUsage = 0;
            }

            _reservedBlock.MapView(block, _mappedBlockUsage, pageOffset, _pageSize);

            // The initialiser is optional: callers may construct the block with a null delegate.
            _pageInit?.Invoke(_reservedBlock.GetSpan(pageOffset, (int)_pageSize));

            _mappedBlockUsage += _pageSize;
        }

        /// <summary>
        /// Ensures that the page containing <paramref name="offset"/> has been committed.
        /// Each page is committed at most once.
        /// </summary>
        /// <param name="offset">Offset inside the reservation</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> is outside the reservation</exception>
        public void EnsureMapped(ulong offset)
        {
            // Reject offsets beyond the reservation: they could otherwise land in the unused tail of
            // the last bitmap word and map a view outside the reserved range.
            if (offset >= _size)
            {
                throw new ArgumentOutOfRangeException(nameof(offset));
            }

            int pageIndex = (int)(offset / _pageSize);
            int bitmapIndex = pageIndex >> 6;

            ref ulong entry = ref _mappedPageBitmap[bitmapIndex];
            ulong bit = 1UL << (pageIndex & 63);

            // Unlocked fast path. The acquiring read pairs with the releasing write below so that a
            // thread which sees the bit also sees the page mapping and initialisation.
            if ((System.Threading.Volatile.Read(ref entry) & bit) == 0)
            {
                // Not mapped.

                lock (_lock)
                {
                    // Check the bit while locked to make sure that this only happens once.

                    if ((entry & bit) == 0)
                    {
                        MapPage(offset & ~(_pageSize - 1));

                        System.Threading.Volatile.Write(ref entry, entry | bit);
                    }
                }
            }
        }

        /// <summary>
        /// Releases the reservation and every backing block that was created for committed pages.
        /// </summary>
        public void Dispose()
        {
            _reservedBlock.Dispose();

            foreach (MemoryBlock block in _mappedBlocks)
            {
                block.Dispose();
            }

            GC.SuppressFinalize(this);
        }
    }
}