Ryujinx/src/Ryujinx.Graphics.Vulkan/StagingBuffer.cs
riperiperi 492a046335
Vulkan: Buffer Mirrors for MacOS performance (#4899)
* Initial implementation of buffer mirrors

Generally slower right now, goal is to reduce render passes in games that do inline updates

Fix support buffer mirrors

Reintroduce vertex buffer mirror

Add storage buffer support

Optimisation part 1

More optimisation

Avoid useless data copies.

Remove unused cbIndex stuff

Properly set write flag for storage buffers.

Fix minor issues

Not sure why this was here.

Fix BufferRangeList

Fix some big issues

Align storage buffers rather than getting full buffer as a range

Improves mirrorability of read-only storage buffers

Increase staging buffer size, as it now contains mirrors

Fix some issues with buffers not updating

Fix buffer SetDataUnchecked offset for one of the paths when using mirrors

Fix buffer mirrors interaction with buffer textures

Fix mirror rebinding

Move GetBuffer calls on indirect draws before BeginRenderPass to avoid draws without render pass

Fix mirrors rebase

Fix rebase 2023

* Fix crash when using stale vertex buffer

Similar to `Get` with a size that's too large, just treat it as a clamp.

* Explicitly set support buffer as mirrorable

* Address feedback

* Remove unused fragment of MVK workaround

* Replace logging for staging buffer OOM

* Address format issues

* Address more format issues

* Mini cleanup

* Address more things

* Rename BufferRangeList

* Support bounding range for ClearMirrors and UploadPendingData

* Add maximum size for vertex buffer mirrors

* Enable index buffer mirrors

Enabled on all platforms for the IbStreamer.

* Feedback

* Remove mystery BufferCache change

Probably macos related?

* Fix mirrors not creating when staging buffer is empty.

* Change log level to debug
2023-08-14 14:18:47 -03:00

280 lines
8.8 KiB
C#

using Ryujinx.Common;
using Ryujinx.Common.Logging;
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Graphics.Vulkan
{
readonly struct StagingBufferReserved
{
public readonly BufferHolder Buffer;
public readonly int Offset;
public readonly int Size;
public StagingBufferReserved(BufferHolder buffer, int offset, int size)
{
Buffer = buffer;
Offset = offset;
Size = size;
}
}
class StagingBuffer : IDisposable
{
private const int BufferSize = 32 * 1024 * 1024;
private int _freeOffset;
private int _freeSize;
private readonly VulkanRenderer _gd;
private readonly BufferHolder _buffer;
private readonly struct PendingCopy
{
public FenceHolder Fence { get; }
public int Size { get; }
public PendingCopy(FenceHolder fence, int size)
{
Fence = fence;
Size = size;
fence.Get();
}
}
private readonly Queue<PendingCopy> _pendingCopies;
public StagingBuffer(VulkanRenderer gd, BufferManager bufferManager)
{
_gd = gd;
_buffer = bufferManager.Create(gd, BufferSize);
_pendingCopies = new Queue<PendingCopy>();
_freeSize = BufferSize;
}
public void PushData(CommandBufferPool cbp, CommandBufferScoped? cbs, Action endRenderPass, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
{
bool isRender = cbs != null;
CommandBufferScoped scoped = cbs ?? cbp.Rent();
// Must push all data to the buffer. If it can't fit, split it up.
endRenderPass?.Invoke();
while (data.Length > 0)
{
if (_freeSize < data.Length)
{
FreeCompleted();
}
while (_freeSize == 0)
{
if (!WaitFreeCompleted(cbp))
{
if (isRender)
{
_gd.FlushAllCommands();
scoped = cbp.Rent();
isRender = false;
}
else
{
scoped = cbp.ReturnAndRent(scoped);
}
}
}
int chunkSize = Math.Min(_freeSize, data.Length);
PushDataImpl(scoped, dst, dstOffset, data[..chunkSize]);
dstOffset += chunkSize;
data = data[chunkSize..];
}
if (!isRender)
{
scoped.Dispose();
}
}
private void PushDataImpl(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
{
var srcBuffer = _buffer.GetBuffer();
var dstBuffer = dst.GetBuffer(cbs.CommandBuffer, dstOffset, data.Length, true);
int offset = _freeOffset;
int capacity = BufferSize - offset;
if (capacity < data.Length)
{
_buffer.SetDataUnchecked(offset, data[..capacity]);
_buffer.SetDataUnchecked(0, data[capacity..]);
BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, offset, dstOffset, capacity);
BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, 0, dstOffset + capacity, data.Length - capacity);
}
else
{
_buffer.SetDataUnchecked(offset, data);
BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, offset, dstOffset, data.Length);
}
_freeOffset = (offset + data.Length) & (BufferSize - 1);
_freeSize -= data.Length;
Debug.Assert(_freeSize >= 0);
_pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), data.Length));
}
public bool TryPushData(CommandBufferScoped cbs, Action endRenderPass, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
{
if (data.Length > BufferSize)
{
return false;
}
if (_freeSize < data.Length)
{
FreeCompleted();
if (_freeSize < data.Length)
{
return false;
}
}
endRenderPass?.Invoke();
PushDataImpl(cbs, dst, dstOffset, data);
return true;
}
private StagingBufferReserved ReserveDataImpl(CommandBufferScoped cbs, int size, int alignment)
{
// Assumes the caller has already determined that there is enough space.
int offset = BitUtils.AlignUp(_freeOffset, alignment);
int padding = offset - _freeOffset;
int capacity = Math.Min(_freeSize, BufferSize - offset);
int reservedLength = size + padding;
if (capacity < size)
{
offset = 0; // Place at start.
reservedLength += capacity;
}
_freeOffset = (_freeOffset + reservedLength) & (BufferSize - 1);
_freeSize -= reservedLength;
Debug.Assert(_freeSize >= 0);
_pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), reservedLength));
return new StagingBufferReserved(_buffer, offset, size);
}
private int GetContiguousFreeSize(int alignment)
{
int alignedFreeOffset = BitUtils.AlignUp(_freeOffset, alignment);
int padding = alignedFreeOffset - _freeOffset;
// Free regions:
// - Aligned free offset to end (minimum free size - padding)
// - 0 to _freeOffset + freeSize wrapped (only if free area contains 0)
int endOffset = (_freeOffset + _freeSize) & (BufferSize - 1);
return Math.Max(
Math.Min(_freeSize - padding, BufferSize - alignedFreeOffset),
endOffset <= _freeOffset ? Math.Min(_freeSize, endOffset) : 0
);
}
/// <summary>
/// Reserve a range on the staging buffer for the current command buffer and upload data to it.
/// </summary>
/// <param name="cbs">Command buffer to reserve the data on</param>
/// <param name="data">The data to upload</param>
/// <param name="alignment">The required alignment for the buffer offset</param>
/// <returns>The reserved range of the staging buffer</returns>
public unsafe StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size, int alignment)
{
if (size > BufferSize)
{
return null;
}
// Temporary reserved data cannot be fragmented.
if (GetContiguousFreeSize(alignment) < size)
{
FreeCompleted();
if (GetContiguousFreeSize(alignment) < size)
{
Logger.Debug?.PrintMsg(LogClass.Gpu, $"Staging buffer out of space to reserve data of size {size}.");
return null;
}
}
return ReserveDataImpl(cbs, size, alignment);
}
private bool WaitFreeCompleted(CommandBufferPool cbp)
{
if (_pendingCopies.TryPeek(out var pc))
{
if (!pc.Fence.IsSignaled())
{
if (cbp.IsFenceOnRentedCommandBuffer(pc.Fence))
{
return false;
}
pc.Fence.Wait();
}
var dequeued = _pendingCopies.Dequeue();
Debug.Assert(dequeued.Fence == pc.Fence);
_freeSize += pc.Size;
pc.Fence.Put();
}
return true;
}
private void FreeCompleted()
{
FenceHolder signalledFence = null;
while (_pendingCopies.TryPeek(out var pc) && (pc.Fence == signalledFence || pc.Fence.IsSignaled()))
{
signalledFence = pc.Fence; // Already checked - don't need to do it again.
var dequeued = _pendingCopies.Dequeue();
Debug.Assert(dequeued.Fence == pc.Fence);
_freeSize += pc.Size;
pc.Fence.Put();
}
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
_buffer.Dispose();
while (_pendingCopies.TryDequeue(out var pc))
{
pc.Fence.Put();
}
}
}
public void Dispose()
{
Dispose(true);
}
}
}