D32FS8 to D24S8 Conversion
This commit is contained in:
parent
f06c869df1
commit
d7c71c8a7b
6 changed files with 233 additions and 14 deletions
49
src/Ryujinx.Graphics.Metal/FormatConverter.cs
Normal file
49
src/Ryujinx.Graphics.Metal/FormatConverter.cs
Normal file
|
@ -0,0 +1,49 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Metal
|
||||
{
|
||||
/// <summary>
/// CPU-side conversion between packed D24S8 pixels (one 32-bit word per pixel,
/// depth in the upper 24 bits and stencil in the lower 8 bits) and D32FS8 pixels
/// (two 32-bit words per pixel: a float depth followed by a stencil word).
/// </summary>
class FormatConverter
{
    /// <summary>
    /// Expands packed D24S8 pixels into D32FS8 pixels.
    /// </summary>
    /// <param name="output">Destination; receives two 32-bit words (float depth bits, stencil) per input pixel.</param>
    /// <param name="input">Source D24S8 data; trailing bytes that do not form a full 32-bit word are ignored.</param>
    /// <exception cref="IndexOutOfRangeException">Thrown when <paramref name="output"/> cannot hold two words per input pixel.</exception>
    public static void ConvertD24S8ToD32FS8(Span<byte> output, ReadOnlySpan<byte> input)
    {
        const float UnormToFloat = 1f / 0xffffff;

        Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
        ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

        for (int i = 0; i < inputUint.Length; i++)
        {
            uint depthStencil = inputUint[i];
            uint depth = depthStencil >> 8;
            uint stencil = depthStencil & 0xff;

            int j = i * 2;

            // Store the raw IEEE-754 bits of the normalized depth, then the stencil word.
            outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat);
            outputUint[j + 1] = stencil;
        }
    }

    /// <summary>
    /// Packs D32FS8 pixels into D24S8 pixels.
    /// </summary>
    /// <remarks>
    /// Depth is clamped to [0, 1] before it is scaled to 24 bits, and NaN is treated as 0,
    /// so out-of-range values never reach the float-to-integer conversion. A trailing
    /// 32-bit word that does not form a complete pixel is ignored.
    /// </remarks>
    /// <param name="output">Destination; receives one 32-bit word per complete input pixel.</param>
    /// <param name="input">Source D32FS8 data (float depth word followed by stencil word, per pixel).</param>
    /// <exception cref="IndexOutOfRangeException">Thrown when <paramref name="output"/> cannot hold one word per input pixel.</exception>
    public static void ConvertD32FS8ToD24S8(Span<byte> output, ReadOnlySpan<byte> input)
    {
        Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
        ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

        // Only whole pixels (depth word + stencil word) can be converted.
        int pixelCount = inputUint.Length / 2;

        for (int pixel = 0; pixel < pixelCount; pixel++)
        {
            int i = pixel * 2;

            float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]);
            uint stencil = inputUint[i + 1];

            outputUint[pixel] = (DepthToUnorm24(depth) << 8) | (stencil & 0xff);
        }
    }

    // Maps a float depth to a 24-bit unsigned normalized value, saturating at both ends.
    private static uint DepthToUnorm24(float depth)
    {
        // The negated comparison also catches NaN, which compares false against everything.
        if (!(depth > 0f))
        {
            return 0;
        }

        if (depth >= 1f)
        {
            return 0xffffff;
        }

        // depth is strictly inside (0, 1) here, so the product fits in 24 bits; truncation
        // matches the previous behaviour for in-range values.
        return (uint)(depth * 0xffffff);
    }
}
|
||||
}
|
|
@ -172,21 +172,25 @@ namespace Ryujinx.Graphics.Metal
|
|||
{
|
||||
var mtlFormat = _table[(int)format];
|
||||
|
||||
if (mtlFormat == MTLPixelFormat.Depth24UnormStencil8)
|
||||
if (IsD24S8(format))
|
||||
{
|
||||
if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported)
|
||||
{
|
||||
Logger.Error?.PrintMsg(LogClass.Gpu, "Application requested Depth24Stencil8, which is unsupported on this device!");
|
||||
mtlFormat = MTLPixelFormat.Depth32FloatStencil8;
|
||||
}
|
||||
}
|
||||
|
||||
if (mtlFormat == MTLPixelFormat.Invalid)
|
||||
{
|
||||
Logger.Error?.PrintMsg(LogClass.Gpu, $"Application requested {format}, no direct equivalent was found!");
|
||||
Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host.");
|
||||
}
|
||||
|
||||
return mtlFormat;
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks whether a format is one of the 24-bit depth formats handled as D24S8.
/// </summary>
/// <param name="format">Format to test.</param>
/// <returns>True for D24UnormS8Uint, S8UintD24Unorm and X8UintD24Unorm; false otherwise.</returns>
public static bool IsD24S8(Format format)
{
    return format is Format.D24UnormS8Uint
        or Format.S8UintD24Unorm
        or Format.X8UintD24Unorm;
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ namespace Ryujinx.Graphics.Metal
|
|||
private readonly List<IProgram> _programsColorClearU = new();
|
||||
private readonly IProgram _programDepthStencilClear;
|
||||
private readonly IProgram _programStrideChange;
|
||||
private readonly IProgram _programConvertD32S8ToD24S8;
|
||||
private readonly IProgram _programDepthBlit;
|
||||
private readonly IProgram _programDepthBlitMs;
|
||||
private readonly IProgram _programStencilBlit;
|
||||
|
@ -151,6 +152,17 @@ namespace Ryujinx.Graphics.Metal
|
|||
new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
|
||||
], strideChangeResourceLayout, device, new ComputeSize(64, 1, 1));
|
||||
|
||||
var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder()
|
||||
.Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0)
|
||||
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
|
||||
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build();
|
||||
|
||||
var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal");
|
||||
_programConvertD32S8ToD24S8 = new Program(
|
||||
[
|
||||
new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl)
|
||||
], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1));
|
||||
|
||||
var depthBlitSource = ReadMsl("DepthBlit.metal");
|
||||
_programDepthBlit = new Program(
|
||||
[
|
||||
|
@ -591,6 +603,39 @@ namespace Ryujinx.Graphics.Metal
|
|||
_pipeline.SwapState(null);
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the D32S8 to D24S8 conversion compute program over a source buffer,
/// writing the packed result into a destination buffer.
/// </summary>
/// <param name="cbs">Command buffer used to reserve the shader parameter buffer.</param>
/// <param name="src">Holder of the source data (two 32-bit words per pixel).</param>
/// <param name="dstBuffer">Buffer that receives the converted data.</param>
/// <param name="pixelCount">Number of pixels to convert.</param>
/// <param name="dstOffset">Destination start offset, forwarded unchanged to the shader.</param>
public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto<DisposableBuffer> dstBuffer, int pixelCount, int dstOffset)
{
    const int ParamsBufferSize = sizeof(int) * 2;

    // Source size in bytes: each pixel occupies two 32-bit words.
    int sourceBytes = pixelCount * 2 * sizeof(int);
    int groupCount = 1 + sourceBytes / ConvertElementsPerWorkgroup;

    var sourceBuffer = src.GetBuffer();

    // Switch to the helper shader state; the caller's state is restored at the end.
    _pipeline.SwapState(_helperShaderState);

    // Uniform data consumed by the shader: pixel count, then destination start offset.
    // NOTE(review): the shader adds the second value to an index into 32-bit words;
    // confirm callers pass it in those units.
    Span<int> shaderArgs = stackalloc int[] { pixelCount, dstOffset };

    using var paramsBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
    paramsBuffer.Holder.SetDataUnchecked<int>(paramsBuffer.Offset, shaderArgs);
    _pipeline.SetUniformBuffers([new BufferAssignment(0, paramsBuffer.Range)]);

    // Storage bindings start at slot 1: source first, destination second.
    Span<Auto<DisposableBuffer>> storageBuffers = new Auto<DisposableBuffer>[] { sourceBuffer, dstBuffer };
    _pipeline.SetStorageBuffers(1, storageBuffers);

    _pipeline.SetProgram(_programConvertD32S8ToD24S8);
    _pipeline.DispatchCompute(groupCount, 1, 1, "D32S8 to D24S8 Conversion");

    // Put the previously active state back.
    _pipeline.SwapState(null);
}
|
||||
|
||||
public unsafe void ClearColor(
|
||||
int index,
|
||||
ReadOnlySpan<float> clearColor,
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
<EmbeddedResource Include="Shaders\Blit.metal" />
|
||||
<EmbeddedResource Include="Shaders\BlitMs.metal" />
|
||||
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
|
||||
<EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" />
|
||||
<EmbeddedResource Include="Shaders\ColorClear.metal" />
|
||||
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
|
||||
<EmbeddedResource Include="Shaders\DepthBlit.metal" />
|
||||
|
|
66
src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal
Normal file
66
src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal
Normal file
|
@ -0,0 +1,66 @@
|
|||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Parameters supplied by the host for one conversion dispatch.
struct StrideArguments {
    // Number of source pixels to convert.
    int pixelCount;
    // Index of the first output element, in 32-bit words of the output array.
    int dstStartOffset;
};

// Source data: two 32-bit words per pixel (float depth bits, then stencil).
struct InData {
    uint data[1];
};

// Destination data: one packed 32-bit word per pixel (depth << 8 | stencil).
struct OutData {
    uint data[1];
};

struct ConstantBuffers {
    constant StrideArguments* stride_arguments;
};

struct StorageBuffers {
    device InData* in_data;
    device OutData* out_data;
};

// Converts D32FS8 pixels to packed D24S8 pixels. The pixels are split into
// contiguous runs, one run per thread in the grid.
kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]],
                       device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
                       uint3 thread_position_in_grid [[thread_position_in_grid]],
                       uint3 threads_per_grid [[threads_per_grid]])
{
    // Total number of threads that actually run. threads_per_grid already counts
    // threads, so it must not be multiplied by the threadgroup width again;
    // doing so assigns pixels to threads that do not exist and leaves them unconverted.
    int invocations = int(threads_per_grid.x);

    int conversionsRequired = constant_buffers.stride_arguments->pixelCount;

    // Find the pixels that this invocation should convert.

    // - Pixels that every invocation converts.
    int allInvocationConversions = conversionsRequired / invocations;

    // - Extra remainder pixel that this invocation converts.
    int index = int(thread_position_in_grid.x);
    int remainder = conversionsRequired % invocations;
    int extra = (index < remainder) ? 1 : 0;

    int conversionCount = allInvocationConversions + extra;

    // Starting pixel for this invocation, accounting for the extra pixels taken
    // by lower-indexed invocations.
    int startPixel = allInvocationConversions * index + min(remainder, index);

    // Two source words per pixel, one destination word per pixel.
    int srcOffset = startPixel * 2;
    int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startPixel;

    // Perform the conversion for this region.
    for (int i = 0; i < conversionCount; i++)
    {
        float depth = as_type<float>(storage_buffers.in_data->data[srcOffset++]);
        uint stencil = storage_buffers.in_data->data[srcOffset++];

        // Clamp in the float domain, then scale to the 24-bit range (2^24 - 1).
        uint rescaledDepth = uint(clamp(depth, 0.0f, 1.0f) * 16777215.0f);

        storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff);
    }
}
|
|
@ -277,9 +277,18 @@ namespace Ryujinx.Graphics.Metal
|
|||
var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true);
|
||||
var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value;
|
||||
|
||||
// TODO: D32S8 conversion via temp copy holder
|
||||
if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder))
|
||||
{
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
CopyFromOrToBuffer(cbs, mtlBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset: offset, stride: stride);
|
||||
CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride);
|
||||
|
||||
if (tempCopyHolder != null)
|
||||
{
|
||||
CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset);
|
||||
tempCopyHolder.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
|
||||
|
@ -287,27 +296,62 @@ namespace Ryujinx.Graphics.Metal
|
|||
return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel);
|
||||
}
|
||||
|
||||
// Returns the buffer length needed for the given data size.
// TODO: D32S8 conversion is not accounted for here; the size is passed through as-is.
private int GetBufferDataLength(int size) => size;
|
||||
|
||||
// Writes texture data into the staging storage, expanding D24S8 pixels to
// D32FS8 when the texture needs that conversion.
private void CopyDataToBuffer(Span<byte> storage, ReadOnlySpan<byte> input)
{
    if (!NeedsD24S8Conversion())
    {
        // Formats match: a plain copy is enough.
        input.CopyTo(storage);

        return;
    }

    FormatConverter.ConvertD24S8ToD32FS8(storage, input);
}
|
||||
|
||||
// Returns the texture data read back into storage. When the texture needs
// D24S8 conversion, the D32FS8 data in storage is packed into output (allocated
// here if the caller passed an empty span) and that span is returned instead.
private ReadOnlySpan<byte> GetDataFromBuffer(ReadOnlySpan<byte> storage, int size, Span<byte> output)
{
    if (!NeedsD24S8Conversion())
    {
        // No conversion required: hand back the storage data untouched.
        return storage;
    }

    Span<byte> converted = output.IsEmpty ? new byte[GetBufferDataLength(size)] : output;

    FormatConverter.ConvertD32FS8ToD24S8(converted, storage);

    return converted;
}
|
||||
|
||||
// Chooses the buffer a texture-to-buffer copy should write into.
// Without D24S8 conversion the copy goes straight to target and false is returned.
// With conversion, a temporary buffer of hostSize bytes is created, returned through
// copyTarget/copyTargetHolder, and true is returned.
private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder)
{
    if (!NeedsD24S8Conversion())
    {
        copyTargetHolder = null;
        copyTarget = target;

        return false;
    }

    BufferHolder tempHolder = Renderer.BufferManager.Create(hostSize);

    copyTargetHolder = tempHolder;
    copyTarget = tempHolder.GetBuffer().Get(cbs, 0, hostSize).Value;

    return true;
}
|
||||
|
||||
// Converts the D32S8 data held in hostData into copyTarget using the helper
// compute shader. Does nothing when the texture needs no D24S8 conversion.
private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto<DisposableBuffer> copyTarget, int hostSize, int dstOffset)
{
    if (!NeedsD24S8Conversion())
    {
        return;
    }

    // Each source pixel is two 32-bit words.
    int pixelCount = hostSize / (2 * sizeof(int));

    Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, pixelCount, dstOffset);
}
|
||||
|
||||
// True when the requested format is a D24S8 variant but the texture is actually
// stored as Depth32FloatStencil8, so data must be converted when crossing over.
private bool NeedsD24S8Conversion() =>
    FormatTable.IsD24S8(Info.Format) && MtlFormat == MTLPixelFormat.Depth32FloatStencil8;
|
||||
|
||||
public void CopyFromOrToBuffer(
|
||||
CommandBufferScoped cbs,
|
||||
MTLBuffer buffer,
|
||||
|
@ -564,6 +608,16 @@ namespace Ryujinx.Graphics.Metal
|
|||
buffer.Dispose();
|
||||
}
|
||||
|
||||
// Returns the buffer length for the given data length: doubled when the texture
// needs D24S8 conversion, otherwise unchanged.
private int GetBufferDataLength(int length)
{
    return NeedsD24S8Conversion() ? length * 2 : length;
}
|
||||
|
||||
public void SetStorage(BufferRange buffer)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
|
|
Loading…
Reference in a new issue