D32FS8 to D24S8 Conversion

This commit is contained in:
Isaac Marovitz 2024-08-13 17:03:19 +01:00 committed by Isaac Marovitz
parent f06c869df1
commit d7c71c8a7b
6 changed files with 233 additions and 14 deletions

View file

@ -0,0 +1,49 @@
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Metal
{
    /// <summary>
    /// CPU-side conversions between packed D24S8 texels (one 32-bit word: depth in the upper
    /// 24 bits, stencil in the low 8 bits) and D32FS8 texels (two 32-bit words: the depth as
    /// float bits, followed by a word holding the stencil).
    /// </summary>
    class FormatConverter
    {
        /// <summary>
        /// Expands packed D24S8 texels into D32FS8 texels.
        /// </summary>
        /// <param name="output">Destination bytes; receives two 32-bit words per input word.</param>
        /// <param name="input">Source bytes, read as one 32-bit word per texel.</param>
        public static void ConvertD24S8ToD32FS8(Span<byte> output, ReadOnlySpan<byte> input)
        {
            const float UnormToFloat = 1f / 0xffffff;

            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

            for (int i = 0; i < inputUint.Length; i++)
            {
                uint depthStencil = inputUint[i];
                uint depth = depthStencil >> 8;
                uint stencil = depthStencil & 0xff;

                // Each source word expands to a (depth, stencil) pair of words.
                int j = i * 2;

                outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat);
                outputUint[j + 1] = stencil;
            }
        }

        /// <summary>
        /// Packs D32FS8 texels into D24S8 texels.
        /// </summary>
        /// <remarks>
        /// Depth is clamped to [0, 1] before it is scaled and truncated to 24 bits; NaN is
        /// treated as 0. Only the low 8 bits of the stencil word are kept.
        /// </remarks>
        /// <param name="output">Destination bytes; receives one 32-bit word per input pair.</param>
        /// <param name="input">Source bytes, read as pairs of 32-bit words (depth bits, stencil).</param>
        public static void ConvertD32FS8ToD24S8(Span<byte> output, ReadOnlySpan<byte> input)
        {
            const float MaxUnorm24 = 0xffffff;

            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

            for (int i = 0; i < inputUint.Length; i += 2)
            {
                float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]);
                uint stencil = inputUint[i + 1];

                // Clamp while the value is still a float. Converting an out-of-range or
                // negative float to uint yields an unspecified result, so clamping the
                // integer afterwards cannot repair it.
                float normalized = float.IsNaN(depth) ? 0f : Math.Clamp(depth, 0f, 1f);
                uint depth24 = (uint)(normalized * MaxUnorm24);

                outputUint[i >> 1] = (depth24 << 8) | (stencil & 0xff);
            }
        }
    }
}

View file

@ -172,21 +172,25 @@ namespace Ryujinx.Graphics.Metal
var mtlFormat = _table[(int)format];
if (mtlFormat == MTLPixelFormat.Depth24UnormStencil8)
if (IsD24S8(format))
if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported)
Logger.Error?.PrintMsg(LogClass.Gpu, "Application requested Depth24Stencil8, which is unsupported on this device!");
mtlFormat = MTLPixelFormat.Depth32FloatStencil8;
if (mtlFormat == MTLPixelFormat.Invalid)
Logger.Error?.PrintMsg(LogClass.Gpu, $"Application requested {format}, no direct equivalent was found!");
Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host.");
return mtlFormat;
/// <summary>
/// Checks whether a format is one of the 24-bit depth formats handled as D24S8:
/// D24UnormS8Uint, S8UintD24Unorm or X8UintD24Unorm.
/// </summary>
/// <param name="format">Format to test.</param>
/// <returns>True when the format is one of the three listed formats.</returns>
public static bool IsD24S8(Format format)
{
    return format is Format.D24UnormS8Uint or Format.S8UintD24Unorm or Format.X8UintD24Unorm;
}

View file

@ -32,6 +32,7 @@ namespace Ryujinx.Graphics.Metal
private readonly List<IProgram> _programsColorClearU = new();
private readonly IProgram _programDepthStencilClear;
private readonly IProgram _programStrideChange;
// Compute program built from the embedded "ConvertD32S8ToD24S8.metal" shader source.
private readonly IProgram _programConvertD32S8ToD24S8;
private readonly IProgram _programDepthBlit;
private readonly IProgram _programDepthBlitMs;
private readonly IProgram _programStencilBlit;
@ -151,6 +152,17 @@ namespace Ryujinx.Graphics.Metal
new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
], strideChangeResourceLayout, device, new ComputeSize(64, 1, 1));
var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder()
.Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0)
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build();
var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal");
_programConvertD32S8ToD24S8 = new Program(
new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl)
], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1));
var depthBlitSource = ReadMsl("DepthBlit.metal");
_programDepthBlit = new Program(
@ -591,6 +603,39 @@ namespace Ryujinx.Graphics.Metal
/// <summary>
/// Dispatches the compute program that packs D32FS8 texel data from <paramref name="src"/>
/// into D24S8 words in <paramref name="dstBuffer"/>.
/// </summary>
/// <param name="cbs">Command buffer used to reserve the parameter buffer.</param>
/// <param name="src">Holder of the source data (two 32-bit words per pixel).</param>
/// <param name="dstBuffer">Buffer that receives one 32-bit word per pixel.</param>
/// <param name="pixelCount">Number of pixels to convert.</param>
/// <param name="dstOffset">Start offset passed to the shader, which adds it to the destination word index.</param>
public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto<DisposableBuffer> dstBuffer, int pixelCount, int dstOffset)
{
    int inSize = pixelCount * 2 * sizeof(int);

    var srcBuffer = src.GetBuffer();

    const int ParamsBufferSize = sizeof(int) * 2;

    // NOTE(review): this method changes the bound program, uniform buffer and storage
    // buffers without saving or restoring the caller's pipeline state - confirm that
    // callers tolerate this or add a save/restore around the dispatch.
    Span<int> shaderParams = stackalloc int[2];

    shaderParams[0] = pixelCount;
    shaderParams[1] = dstOffset;

    using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
    buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
    _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]);

    Span<Auto<DisposableBuffer>> sbRanges = new Auto<DisposableBuffer>[2];

    sbRanges[0] = srcBuffer;
    sbRanges[1] = dstBuffer;
    _pipeline.SetStorageBuffers(1, sbRanges);

    // Bind the conversion program; without this the dispatch would run whichever
    // program happened to be bound before the call.
    _pipeline.SetProgram(_programConvertD32S8ToD24S8);
    _pipeline.DispatchCompute(1 + inSize / ConvertElementsPerWorkgroup, 1, 1, "D32S8 to D24S8 Conversion");
}
public unsafe void ClearColor(
int index,
ReadOnlySpan<float> clearColor,

View file

@ -18,6 +18,7 @@
<EmbeddedResource Include="Shaders\Blit.metal" />
<EmbeddedResource Include="Shaders\BlitMs.metal" />
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
<EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" />
<EmbeddedResource Include="Shaders\ColorClear.metal" />
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
<EmbeddedResource Include="Shaders\DepthBlit.metal" />

View file

@ -0,0 +1,66 @@
#include <metal_stdlib>
using namespace metal;
// Parameters read by the kernel from the constant buffer.
struct StrideArguments {
// Number of pixels to convert.
int pixelCount;
// Index of the first 32-bit word written in the output data.
int dstStartOffset;
// Source words. Declared with one element, but the kernel indexes it as a run of
// 32-bit words (two per pixel: depth float bits, then stencil).
struct InData {
uint data[1];
// Destination words. Declared with one element, but the kernel indexes it as a run of
// 32-bit words (one packed word per pixel).
struct OutData {
uint data[1];
// Constant-buffer bindings seen by the kernel.
struct ConstantBuffers {
constant StrideArguments* stride_arguments;
// Storage-buffer bindings seen by the kernel.
struct StorageBuffers {
device InData* in_data;
device OutData* out_data;
// Converts pixelCount texels from two-word D32FS8 (depth float bits, stencil) into one
// packed word each: clamped depth scaled to 24 bits in the upper bits, stencil in the low 8.
// The work is split evenly across every invocation of the dispatch.
kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]],
                       device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
                       uint3 thread_position_in_grid [[thread_position_in_grid]],
                       uint3 threads_per_threadgroup [[threads_per_threadgroup]],
                       uint3 threadgroups_per_grid [[threadgroups_per_grid]])
{
    // Total number of invocations in the dispatch. The last parameter must be bound to
    // [[threadgroups_per_grid]]: binding it to [[threads_per_grid]] would multiply the
    // thread count by the threadgroup size a second time, so each invocation would pick
    // the wrong slice and part of the data would never be converted.
    int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);

    int copiesRequired = constant_buffers.stride_arguments->pixelCount;

    // Find the conversions that this invocation should perform.

    // - Conversions that all invocations perform.
    int allInvocationCopies = copiesRequired / invocations;

    // - Extra remainder conversion that this invocation performs.
    int index = int(thread_position_in_grid.x);
    int extra = (index < (copiesRequired % invocations)) ? 1 : 0;

    int copyCount = allInvocationCopies + extra;

    // Finally, get the starting pixel. Make sure to count the extra conversions
    // taken by lower-indexed invocations.
    int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);

    // Two source words per pixel, one destination word per pixel.
    int srcOffset = startCopy * 2;
    int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startCopy;

    // Perform the conversion for this region.
    for (int i = 0; i < copyCount; i++)
    {
        float depth = as_type<float>(storage_buffers.in_data->data[srcOffset++]);
        uint stencil = storage_buffers.in_data->data[srcOffset++];

        uint rescaledDepth = uint(clamp(depth, 0.0, 1.0) * 16777215.0);

        storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff);
    }
}

View file

@ -277,9 +277,18 @@ namespace Ryujinx.Graphics.Metal
var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true);
var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value;
// TODO: D32S8 conversion via temp copy holder
if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder))
offset = 0;
CopyFromOrToBuffer(cbs, mtlBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset: offset, stride: stride);
CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride);
if (tempCopyHolder != null)
CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset);
public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
@ -287,27 +296,62 @@ namespace Ryujinx.Graphics.Metal
return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel);
// Returns the buffer length to use for the given data size; this version returns the
// size unchanged.
private int GetBufferDataLength(int size)
// TODO: D32S8 conversion
return size;
/// <summary>
/// Writes texture data into <paramref name="storage"/>, expanding D24S8 texels to D32FS8
/// when this texture needs the conversion.
/// </summary>
/// <param name="storage">Destination bytes.</param>
/// <param name="input">Source texture data.</param>
private void CopyDataToBuffer(Span<byte> storage, ReadOnlySpan<byte> input)
{
    if (NeedsD24S8Conversion())
    {
        FormatConverter.ConvertD24S8ToD32FS8(storage, input);
        return;
    }

    // No conversion needed: copy the bytes as they are. Without this fallback the
    // destination would be left unwritten for every format that is not converted.
    input.CopyTo(storage);
}
/// <summary>
/// Returns texture data read back from <paramref name="storage"/>, packing D32FS8 texels
/// to D24S8 when this texture needs the conversion.
/// </summary>
/// <param name="storage">Bytes read back from the buffer.</param>
/// <param name="size">Size used to allocate the result when <paramref name="output"/> is empty.</param>
/// <param name="output">Optional destination for converted data; allocated here when empty.</param>
/// <returns><paramref name="storage"/> itself when no conversion is needed, otherwise the converted data.</returns>
private ReadOnlySpan<byte> GetDataFromBuffer(ReadOnlySpan<byte> storage, int size, Span<byte> output)
{
    // Fast path: the stored data is already in the requested layout.
    if (!NeedsD24S8Conversion())
    {
        return storage;
    }

    Span<byte> converted = output;

    if (converted.IsEmpty)
    {
        converted = new byte[GetBufferDataLength(size)];
    }

    FormatConverter.ConvertD32FS8ToD24S8(converted, storage);

    return converted;
}
/// <summary>
/// Chooses the buffer a texture-to-buffer copy should write to. When a D24S8 conversion
/// is needed, a new buffer of <paramref name="hostSize"/> bytes is created for the copy;
/// otherwise the copy goes straight to <paramref name="target"/>.
/// </summary>
/// <param name="cbs">Command buffer used to obtain the new buffer.</param>
/// <param name="hostSize">Size in bytes of the buffer to create.</param>
/// <param name="target">Final destination buffer.</param>
/// <param name="copyTarget">Buffer the copy should write to.</param>
/// <param name="copyTargetHolder">Holder of the created buffer, or null when none was created.</param>
/// <returns>True when a new buffer was created, false when the copy targets <paramref name="target"/> directly.</returns>
private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder)
{
    if (!NeedsD24S8Conversion())
    {
        copyTarget = target;
        copyTargetHolder = null;

        return false;
    }

    copyTargetHolder = Renderer.BufferManager.Create(hostSize);
    copyTarget = copyTargetHolder.GetBuffer().Get(cbs, 0, hostSize).Value;

    return true;
}
/// <summary>
/// Runs the D32FS8 to D24S8 conversion from <paramref name="hostData"/> into
/// <paramref name="copyTarget"/> when this texture needs it; does nothing otherwise.
/// </summary>
/// <param name="cbs">Command buffer passed to the helper shader.</param>
/// <param name="hostData">Holder of the data to convert.</param>
/// <param name="copyTarget">Buffer receiving the converted data.</param>
/// <param name="hostSize">Size in bytes of the data to convert (8 bytes per pixel).</param>
/// <param name="dstOffset">Destination offset forwarded to the helper shader.</param>
private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto<DisposableBuffer> copyTarget, int hostSize, int dstOffset)
{
    if (!NeedsD24S8Conversion())
    {
        return;
    }

    // Each pixel occupies two 32-bit words in the data being converted.
    int pixelCount = hostSize / (2 * sizeof(int));

    Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, pixelCount, dstOffset);
}
/// <summary>
/// True when the texture's format is a D24S8 variant but the Metal texture uses
/// Depth32FloatStencil8.
/// </summary>
private bool NeedsD24S8Conversion()
    => FormatTable.IsD24S8(Info.Format) && MtlFormat == MTLPixelFormat.Depth32FloatStencil8;
public void CopyFromOrToBuffer(
CommandBufferScoped cbs,
MTLBuffer buffer,
@ -564,6 +608,16 @@ namespace Ryujinx.Graphics.Metal
/// <summary>
/// Returns the buffer length to use for the given data length: doubled when a D24S8
/// conversion is needed, unchanged otherwise.
/// </summary>
/// <param name="length">Data length.</param>
private int GetBufferDataLength(int length)
{
    return NeedsD24S8Conversion() ? length * 2 : length;
}
/// <summary>
/// Not implemented; always throws.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public void SetStorage(BufferRange buffer) => throw new NotImplementedException();