D32FS8 to D24S8 Conversion
This commit is contained in:
parent
f06c869df1
commit
d7c71c8a7b
6 changed files with 233 additions and 14 deletions
49
src/Ryujinx.Graphics.Metal/FormatConverter.cs
Normal file
49
src/Ryujinx.Graphics.Metal/FormatConverter.cs
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
using System;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace Ryujinx.Graphics.Metal
|
||||||
|
{
|
||||||
|
class FormatConverter
{
    /// <summary>
    /// Expands packed 32-bit depth/stencil pixels (24-bit unorm depth in the upper bits,
    /// 8-bit stencil in the low byte) into pairs of 32-bit words: the depth as a float
    /// followed by the stencil value.
    /// </summary>
    /// <param name="output">Destination bytes; receives two 32-bit words per input pixel.</param>
    /// <param name="input">Source bytes, read as one 32-bit word per pixel.</param>
    public static void ConvertD24S8ToD32FS8(Span<byte> output, ReadOnlySpan<byte> input)
    {
        const float UnormToFloat = 1f / 0xffffff;

        Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
        ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

        for (int i = 0; i < inputUint.Length; i++)
        {
            uint depthStencil = inputUint[i];
            uint depth = depthStencil >> 8;
            uint stencil = depthStencil & 0xff;

            // Every input word produces two output words.
            int j = i * 2;

            outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat);
            outputUint[j + 1] = stencil;
        }
    }

    /// <summary>
    /// Packs pairs of 32-bit words (float depth followed by stencil) into single 32-bit
    /// pixels with a 24-bit unorm depth in the upper bits and the stencil in the low byte.
    /// </summary>
    /// <remarks>
    /// The depth is clamped to [0, 1] while it is still a float, and NaN is treated as 0.
    /// Converting an out-of-range float straight to <see cref="uint"/> gives an unspecified
    /// result in an unchecked context, so clamping after the cast is not reliable.
    /// </remarks>
    /// <param name="output">Destination bytes; receives one 32-bit word per input pixel.</param>
    /// <param name="input">Source bytes, read as two 32-bit words per pixel.</param>
    public static void ConvertD32FS8ToD24S8(Span<byte> output, ReadOnlySpan<byte> input)
    {
        const float MaxDepth24 = 0xffffff;

        Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
        ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);

        for (int i = 0; i < inputUint.Length; i += 2)
        {
            float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]);
            uint stencil = inputUint[i + 1];

            // Bring the value into range before the float -> uint conversion.
            float normalizedDepth = float.IsNaN(depth) ? 0f : Math.Clamp(depth, 0f, 1f);
            uint depth24 = (uint)(normalizedDepth * MaxDepth24);

            // Two input words collapse into one output word.
            outputUint[i >> 1] = (depth24 << 8) | (stencil & 0xff);
        }
    }
}
|
||||||
|
}
|
|
@ -172,21 +172,25 @@ namespace Ryujinx.Graphics.Metal
|
||||||
{
|
{
|
||||||
var mtlFormat = _table[(int)format];
|
var mtlFormat = _table[(int)format];
|
||||||
|
|
||||||
if (mtlFormat == MTLPixelFormat.Depth24UnormStencil8)
|
if (IsD24S8(format))
|
||||||
{
|
{
|
||||||
if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported)
|
if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported)
|
||||||
{
|
{
|
||||||
Logger.Error?.PrintMsg(LogClass.Gpu, "Application requested Depth24Stencil8, which is unsupported on this device!");
|
|
||||||
mtlFormat = MTLPixelFormat.Depth32FloatStencil8;
|
mtlFormat = MTLPixelFormat.Depth32FloatStencil8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mtlFormat == MTLPixelFormat.Invalid)
|
if (mtlFormat == MTLPixelFormat.Invalid)
|
||||||
{
|
{
|
||||||
Logger.Error?.PrintMsg(LogClass.Gpu, $"Application requested {format}, no direct equivalent was found!");
|
Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return mtlFormat;
|
return mtlFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether <paramref name="format"/> is one of the 24-bit depth formats
/// <c>D24UnormS8Uint</c>, <c>S8UintD24Unorm</c> or <c>X8UintD24Unorm</c>.
/// </summary>
/// <param name="format">Format to test.</param>
/// <returns>True for the three formats listed above, false otherwise.</returns>
public static bool IsD24S8(Format format)
{
    switch (format)
    {
        case Format.D24UnormS8Uint:
        case Format.S8UintD24Unorm:
        case Format.X8UintD24Unorm:
            return true;
        default:
            return false;
    }
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ namespace Ryujinx.Graphics.Metal
|
||||||
private readonly List<IProgram> _programsColorClearU = new();
|
private readonly List<IProgram> _programsColorClearU = new();
|
||||||
private readonly IProgram _programDepthStencilClear;
|
private readonly IProgram _programDepthStencilClear;
|
||||||
private readonly IProgram _programStrideChange;
|
private readonly IProgram _programStrideChange;
|
||||||
|
private readonly IProgram _programConvertD32S8ToD24S8;
|
||||||
private readonly IProgram _programDepthBlit;
|
private readonly IProgram _programDepthBlit;
|
||||||
private readonly IProgram _programDepthBlitMs;
|
private readonly IProgram _programDepthBlitMs;
|
||||||
private readonly IProgram _programStencilBlit;
|
private readonly IProgram _programStencilBlit;
|
||||||
|
@ -151,6 +152,17 @@ namespace Ryujinx.Graphics.Metal
|
||||||
new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
|
new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl)
|
||||||
], strideChangeResourceLayout, device, new ComputeSize(64, 1, 1));
|
], strideChangeResourceLayout, device, new ComputeSize(64, 1, 1));
|
||||||
|
|
||||||
|
var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder()
|
||||||
|
.Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0)
|
||||||
|
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1)
|
||||||
|
.Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build();
|
||||||
|
|
||||||
|
var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal");
|
||||||
|
_programConvertD32S8ToD24S8 = new Program(
|
||||||
|
[
|
||||||
|
new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl)
|
||||||
|
], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1));
|
||||||
|
|
||||||
var depthBlitSource = ReadMsl("DepthBlit.metal");
|
var depthBlitSource = ReadMsl("DepthBlit.metal");
|
||||||
_programDepthBlit = new Program(
|
_programDepthBlit = new Program(
|
||||||
[
|
[
|
||||||
|
@ -591,6 +603,39 @@ namespace Ryujinx.Graphics.Metal
|
||||||
_pipeline.SwapState(null);
|
_pipeline.SwapState(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs the D32S8 to D24S8 conversion compute program, reading from <paramref name="src"/>
/// and writing into <paramref name="dstBuffer"/>.
/// </summary>
/// <param name="cbs">Command buffer used to reserve the parameter buffer.</param>
/// <param name="src">Holder of the buffer bound as the first storage buffer (shader input).</param>
/// <param name="dstBuffer">Buffer bound as the second storage buffer (shader output).</param>
/// <param name="pixelCount">Number of pixels to convert; uploaded as the first shader parameter.</param>
/// <param name="dstOffset">
/// Destination start offset; uploaded as the second shader parameter.
/// NOTE(review): the unit expected by the shader is not visible here - confirm against callers.
/// </param>
public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto<DisposableBuffer> dstBuffer, int pixelCount, int dstOffset)
{
    const int ParamsBufferSize = sizeof(int) * 2;

    // Two 32-bit words per source pixel.
    int sourceSize = pixelCount * 2 * sizeof(int);

    var sourceBuffer = src.GetBuffer();

    // Save current state
    _pipeline.SwapState(_helperShaderState);

    Span<int> parameters = stackalloc int[2] { pixelCount, dstOffset };

    using var paramsBuffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize);
    paramsBuffer.Holder.SetDataUnchecked<int>(paramsBuffer.Offset, parameters);
    _pipeline.SetUniformBuffers([new BufferAssignment(0, paramsBuffer.Range)]);

    // Storage bindings start at slot 1: input first, output second.
    Span<Auto<DisposableBuffer>> storageBuffers = new Auto<DisposableBuffer>[] { sourceBuffer, dstBuffer };
    _pipeline.SetStorageBuffers(1, storageBuffers);

    _pipeline.SetProgram(_programConvertD32S8ToD24S8);

    int groupCount = 1 + sourceSize / ConvertElementsPerWorkgroup;
    _pipeline.DispatchCompute(groupCount, 1, 1, "D32S8 to D24S8 Conversion");

    // Restore previous state
    _pipeline.SwapState(null);
}
|
||||||
|
|
||||||
public unsafe void ClearColor(
|
public unsafe void ClearColor(
|
||||||
int index,
|
int index,
|
||||||
ReadOnlySpan<float> clearColor,
|
ReadOnlySpan<float> clearColor,
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
<EmbeddedResource Include="Shaders\Blit.metal" />
|
<EmbeddedResource Include="Shaders\Blit.metal" />
|
||||||
<EmbeddedResource Include="Shaders\BlitMs.metal" />
|
<EmbeddedResource Include="Shaders\BlitMs.metal" />
|
||||||
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
|
<EmbeddedResource Include="Shaders\ChangeBufferStride.metal" />
|
||||||
|
<EmbeddedResource Include="Shaders\ConvertD32S8ToD24S8.metal" />
|
||||||
<EmbeddedResource Include="Shaders\ColorClear.metal" />
|
<EmbeddedResource Include="Shaders\ColorClear.metal" />
|
||||||
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
|
<EmbeddedResource Include="Shaders\DepthStencilClear.metal" />
|
||||||
<EmbeddedResource Include="Shaders\DepthBlit.metal" />
|
<EmbeddedResource Include="Shaders\DepthBlit.metal" />
|
||||||
|
|
66
src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal
Normal file
66
src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
#include <metal_stdlib>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
// Parameters uploaded by the host for one conversion dispatch.
struct StrideArguments {
    // Number of pixels to convert.
    int pixelCount;
    // Index of the first 32-bit word written in the output buffer.
    int dstStartOffset;
};

// Input buffer view. Declared with a single element, but the kernel indexes past it
// to read two 32-bit words (float depth bits, then stencil) per pixel.
struct InData {
    uint data[1];
};

// Output buffer view. Declared with a single element, but the kernel indexes past it
// to write one packed 32-bit word per pixel.
struct OutData {
    uint data[1];
};

struct ConstantBuffers {
    constant StrideArguments* stride_arguments;
};

struct StorageBuffers {
    device InData* in_data;
    device OutData* out_data;
};

// Converts pixels stored as (float depth, uint stencil) word pairs into packed words
// holding a 24-bit unorm depth in the upper bits and the stencil in the low byte.
// The pixels are split evenly across every invocation of the dispatch.
kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]],
                       device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]],
                       uint3 thread_position_in_grid [[thread_position_in_grid]],
                       uint3 threads_per_threadgroup [[threads_per_threadgroup]],
                       uint3 threadgroups_per_grid [[threadgroups_per_grid]])
{
    // Determine what slice of the conversion this invocation will perform.
    // The total invocation count is threads per group times the number of groups, so the
    // second factor must be bound to threadgroups_per_grid (not threads_per_grid).
    int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);

    int copiesRequired = constant_buffers.stride_arguments->pixelCount;

    // Find the pixels that this invocation should convert.

    // - Pixels that all invocations convert.
    int allInvocationCopies = copiesRequired / invocations;

    // - Extra remainder pixel that this invocation converts.
    int index = int(thread_position_in_grid.x);
    int extra = (index < (copiesRequired % invocations)) ? 1 : 0;

    int copyCount = allInvocationCopies + extra;

    // Finally, get the starting pixel. Make sure to count the extra pixels taken by
    // lower-indexed invocations.
    int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);

    // Two input words per pixel, one output word per pixel.
    int srcOffset = startCopy * 2;
    int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startCopy;

    // Perform the conversion for this region.
    for (int i = 0; i < copyCount; i++)
    {
        float depth = as_type<float>(storage_buffers.in_data->data[srcOffset++]);
        uint stencil = storage_buffers.in_data->data[srcOffset++];

        // Clamp to [0, 1] before scaling to the 24-bit range.
        uint rescaledDepth = uint(clamp(depth, 0.0, 1.0) * 16777215.0);

        storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff);
    }
}
|
|
@ -277,9 +277,18 @@ namespace Ryujinx.Graphics.Metal
|
||||||
var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true);
|
var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true);
|
||||||
var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value;
|
var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value;
|
||||||
|
|
||||||
// TODO: D32S8 conversion via temp copy holder
|
if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder))
|
||||||
|
{
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
CopyFromOrToBuffer(cbs, mtlBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset: offset, stride: stride);
|
CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride);
|
||||||
|
|
||||||
|
if (tempCopyHolder != null)
|
||||||
|
{
|
||||||
|
CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset);
|
||||||
|
tempCopyHolder.Dispose();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
|
public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
|
||||||
|
@ -287,27 +296,62 @@ namespace Ryujinx.Graphics.Metal
|
||||||
return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel);
|
return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int GetBufferDataLength(int size)
|
|
||||||
{
|
|
||||||
// TODO: D32S8 conversion
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void CopyDataToBuffer(Span<byte> storage, ReadOnlySpan<byte> input)
|
private void CopyDataToBuffer(Span<byte> storage, ReadOnlySpan<byte> input)
|
||||||
{
|
{
|
||||||
// TODO: D32S8 conversion
|
if (NeedsD24S8Conversion())
|
||||||
|
{
|
||||||
|
FormatConverter.ConvertD24S8ToD32FS8(storage, input);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
input.CopyTo(storage);
|
input.CopyTo(storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
private ReadOnlySpan<byte> GetDataFromBuffer(ReadOnlySpan<byte> storage, int size, Span<byte> output)
|
private ReadOnlySpan<byte> GetDataFromBuffer(ReadOnlySpan<byte> storage, int size, Span<byte> output)
|
||||||
{
|
{
|
||||||
// TODO: D32S8 conversion
|
if (NeedsD24S8Conversion())
|
||||||
|
{
|
||||||
|
if (output.IsEmpty)
|
||||||
|
{
|
||||||
|
output = new byte[GetBufferDataLength(size)];
|
||||||
|
}
|
||||||
|
|
||||||
|
FormatConverter.ConvertD32FS8ToD24S8(output, storage);
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
return storage;
|
return storage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Selects the buffer a texture-to-buffer copy should write into. When D24S8 conversion
/// is needed, a new buffer of <paramref name="hostSize"/> bytes is created and returned;
/// otherwise <paramref name="target"/> is used directly.
/// </summary>
/// <param name="cbs">Command buffer passed to the buffer lookup of the newly created holder.</param>
/// <param name="hostSize">Size requested for the new buffer.</param>
/// <param name="target">Buffer used as-is when no conversion is needed.</param>
/// <param name="copyTarget">Receives the buffer the copy should write into.</param>
/// <param name="copyTargetHolder">Receives the created holder, or null when no buffer was created.</param>
/// <returns>True when a new buffer was created, false when <paramref name="target"/> is used.</returns>
private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder)
{
    if (!NeedsD24S8Conversion())
    {
        // No conversion: copy straight into the caller's buffer.
        copyTarget = target;
        copyTargetHolder = null;

        return false;
    }

    copyTargetHolder = Renderer.BufferManager.Create(hostSize);

    var createdBuffer = copyTargetHolder.GetBuffer();
    copyTarget = createdBuffer.Get(cbs, 0, hostSize).Value;

    return true;
}
|
||||||
|
|
||||||
|
/// <summary>
/// When D24S8 conversion is needed, runs the helper-shader conversion from
/// <paramref name="hostData"/> into <paramref name="copyTarget"/>. Does nothing otherwise.
/// </summary>
/// <param name="cbs">Command buffer passed to the helper shader.</param>
/// <param name="hostData">Holder of the data to convert.</param>
/// <param name="copyTarget">Buffer that receives the converted data.</param>
/// <param name="hostSize">Size of the source data in bytes; divided by 8 to get the pixel count.</param>
/// <param name="dstOffset">Destination offset forwarded unchanged to the helper shader.</param>
private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto<DisposableBuffer> copyTarget, int hostSize, int dstOffset)
{
    if (!NeedsD24S8Conversion())
    {
        return;
    }

    // Each source pixel is two 32-bit words.
    int pixelCount = hostSize / (2 * sizeof(int));

    Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, pixelCount, dstOffset);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether this texture was requested with a D24S8-class format but is backed
/// by <c>Depth32FloatStencil8</c>.
/// </summary>
/// <returns>True when both conditions hold.</returns>
private bool NeedsD24S8Conversion()
{
    bool requestedD24S8 = FormatTable.IsD24S8(Info.Format);

    return requestedD24S8 && MtlFormat == MTLPixelFormat.Depth32FloatStencil8;
}
|
||||||
|
|
||||||
public void CopyFromOrToBuffer(
|
public void CopyFromOrToBuffer(
|
||||||
CommandBufferScoped cbs,
|
CommandBufferScoped cbs,
|
||||||
MTLBuffer buffer,
|
MTLBuffer buffer,
|
||||||
|
@ -564,6 +608,16 @@ namespace Ryujinx.Graphics.Metal
|
||||||
buffer.Dispose();
|
buffer.Dispose();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns <paramref name="length"/> doubled when D24S8 conversion is needed,
/// and <paramref name="length"/> unchanged otherwise.
/// </summary>
/// <param name="length">Length to adjust.</param>
/// <returns>The adjusted length.</returns>
private int GetBufferDataLength(int length)
{
    return NeedsD24S8Conversion() ? length * 2 : length;
}
|
||||||
|
|
||||||
public void SetStorage(BufferRange buffer)
|
public void SetStorage(BufferRange buffer)
|
||||||
{
|
{
|
||||||
throw new NotImplementedException();
|
throw new NotImplementedException();
|
||||||
|
|
Loading…
Reference in a new issue