Implement Load/Store Local/Shared and Atomic shared using new instructions (#5241)

* Implement Load/Store Local/Shared and Atomic shared using new instructions

* Remove now unused code

* Fix base offset register overwrite

* Fix missing storage buffer set index when generating GLSL for Vulkan

* Shader cache version bump

* Remove more unused code

* Some PR feedback
This commit is contained in:
gdkchan 2023-06-15 17:31:53 -03:00 committed by GitHub
parent 32d21ddf17
commit f92921a6d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
30 changed files with 475 additions and 567 deletions

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2; private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 5080; private const uint CodeGenVersion = 5241;
private const string SharedTocFileName = "shared.toc"; private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data"; private const string SharedDataFileName = "shared.data";

View file

@ -71,40 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;"); context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;");
context.AppendLine(); context.AppendLine();
if (context.Config.Stage == ShaderStage.Compute)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);
if (localMemorySize != 0)
{
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
context.AppendLine();
}
int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);
if (sharedMemorySize != 0)
{
string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
context.AppendLine();
}
}
else if (context.Config.LocalMemorySize != 0)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
context.AppendLine();
}
DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values);
DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values);
DeclareMemories(context, context.Config.Properties.LocalMemories.Values, isShared: false);
DeclareMemories(context, context.Config.Properties.SharedMemories.Values, isShared: true);
var textureDescriptors = context.Config.GetTextureDescriptors(); var textureDescriptors = context.Config.GetTextureDescriptors();
if (textureDescriptors.Length != 0) if (textureDescriptors.Length != 0)
@ -238,11 +208,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
context.AppendLine(); context.AppendLine();
} }
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
{ {
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
@ -273,11 +238,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl"); AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
} }
if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{ {
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl"); AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
@ -358,7 +318,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
_ => "std430" _ => "std430"
}; };
context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}"); string set = string.Empty;
if (context.Config.Options.TargetApi == TargetApi.Vulkan)
{
set = $"set = {buffer.Set}, ";
}
context.AppendLine($"layout ({set}binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}");
context.EnterScope(); context.EnterScope();
foreach (StructureField field in buffer.Type.Fields) foreach (StructureField field in buffer.Type.Fields)
@ -391,6 +358,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
} }
} }
/// <summary>
/// Emits one GLSL array declaration per memory definition.
/// </summary>
/// <param name="context">Code generation context that receives the declaration lines</param>
/// <param name="memories">Memory definitions to declare</param>
/// <param name="isShared">When true, every declaration is prefixed with the GLSL <c>shared</c> qualifier</param>
private static void DeclareMemories(CodeGenContext context, IEnumerable<MemoryDefinition> memories, bool isShared)
{
    string qualifier = isShared ? "shared " : string.Empty;

    foreach (MemoryDefinition definition in memories)
    {
        // The array flag is masked off so only the element type name is printed;
        // the array suffix is appended explicitly below.
        string elementTypeName = GetVarTypeName(context, definition.Type & ~AggregateType.Array);

        // A non-positive length produces an unsized "[]" suffix.
        string length = definition.ArrayLength > 0
            ? definition.ArrayLength.ToString(CultureInfo.InvariantCulture)
            : string.Empty;

        context.AppendLine($"{qualifier}{elementTypeName} {definition.Name}[{length}];");
    }
}
private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
{ {
int arraySize = 0; int arraySize = 0;
@ -717,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
string code = EmbeddedResources.ReadAllText(filename); string code = EmbeddedResources.ReadAllText(filename);
code = code.Replace("\t", CodeGenContext.Tab); code = code.Replace("\t", CodeGenContext.Tab);
code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
{ {

View file

@ -11,9 +11,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public const string IAttributePrefix = "in_attr"; public const string IAttributePrefix = "in_attr";
public const string OAttributePrefix = "out_attr"; public const string OAttributePrefix = "out_attr";
public const string LocalMemoryName = "local_mem";
public const string SharedMemoryName = "shared_mem";
public const string ArgumentNamePrefix = "a"; public const string ArgumentNamePrefix = "a";
public const string UndefinedName = "undef"; public const string UndefinedName = "undef";

View file

@ -1,21 +0,0 @@
// Atomically replaces $SHARED_MEM$[offset] with the signed maximum of its current
// contents and `value`, and returns the contents observed before the update.
// The stored word is handled as uint and converted to int only for the comparison.
// $SHARED_MEM$ is a template placeholder, not GLSL; it is substituted with the
// shared memory array name before this helper is appended to a shader.
int Helper_AtomicMaxS32(int offset, int value)
{
uint oldValue, newValue;
do
{
// Snapshot the current word and compute the candidate replacement from it.
oldValue = $SHARED_MEM$[offset];
newValue = uint(max(int(oldValue), value));
// atomicCompSwap returns the word's previous contents; a mismatch with the
// snapshot means the word changed in between, so the loop retries.
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}
// Atomically replaces $SHARED_MEM$[offset] with the signed minimum of its current
// contents and `value`, and returns the contents observed before the update.
// The stored word is handled as uint and converted to int only for the comparison.
// $SHARED_MEM$ is a template placeholder, not GLSL; it is substituted with the
// shared memory array name before this helper is appended to a shader.
int Helper_AtomicMinS32(int offset, int value)
{
uint oldValue, newValue;
do
{
// Snapshot the current word and compute the candidate replacement from it.
oldValue = $SHARED_MEM$[offset];
newValue = uint(min(int(oldValue), value));
// atomicCompSwap returns the word's previous contents; a mismatch with the
// snapshot means the word changed in between, so the loop retries.
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}

View file

@ -2,9 +2,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ {
static class HelperFunctionNames static class HelperFunctionNames
{ {
public static string AtomicMaxS32 = "Helper_AtomicMaxS32";
public static string AtomicMinS32 = "Helper_AtomicMinS32";
public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; public static string MultiplyHighS32 = "Helper_MultiplyHighS32";
public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; public static string MultiplyHighU32 = "Helper_MultiplyHighU32";
@ -13,10 +10,5 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static string ShuffleUp = "Helper_ShuffleUp"; public static string ShuffleUp = "Helper_ShuffleUp";
public static string ShuffleXor = "Helper_ShuffleXor"; public static string ShuffleXor = "Helper_ShuffleXor";
public static string SwizzleAdd = "Helper_SwizzleAdd"; public static string SwizzleAdd = "Helper_SwizzleAdd";
public static string StoreShared16 = "Helper_StoreShared16";
public static string StoreShared8 = "Helper_StoreShared8";
public static string StoreStorage16 = "Helper_StoreStorage16";
public static string StoreStorage8 = "Helper_StoreStorage8";
} }
} }

View file

@ -1,23 +0,0 @@
// Writes the low 16 bits of `value` into the shared memory word that contains
// byte offset `offset`, leaving the other bits of that word unchanged.
// $SHARED_MEM$ is a template placeholder, not GLSL; it is substituted with the
// shared memory array name before this helper is appended to a shader.
// NOTE(review): when (offset & 3) == 3 the insert would span bits 24..39, which
// exceeds the 32-bit word; bitfieldInsert is undefined in that case. Presumably
// callers only pass 2-byte aligned offsets - confirm.
void Helper_StoreShared16(int offset, uint value)
{
// Split the byte offset into a 32-bit word index and a bit position inside that word.
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
// Read-modify-write on a snapshot of the word.
oldValue = $SHARED_MEM$[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
// Retry when the word no longer matches the snapshot at swap time.
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}
// Writes the low 8 bits of `value` into the shared memory word that contains
// byte offset `offset`, leaving the other bits of that word unchanged.
// $SHARED_MEM$ is a template placeholder, not GLSL; it is substituted with the
// shared memory array name before this helper is appended to a shader.
void Helper_StoreShared8(int offset, uint value)
{
// Split the byte offset into a 32-bit word index and a bit position inside that word.
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
// Read-modify-write on a snapshot of the word.
oldValue = $SHARED_MEM$[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
// Retry when the word no longer matches the snapshot at swap time.
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}

View file

@ -68,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
string args = string.Empty; string args = string.Empty;
if (atomic && operation.StorageKind == StorageKind.StorageBuffer) if (atomic && (operation.StorageKind == StorageKind.StorageBuffer || operation.StorageKind == StorageKind.SharedMemory))
{ {
args = GenerateLoadOrStore(context, operation, isStore: false); args = GenerateLoadOrStore(context, operation, isStore: false);
@ -81,23 +81,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
args += ", " + GetSoureExpr(context, operation.GetSource(argIndex), dstType); args += ", " + GetSoureExpr(context, operation.GetSource(argIndex), dstType);
} }
} }
else if (atomic && operation.StorageKind == StorageKind.SharedMemory)
{
args = LoadShared(context, operation);
// For shared memory access, the second argument is unused and should be ignored.
// It is there to make both storage and shared access have the same number of arguments.
// For storage, both inputs are consumed when the argument index is 0, so we should skip it here.
for (int argIndex = 2; argIndex < arity; argIndex++)
{
args += ", ";
AggregateType dstType = GetSrcVarType(inst, argIndex);
args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
}
}
else else
{ {
for (int argIndex = 0; argIndex < arity; argIndex++) for (int argIndex = 0; argIndex < arity; argIndex++)
@ -179,12 +162,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
case Instruction.Load: case Instruction.Load:
return Load(context, operation); return Load(context, operation);
case Instruction.LoadLocal:
return LoadLocal(context, operation);
case Instruction.LoadShared:
return LoadShared(context, operation);
case Instruction.Lod: case Instruction.Lod:
return Lod(context, operation); return Lod(context, operation);
@ -200,18 +177,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
case Instruction.Store: case Instruction.Store:
return Store(context, operation); return Store(context, operation);
case Instruction.StoreLocal:
return StoreLocal(context, operation);
case Instruction.StoreShared:
return StoreShared(context, operation);
case Instruction.StoreShared16:
return StoreShared16(context, operation);
case Instruction.StoreShared8:
return StoreShared8(context, operation);
case Instruction.TextureSample: case Instruction.TextureSample:
return TextureSample(context, operation); return TextureSample(context, operation);

View file

@ -17,9 +17,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd");
Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd");
Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap");
Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32);
Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax");
Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32);
Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin");
Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr");
Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange");
@ -83,8 +81,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ImageAtomic, InstType.Special); Add(Instruction.ImageAtomic, InstType.Special);
Add(Instruction.IsNan, InstType.CallUnary, "isnan"); Add(Instruction.IsNan, InstType.CallUnary, "isnan");
Add(Instruction.Load, InstType.Special); Add(Instruction.Load, InstType.Special);
Add(Instruction.LoadLocal, InstType.Special);
Add(Instruction.LoadShared, InstType.Special);
Add(Instruction.Lod, InstType.Special); Add(Instruction.Lod, InstType.Special);
Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); Add(Instruction.LogarithmB2, InstType.CallUnary, "log2");
Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9);
@ -118,10 +114,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.Sine, InstType.CallUnary, "sin"); Add(Instruction.Sine, InstType.CallUnary, "sin");
Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt");
Add(Instruction.Store, InstType.Special); Add(Instruction.Store, InstType.Special);
Add(Instruction.StoreLocal, InstType.Special);
Add(Instruction.StoreShared, InstType.Special);
Add(Instruction.StoreShared16, InstType.Special);
Add(Instruction.StoreShared8, InstType.Special);
Add(Instruction.Subtract, InstType.OpBinary, "-", 2); Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
Add(Instruction.TextureSample, InstType.Special); Add(Instruction.TextureSample, InstType.Special);

View file

@ -191,25 +191,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return GenerateLoadOrStore(context, operation, isStore: false); return GenerateLoadOrStore(context, operation, isStore: false);
} }
/// <summary>
/// Builds the GLSL expression that reads an element of the array named by
/// <see cref="DefaultNames.LocalMemoryName"/>.
/// </summary>
/// <param name="context">Code generation context forwarded to <c>LoadLocalOrShared</c></param>
/// <param name="operation">Load operation forwarded to <c>LoadLocalOrShared</c></param>
/// <returns>The expression produced by <c>LoadLocalOrShared</c></returns>
public static string LoadLocal(CodeGenContext context, AstOperation operation)
    => LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
/// <summary>
/// Builds the GLSL expression that reads an element of the array named by
/// <see cref="DefaultNames.SharedMemoryName"/>.
/// </summary>
/// <param name="context">Code generation context forwarded to <c>LoadLocalOrShared</c></param>
/// <param name="operation">Load operation forwarded to <c>LoadLocalOrShared</c></param>
/// <returns>The expression produced by <c>LoadLocalOrShared</c></returns>
public static string LoadShared(CodeGenContext context, AstOperation operation)
    => LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
/// <summary>
/// Builds an indexing expression of the form <c>arrayName[index]</c>.
/// </summary>
/// <param name="context">Code generation context used to render the index expression</param>
/// <param name="operation">Operation whose source 0 is used as the array index</param>
/// <param name="arrayName">Name of the array being indexed</param>
/// <returns>The GLSL indexing expression</returns>
private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
{
    // Source 0 is rendered using the type the instruction declares for that operand slot.
    string index = GetSoureExpr(context, operation.GetSource(0), GetSrcVarType(operation.Inst, 0));

    return $"{arrayName}[{index}]";
}
public static string Lod(CodeGenContext context, AstOperation operation) public static string Lod(CodeGenContext context, AstOperation operation)
{ {
AstTextureOperation texOp = (AstTextureOperation)operation; AstTextureOperation texOp = (AstTextureOperation)operation;
@ -263,58 +244,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return GenerateLoadOrStore(context, operation, isStore: true); return GenerateLoadOrStore(context, operation, isStore: true);
} }
/// <summary>
/// Builds the GLSL assignment that writes an element of the array named by
/// <see cref="DefaultNames.LocalMemoryName"/>.
/// </summary>
/// <param name="context">Code generation context forwarded to <c>StoreLocalOrShared</c></param>
/// <param name="operation">Store operation forwarded to <c>StoreLocalOrShared</c></param>
/// <returns>The assignment produced by <c>StoreLocalOrShared</c></returns>
public static string StoreLocal(CodeGenContext context, AstOperation operation)
    => StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
/// <summary>
/// Builds the GLSL assignment that writes an element of the array named by
/// <see cref="DefaultNames.SharedMemoryName"/>.
/// </summary>
/// <param name="context">Code generation context forwarded to <c>StoreLocalOrShared</c></param>
/// <param name="operation">Store operation forwarded to <c>StoreLocalOrShared</c></param>
/// <returns>The assignment produced by <c>StoreLocalOrShared</c></returns>
public static string StoreShared(CodeGenContext context, AstOperation operation)
    => StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
/// <summary>
/// Builds an assignment of the form <c>arrayName[index] = value</c>.
/// </summary>
/// <param name="context">Code generation context used to render both operands</param>
/// <param name="operation">Operation whose source 0 is the array index and source 1 the value to store</param>
/// <param name="arrayName">Name of the array being written</param>
/// <returns>The GLSL assignment expression</returns>
private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
{
    IAstNode indexNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string index = GetSoureExpr(context, indexNode, GetSrcVarType(operation.Inst, 0));

    // The value is reinterpreted from whatever type its node produces to U32.
    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return $"{arrayName}[{index}] = {value}";
}
/// <summary>
/// Builds a call to the helper function named by <see cref="HelperFunctionNames.StoreShared16"/>.
/// </summary>
/// <param name="context">Code generation context used to render both operands</param>
/// <param name="operation">Operation whose source 0 becomes the first call argument and source 1 the second</param>
/// <returns>The GLSL call expression</returns>
public static string StoreShared16(CodeGenContext context, AstOperation operation)
{
    IAstNode offsetNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));

    // The value is reinterpreted from whatever type its node produces to U32.
    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return $"{HelperFunctionNames.StoreShared16}({offset}, {value})";
}
/// <summary>
/// Builds a call to the helper function named by <see cref="HelperFunctionNames.StoreShared8"/>.
/// </summary>
/// <param name="context">Code generation context used to render both operands</param>
/// <param name="operation">Operation whose source 0 becomes the first call argument and source 1 the second</param>
/// <returns>The GLSL call expression</returns>
public static string StoreShared8(CodeGenContext context, AstOperation operation)
{
    IAstNode offsetNode = operation.GetSource(0);
    IAstNode valueNode = operation.GetSource(1);

    string offset = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));

    // The value is reinterpreted from whatever type its node produces to U32.
    string value = TypeConversion.ReinterpretCast(
        context,
        valueNode,
        OperandManager.GetNodeDestType(context, valueNode),
        AggregateType.U32);

    return $"{HelperFunctionNames.StoreShared8}({offset}, {value})";
}
public static string TextureSample(CodeGenContext context, AstOperation operation) public static string TextureSample(CodeGenContext context, AstOperation operation)
{ {
AstTextureOperation texOp = (AstTextureOperation)operation; AstTextureOperation texOp = (AstTextureOperation)operation;
@ -675,6 +604,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
varType = field.Type; varType = field.Type;
break; break;
case StorageKind.LocalMemory:
case StorageKind.SharedMemory:
if (!(operation.GetSource(srcIndex++) is AstOperand bindingId) || bindingId.Type != OperandType.Constant)
{
throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
}
MemoryDefinition memory = storageKind == StorageKind.LocalMemory
? context.Config.Properties.LocalMemories[bindingId.Value]
: context.Config.Properties.SharedMemories[bindingId.Value];
varName = memory.Name;
varType = memory.Type;
break;
case StorageKind.Input: case StorageKind.Input:
case StorageKind.InputPerPatch: case StorageKind.InputPerPatch:
case StorageKind.Output: case StorageKind.Output:

View file

@ -113,7 +113,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
if (node is AstOperation operation) if (node is AstOperation operation)
{ {
if (operation.Inst == Instruction.Load) if (operation.Inst == Instruction.Load || operation.Inst.IsAtomic())
{ {
switch (operation.StorageKind) switch (operation.StorageKind)
{ {
@ -136,6 +136,19 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return field.Type & AggregateType.ElementTypeMask; return field.Type & AggregateType.ElementTypeMask;
case StorageKind.LocalMemory:
case StorageKind.SharedMemory:
if (!(operation.GetSource(0) is AstOperand bindingId) || bindingId.Type != OperandType.Constant)
{
throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
}
MemoryDefinition memory = operation.StorageKind == StorageKind.LocalMemory
? context.Config.Properties.LocalMemories[bindingId.Value]
: context.Config.Properties.SharedMemories[bindingId.Value];
return memory.Type & AggregateType.ElementTypeMask;
case StorageKind.Input: case StorageKind.Input:
case StorageKind.InputPerPatch: case StorageKind.InputPerPatch:
case StorageKind.Output: case StorageKind.Output:

View file

@ -25,8 +25,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
public Dictionary<int, Instruction> ConstantBuffers { get; } = new Dictionary<int, Instruction>(); public Dictionary<int, Instruction> ConstantBuffers { get; } = new Dictionary<int, Instruction>();
public Dictionary<int, Instruction> StorageBuffers { get; } = new Dictionary<int, Instruction>(); public Dictionary<int, Instruction> StorageBuffers { get; } = new Dictionary<int, Instruction>();
public Instruction LocalMemory { get; set; } public Dictionary<int, Instruction> LocalMemories { get; } = new Dictionary<int, Instruction>();
public Instruction SharedMemory { get; set; } public Dictionary<int, Instruction> SharedMemories { get; } = new Dictionary<int, Instruction>();
public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>(); public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>();
public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>(); public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>();
public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>(); public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>();
@ -35,7 +35,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
public Instruction CoordTemp { get; set; }
public StructuredFunction CurrentFunction { get; set; } public StructuredFunction CurrentFunction { get; set; }
private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>(); private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>();
private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>(); private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>();

View file

@ -6,7 +6,6 @@ using Spv.Generator;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics; using System.Diagnostics;
using System.Linq;
using System.Numerics; using System.Numerics;
using static Spv.Specification; using static Spv.Specification;
using SpvInstruction = Spv.Generator.Instruction; using SpvInstruction = Spv.Generator.Instruction;
@ -44,13 +43,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.AddLocalVariable(spvLocal); context.AddLocalVariable(spvLocal);
context.DeclareLocal(local, spvLocal); context.DeclareLocal(local, spvLocal);
} }
var ivector2Type = context.TypeVector(context.TypeS32(), 2);
var coordTempPointerType = context.TypePointer(StorageClass.Function, ivector2Type);
var coordTemp = context.Variable(coordTempPointerType, StorageClass.Function);
context.AddLocalVariable(coordTemp);
context.CoordTemp = coordTemp;
} }
public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions) public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions)
@ -77,54 +69,30 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info) public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info)
{ {
if (context.Config.Stage == ShaderStage.Compute)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);
if (localMemorySize != 0)
{
DeclareLocalMemory(context, localMemorySize);
}
int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);
if (sharedMemorySize != 0)
{
DeclareSharedMemory(context, sharedMemorySize);
}
}
else if (context.Config.LocalMemorySize != 0)
{
int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
DeclareLocalMemory(context, localMemorySize);
}
DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values);
DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values);
DeclareMemories(context, context.Config.Properties.LocalMemories, context.LocalMemories, StorageClass.Private);
DeclareMemories(context, context.Config.Properties.SharedMemories, context.SharedMemories, StorageClass.Workgroup);
DeclareSamplers(context, context.Config.GetTextureDescriptors()); DeclareSamplers(context, context.Config.GetTextureDescriptors());
DeclareImages(context, context.Config.GetImageDescriptors()); DeclareImages(context, context.Config.GetImageDescriptors());
DeclareInputsAndOutputs(context, info); DeclareInputsAndOutputs(context, info);
} }
private static void DeclareLocalMemory(CodeGenContext context, int size) private static void DeclareMemories(
CodeGenContext context,
IReadOnlyDictionary<int, MemoryDefinition> memories,
Dictionary<int, SpvInstruction> dict,
StorageClass storage)
{ {
context.LocalMemory = DeclareMemory(context, StorageClass.Private, size); foreach ((int id, MemoryDefinition memory) in memories)
}
private static void DeclareSharedMemory(CodeGenContext context, int size)
{ {
context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size); var pointerType = context.TypePointer(storage, context.GetType(memory.Type, memory.ArrayLength));
}
private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size)
{
var arrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), size));
var pointerType = context.TypePointer(storage, arrayType);
var variable = context.Variable(pointerType, storage); var variable = context.Variable(pointerType, storage);
context.AddGlobalVariable(variable); context.AddGlobalVariable(variable);
return variable; dict.Add(id, variable);
}
} }
private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers) private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable<BufferDefinition> buffers)

View file

@ -97,8 +97,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
Add(Instruction.ImageStore, GenerateImageStore); Add(Instruction.ImageStore, GenerateImageStore);
Add(Instruction.IsNan, GenerateIsNan); Add(Instruction.IsNan, GenerateIsNan);
Add(Instruction.Load, GenerateLoad); Add(Instruction.Load, GenerateLoad);
Add(Instruction.LoadLocal, GenerateLoadLocal);
Add(Instruction.LoadShared, GenerateLoadShared);
Add(Instruction.Lod, GenerateLod); Add(Instruction.Lod, GenerateLod);
Add(Instruction.LogarithmB2, GenerateLogarithmB2); Add(Instruction.LogarithmB2, GenerateLogarithmB2);
Add(Instruction.LogicalAnd, GenerateLogicalAnd); Add(Instruction.LogicalAnd, GenerateLogicalAnd);
@ -132,10 +130,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
Add(Instruction.Sine, GenerateSine); Add(Instruction.Sine, GenerateSine);
Add(Instruction.SquareRoot, GenerateSquareRoot); Add(Instruction.SquareRoot, GenerateSquareRoot);
Add(Instruction.Store, GenerateStore); Add(Instruction.Store, GenerateStore);
Add(Instruction.StoreLocal, GenerateStoreLocal);
Add(Instruction.StoreShared, GenerateStoreShared);
Add(Instruction.StoreShared16, GenerateStoreShared16);
Add(Instruction.StoreShared8, GenerateStoreShared8);
Add(Instruction.Subtract, GenerateSubtract); Add(Instruction.Subtract, GenerateSubtract);
Add(Instruction.SwizzleAdd, GenerateSwizzleAdd); Add(Instruction.SwizzleAdd, GenerateSwizzleAdd);
Add(Instruction.TextureSample, GenerateTextureSample); Add(Instruction.TextureSample, GenerateTextureSample);
@ -871,30 +865,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
return GenerateLoadOrStore(context, operation, isStore: false); return GenerateLoadOrStore(context, operation, isStore: false);
} }
/// <summary>
/// Emits a load from <c>context.LocalMemory</c> using the <c>Private</c> storage class.
/// </summary>
/// <param name="context">Code generation context; also supplies the memory variable</param>
/// <param name="operation">Load operation forwarded to <c>GenerateLoadLocalOrShared</c></param>
/// <returns>The result produced by <c>GenerateLoadLocalOrShared</c></returns>
private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation)
    => GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);
/// <summary>
/// Emits a load from <c>context.SharedMemory</c> using the <c>Workgroup</c> storage class.
/// </summary>
/// <param name="context">Code generation context; also supplies the memory variable</param>
/// <param name="operation">Load operation forwarded to <c>GenerateLoadLocalOrShared</c></param>
/// <returns>The result produced by <c>GenerateLoadLocalOrShared</c></returns>
private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation)
    => GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);
/// <summary>
/// Emits an access chain into <paramref name="memory"/> followed by a load of one U32 element.
/// </summary>
/// <param name="context">Code generation context used to emit the instructions</param>
/// <param name="operation">Operation whose source 0 is the element index</param>
/// <param name="storageClass">Storage class used for the element pointer type</param>
/// <param name="memory">Memory variable that is indexed</param>
/// <returns>An <see cref="OperationResult"/> of type U32 carrying the loaded value</returns>
private static OperationResult GenerateLoadLocalOrShared(
CodeGenContext context,
AstOperation operation,
StorageClass storageClass,
SpvInstruction memory)
{
// Source 0 is fetched as S32 and used as the index into the memory variable.
var offset = context.Get(AggregateType.S32, operation.GetSource(0));
var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset);
var value = context.Load(context.TypeU32(), elemPointer);
return new OperationResult(AggregateType.U32, value);
}
private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation) private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation)
{ {
AstTextureOperation texOp = (AstTextureOperation)operation; AstTextureOperation texOp = (AstTextureOperation)operation;
@ -1268,45 +1238,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
return GenerateLoadOrStore(context, operation, isStore: true); return GenerateLoadOrStore(context, operation, isStore: true);
} }
/// <summary>
/// Emits a store to <c>context.LocalMemory</c> using the <c>Private</c> storage class.
/// </summary>
/// <param name="context">Code generation context; also supplies the memory variable</param>
/// <param name="operation">Store operation forwarded to <c>GenerateStoreLocalOrShared</c></param>
/// <returns>The result produced by <c>GenerateStoreLocalOrShared</c></returns>
private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation)
    => GenerateStoreLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);
/// <summary>
/// Emits a store to <c>context.SharedMemory</c> using the <c>Workgroup</c> storage class.
/// </summary>
/// <param name="context">Code generation context; also supplies the memory variable</param>
/// <param name="operation">Store operation forwarded to <c>GenerateStoreLocalOrShared</c></param>
/// <returns>The result produced by <c>GenerateStoreLocalOrShared</c></returns>
private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation)
    => GenerateStoreLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);
/// <summary>
/// Emits an access chain into <paramref name="memory"/> followed by a store of one U32 element.
/// </summary>
/// <param name="context">Code generation context used to emit the instructions</param>
/// <param name="operation">Operation whose source 0 is the element index and source 1 the value to store</param>
/// <param name="storageClass">Storage class used for the element pointer type</param>
/// <param name="memory">Memory variable that is indexed</param>
/// <returns><see cref="OperationResult.Invalid"/>, since no result value is returned for the store</returns>
private static OperationResult GenerateStoreLocalOrShared(
CodeGenContext context,
AstOperation operation,
StorageClass storageClass,
SpvInstruction memory)
{
// Source 0 is fetched as S32 (the index); source 1 is fetched as U32 (the value to write).
var offset = context.Get(AggregateType.S32, operation.GetSource(0));
var value = context.Get(AggregateType.U32, operation.GetSource(1));
var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset);
context.Store(elemPointer, value);
return OperationResult.Invalid;
}
/// <summary>
/// Delegates to <c>GenerateStoreSharedSmallInt</c> with a size argument of 16.
/// </summary>
/// <param name="context">Code generation context forwarded to the helper</param>
/// <param name="operation">Store operation forwarded to the helper</param>
/// <returns><see cref="OperationResult.Invalid"/>, since no result value is returned for the store</returns>
private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation)
{
    // Presumably the width in bits of the stored value - confirm against GenerateStoreSharedSmallInt.
    const int Size = 16;

    GenerateStoreSharedSmallInt(context, operation, Size);

    return OperationResult.Invalid;
}
/// <summary>
/// Delegates to <c>GenerateStoreSharedSmallInt</c> with a size argument of 8.
/// </summary>
/// <param name="context">Code generation context forwarded to the helper</param>
/// <param name="operation">Store operation forwarded to the helper</param>
/// <returns><see cref="OperationResult.Invalid"/>, since no result value is returned for the store</returns>
private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation)
{
    // Presumably the width in bits of the stored value - confirm against GenerateStoreSharedSmallInt.
    const int Size = 8;

    GenerateStoreSharedSmallInt(context, operation, Size);

    return OperationResult.Invalid;
}
private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation) private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation)
{ {
return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub); return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub);
@ -1827,55 +1758,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
AstOperation operation, AstOperation operation,
Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
{ {
var value = context.GetU32(operation.GetSource(operation.SourcesCount - 1)); SpvInstruction elemPointer = GetStoragePointer(context, operation, out AggregateType varType);
SpvInstruction elemPointer; var value = context.Get(varType, operation.GetSource(operation.SourcesCount - 1));
if (operation.StorageKind == StorageKind.StorageBuffer)
{
elemPointer = GetStoragePointer(context, operation, out _);
}
else if (operation.StorageKind == StorageKind.SharedMemory)
{
var offset = context.GetU32(operation.GetSource(0));
elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset);
}
else
{
throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
}
var one = context.Constant(context.TypeU32(), 1); var one = context.Constant(context.TypeU32(), 1);
var zero = context.Constant(context.TypeU32(), 0); var zero = context.Constant(context.TypeU32(), 0);
return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), elemPointer, one, zero, value)); return new OperationResult(varType, emitU(context.GetType(varType), elemPointer, one, zero, value));
} }
private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation) private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation)
{ {
var value0 = context.GetU32(operation.GetSource(operation.SourcesCount - 2)); SpvInstruction elemPointer = GetStoragePointer(context, operation, out AggregateType varType);
var value1 = context.GetU32(operation.GetSource(operation.SourcesCount - 1));
SpvInstruction elemPointer; var value0 = context.Get(varType, operation.GetSource(operation.SourcesCount - 2));
var value1 = context.Get(varType, operation.GetSource(operation.SourcesCount - 1));
if (operation.StorageKind == StorageKind.StorageBuffer)
{
elemPointer = GetStoragePointer(context, operation, out _);
}
else if (operation.StorageKind == StorageKind.SharedMemory)
{
var offset = context.GetU32(operation.GetSource(0));
elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset);
}
else
{
throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
}
var one = context.Constant(context.TypeU32(), 1); var one = context.Constant(context.TypeU32(), 1);
var zero = context.Constant(context.TypeU32(), 0); var zero = context.Constant(context.TypeU32(), 0);
return new OperationResult(AggregateType.U32, context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, value1, value0)); return new OperationResult(varType, context.AtomicCompareExchange(context.GetType(varType), elemPointer, one, zero, zero, value1, value0));
} }
private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
@ -1928,6 +1831,27 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
: context.StorageBuffers[bindingIndex.Value]; : context.StorageBuffers[bindingIndex.Value];
break; break;
case StorageKind.LocalMemory:
case StorageKind.SharedMemory:
if (!(operation.GetSource(srcIndex++) is AstOperand bindingId) || bindingId.Type != OperandType.Constant)
{
throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
}
if (storageKind == StorageKind.LocalMemory)
{
storageClass = StorageClass.Private;
varType = context.Config.Properties.LocalMemories[bindingId.Value].Type & AggregateType.ElementTypeMask;
baseObj = context.LocalMemories[bindingId.Value];
}
else
{
storageClass = StorageClass.Workgroup;
varType = context.Config.Properties.SharedMemories[bindingId.Value].Type & AggregateType.ElementTypeMask;
baseObj = context.SharedMemories[bindingId.Value];
}
break;
case StorageKind.Input: case StorageKind.Input:
case StorageKind.InputPerPatch: case StorageKind.InputPerPatch:
case StorageKind.Output: case StorageKind.Output:
@ -2048,50 +1972,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
return context.Load(context.GetType(varType), context.Inputs[ioDefinition]); return context.Load(context.GetType(varType), context.Inputs[ioDefinition]);
} }
/// <summary>
/// Generates a store of a value narrower than 32 bits into shared memory.
/// Source 0 of the operation is treated as a byte offset and source 1 as the value.
/// </summary>
/// <param name="context">Code generation context used to emit the instructions.</param>
/// <param name="operation">Operation providing the offset (source 0) and value (source 1).</param>
/// <param name="bitSize">Number of bits to write; forwarded to <see cref="GenerateStoreSmallInt"/>.</param>
private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize)
{
var offset = context.Get(AggregateType.U32, operation.GetSource(0));
var value = context.Get(AggregateType.U32, operation.GetSource(1));
// Word index = offset >> 2 (four bytes per 32-bit word).
var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2));
// Bit position inside the word = (offset & 3) << 3, i.e. byte-in-word times 8.
var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3));
bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3));
var memory = context.SharedMemory;
var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), memory, wordOffset);
GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize);
}
/// <summary>
/// Emits a loop that writes <paramref name="bitSize"/> bits of <paramref name="value"/> into the
/// 32-bit word at <paramref name="elemPointer"/>, starting at <paramref name="bitOffset"/>.
/// Each iteration loads the word, inserts the bits and tries to commit the result with an
/// atomic compare-exchange; the loop repeats while the exchange returns a value different
/// from the one that was loaded.
/// </summary>
/// <param name="context">Code generation context used to emit the instructions.</param>
/// <param name="elemPointer">Pointer to the 32-bit word being updated.</param>
/// <param name="bitOffset">Bit position inside the word where the value is inserted.</param>
/// <param name="value">Value whose low bits are inserted.</param>
/// <param name="bitSize">Number of bits to insert.</param>
private static void GenerateStoreSmallInt(
CodeGenContext context,
SpvInstruction elemPointer,
SpvInstruction bitOffset,
SpvInstruction value,
int bitSize)
{
var loopStart = context.Label();
var loopEnd = context.Label();
context.Branch(loopStart);
context.AddLabel(loopStart);
var oldValue = context.Load(context.TypeU32(), elemPointer);
var newValue = context.BitFieldInsert(context.TypeU32(), oldValue, value, bitOffset, context.Constant(context.TypeU32(), bitSize));
// NOTE(review): one/zero are presumably the SPIR-V scope and memory semantics operands of the
// compare-exchange (confirm against the AtomicCompareExchange signature).
var one = context.Constant(context.TypeU32(), 1);
var zero = context.Constant(context.TypeU32(), 0);
var result = context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, newValue, oldValue);
// The exchange is considered failed when the returned value is not the one loaded above.
var failed = context.INotEqual(context.TypeBool(), result, oldValue);
// The merge declaration is emitted immediately before the conditional branch that ends the block.
context.LoopMerge(loopEnd, loopStart, LoopControlMask.MaskNone);
context.BranchConditional(failed, loopStart, loopEnd);
context.AddLabel(loopEnd);
}
private static OperationResult GetZeroOperationResult( private static OperationResult GetZeroOperationResult(
CodeGenContext context, CodeGenContext context,
AstTextureOperation texOp, AstTextureOperation texOp,

View file

@ -10,12 +10,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
{ {
static partial class InstEmit static partial class InstEmit
{ {
/// <summary>
/// Selects which memory region a load or store emitter targets.
/// </summary>
private enum MemoryRegion
{
    /// <summary>Local memory.</summary>
    Local = 0,

    /// <summary>Shared memory.</summary>
    Shared = 1
}
public static void Atom(EmitterContext context) public static void Atom(EmitterContext context)
{ {
InstAtom op = context.GetOp<InstAtom>(); InstAtom op = context.GetOp<InstAtom>();
@ -51,7 +45,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
_ => AtomSize.U32 _ => AtomSize.U32
}; };
Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value); Operand id = Const(context.Config.ResourceManager.SharedMemoryId);
Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, id, offset, value);
context.Copy(GetDest(op.Dest), res); context.Copy(GetDest(op.Dest), res);
} }
@ -114,14 +109,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
{ {
InstLdl op = context.GetOp<InstLdl>(); InstLdl op = context.GetOp<InstLdl>();
EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); EmitLoad(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
} }
public static void Lds(EmitterContext context) public static void Lds(EmitterContext context)
{ {
InstLds op = context.GetOp<InstLds>(); InstLds op = context.GetOp<InstLds>();
EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
} }
public static void Red(EmitterContext context) public static void Red(EmitterContext context)
@ -144,14 +139,14 @@ namespace Ryujinx.Graphics.Shader.Instructions
{ {
InstStl op = context.GetOp<InstStl>(); InstStl op = context.GetOp<InstStl>();
EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); EmitStore(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
} }
public static void Sts(EmitterContext context) public static void Sts(EmitterContext context)
{ {
InstSts op = context.GetOp<InstSts>(); InstSts op = context.GetOp<InstSts>();
EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
} }
private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset) private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset)
@ -192,8 +187,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
StorageKind storageKind, StorageKind storageKind,
AtomOp op, AtomOp op,
AtomSize type, AtomSize type,
Operand addrLow, Operand e0,
Operand addrHigh, Operand e1,
Operand value) Operand value)
{ {
Operand res = Const(0); Operand res = Const(0);
@ -203,7 +198,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Add: case AtomOp.Add:
if (type == AtomSize.S32 || type == AtomSize.U32) if (type == AtomSize.S32 || type == AtomSize.U32)
{ {
res = context.AtomicAdd(storageKind, addrLow, addrHigh, value); res = context.AtomicAdd(storageKind, e0, e1, value);
} }
else else
{ {
@ -213,7 +208,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.And: case AtomOp.And:
if (type == AtomSize.S32 || type == AtomSize.U32) if (type == AtomSize.S32 || type == AtomSize.U32)
{ {
res = context.AtomicAnd(storageKind, addrLow, addrHigh, value); res = context.AtomicAnd(storageKind, e0, e1, value);
} }
else else
{ {
@ -223,7 +218,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Xor: case AtomOp.Xor:
if (type == AtomSize.S32 || type == AtomSize.U32) if (type == AtomSize.S32 || type == AtomSize.U32)
{ {
res = context.AtomicXor(storageKind, addrLow, addrHigh, value); res = context.AtomicXor(storageKind, e0, e1, value);
} }
else else
{ {
@ -233,7 +228,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Or: case AtomOp.Or:
if (type == AtomSize.S32 || type == AtomSize.U32) if (type == AtomSize.S32 || type == AtomSize.U32)
{ {
res = context.AtomicOr(storageKind, addrLow, addrHigh, value); res = context.AtomicOr(storageKind, e0, e1, value);
} }
else else
{ {
@ -243,11 +238,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Max: case AtomOp.Max:
if (type == AtomSize.S32) if (type == AtomSize.S32)
{ {
res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value); res = context.AtomicMaxS32(storageKind, e0, e1, value);
} }
else if (type == AtomSize.U32) else if (type == AtomSize.U32)
{ {
res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value); res = context.AtomicMaxU32(storageKind, e0, e1, value);
} }
else else
{ {
@ -257,11 +252,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
case AtomOp.Min: case AtomOp.Min:
if (type == AtomSize.S32) if (type == AtomSize.S32)
{ {
res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value); res = context.AtomicMinS32(storageKind, e0, e1, value);
} }
else if (type == AtomSize.U32) else if (type == AtomSize.U32)
{ {
res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value); res = context.AtomicMinU32(storageKind, e0, e1, value);
} }
else else
{ {
@ -275,7 +270,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static void EmitLoad( private static void EmitLoad(
EmitterContext context, EmitterContext context,
MemoryRegion region, StorageKind storageKind,
LsSize2 size, LsSize2 size,
Operand srcA, Operand srcA,
int rd, int rd,
@ -287,19 +282,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
return; return;
} }
int id = storageKind == StorageKind.LocalMemory
? context.Config.ResourceManager.LocalMemoryId
: context.Config.ResourceManager.SharedMemoryId;
bool isSmallInt = size < LsSize2.B32; bool isSmallInt = size < LsSize2.B32;
int count = 1; int count = size switch
switch (size)
{ {
case LsSize2.B64: count = 2; break; LsSize2.B64 => 2,
case LsSize2.B128: count = 4; break; LsSize2.B128 => 4,
} _ => 1
};
Operand baseOffset = context.IAdd(srcA, Const(offset)); Operand baseOffset = context.Copy(srcA);
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++) for (int index = 0; index < count; index++)
{ {
@ -310,14 +305,10 @@ namespace Ryujinx.Graphics.Shader.Instructions
break; break;
} }
Operand elemOffset = context.IAdd(wordOffset, Const(index)); Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
Operand value = null; Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, byteOffset);
switch (region) Operand value = context.Load(storageKind, id, wordOffset);
{
case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
}
if (isSmallInt) if (isSmallInt)
{ {
@ -360,7 +351,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
private static void EmitStore( private static void EmitStore(
EmitterContext context, EmitterContext context,
MemoryRegion region, StorageKind storageKind,
LsSize2 size, LsSize2 size,
Operand srcA, Operand srcA,
int rd, int rd,
@ -372,52 +363,54 @@ namespace Ryujinx.Graphics.Shader.Instructions
return; return;
} }
int id = storageKind == StorageKind.LocalMemory
? context.Config.ResourceManager.LocalMemoryId
: context.Config.ResourceManager.SharedMemoryId;
bool isSmallInt = size < LsSize2.B32; bool isSmallInt = size < LsSize2.B32;
int count = 1; int count = size switch
switch (size)
{ {
case LsSize2.B64: count = 2; break; LsSize2.B64 => 2,
case LsSize2.B128: count = 4; break; LsSize2.B128 => 4,
} _ => 1
};
Operand baseOffset = context.IAdd(srcA, Const(offset)); Operand baseOffset = context.Copy(srcA);
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++) for (int index = 0; index < count; index++)
{ {
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex; bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr); Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand elemOffset = context.IAdd(wordOffset, Const(index)); Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2));
Operand bitOffset = GetBitOffset(context, byteOffset);
if (isSmallInt && region == MemoryRegion.Local) if (isSmallInt && storageKind == StorageKind.LocalMemory)
{ {
Operand word = context.LoadLocal(elemOffset); Operand word = context.Load(storageKind, id, wordOffset);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value); value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
} }
if (region == MemoryRegion.Local) if (storageKind == StorageKind.LocalMemory)
{ {
context.StoreLocal(elemOffset, value); context.Store(storageKind, id, wordOffset, value);
} }
else if (region == MemoryRegion.Shared) else if (storageKind == StorageKind.SharedMemory)
{ {
switch (size) switch (size)
{ {
case LsSize2.U8: case LsSize2.U8:
case LsSize2.S8: case LsSize2.S8:
context.StoreShared8(baseOffset, value); context.Store(StorageKind.SharedMemory8, id, byteOffset, value);
break; break;
case LsSize2.U16: case LsSize2.U16:
case LsSize2.S16: case LsSize2.S16:
context.StoreShared16(baseOffset, value); context.Store(StorageKind.SharedMemory16, id, byteOffset, value);
break; break;
default: default:
context.StoreShared(elemOffset, value); context.Store(storageKind, id, wordOffset, value);
break; break;
} }
} }

View file

@ -79,8 +79,6 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
ImageAtomic, ImageAtomic,
IsNan, IsNan,
Load, Load,
LoadLocal,
LoadShared,
Lod, Lod,
LogarithmB2, LogarithmB2,
LogicalAnd, LogicalAnd,
@ -115,10 +113,6 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
Sine, Sine,
SquareRoot, SquareRoot,
Store, Store,
StoreLocal,
StoreShared,
StoreShared16,
StoreShared8,
Subtract, Subtract,
SwizzleAdd, SwizzleAdd,
TextureSample, TextureSample,

View file

@ -11,12 +11,13 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
StorageBuffer, StorageBuffer,
LocalMemory, LocalMemory,
SharedMemory, SharedMemory,
SharedMemory8, // TODO: Remove this and store type as a field on the Operation class itself.
SharedMemory16, // TODO: Remove this and store type as a field on the Operation class itself.
GlobalMemory, GlobalMemory,
// TODO: Remove those and store type as a field on the Operation class itself. GlobalMemoryS8, // TODO: Remove this and store type as a field on the Operation class itself.
GlobalMemoryS8, GlobalMemoryS16, // TODO: Remove this and store type as a field on the Operation class itself.
GlobalMemoryS16, GlobalMemoryU8, // TODO: Remove this and store type as a field on the Operation class itself.
GlobalMemoryU8, GlobalMemoryU16 // TODO: Remove this and store type as a field on the Operation class itself.
GlobalMemoryU16
} }
static class StorageKindExtensions static class StorageKindExtensions

View file

@ -10,14 +10,12 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
</ItemGroup> </ItemGroup>

View file

@ -5,14 +5,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
[Flags] [Flags]
enum HelperFunctionsMask enum HelperFunctionsMask
{ {
AtomicMinMaxS32Shared = 1 << 0,
MultiplyHighS32 = 1 << 2, MultiplyHighS32 = 1 << 2,
MultiplyHighU32 = 1 << 3, MultiplyHighU32 = 1 << 3,
Shuffle = 1 << 4, Shuffle = 1 << 4,
ShuffleDown = 1 << 5, ShuffleDown = 1 << 5,
ShuffleUp = 1 << 6, ShuffleUp = 1 << 6,
ShuffleXor = 1 << 7, ShuffleXor = 1 << 7,
StoreSharedSmallInt = 1 << 8,
SwizzleAdd = 1 << 10, SwizzleAdd = 1 << 10,
FSI = 1 << 11 FSI = 1 << 11
} }

View file

@ -90,8 +90,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ImageAtomic, AggregateType.S32); Add(Instruction.ImageAtomic, AggregateType.S32);
Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar); Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar);
Add(Instruction.Load, AggregateType.FP32); Add(Instruction.Load, AggregateType.FP32);
Add(Instruction.LoadLocal, AggregateType.U32, AggregateType.S32);
Add(Instruction.LoadShared, AggregateType.U32, AggregateType.S32);
Add(Instruction.Lod, AggregateType.FP32); Add(Instruction.Lod, AggregateType.FP32);
Add(Instruction.LogarithmB2, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.LogarithmB2, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool); Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
@ -121,10 +119,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.Store, AggregateType.Void); Add(Instruction.Store, AggregateType.Void);
Add(Instruction.StoreLocal, AggregateType.Void, AggregateType.S32, AggregateType.U32);
Add(Instruction.StoreShared, AggregateType.Void, AggregateType.S32, AggregateType.U32);
Add(Instruction.StoreShared16, AggregateType.Void, AggregateType.S32, AggregateType.U32);
Add(Instruction.StoreShared8, AggregateType.Void, AggregateType.S32, AggregateType.U32);
Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32); Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32);
Add(Instruction.TextureSample, AggregateType.FP32); Add(Instruction.TextureSample, AggregateType.FP32);

View file

@ -0,0 +1,18 @@
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.StructuredIr
{
/// <summary>
/// Immutable description of a memory region: its name, type and array length.
/// </summary>
readonly struct MemoryDefinition
{
    /// <summary>Name given to the memory region.</summary>
    public string Name { get; }

    /// <summary>Aggregate type of the memory region.</summary>
    public AggregateType Type { get; }

    /// <summary>Array length of the region; 1 when not specified at construction.</summary>
    public int ArrayLength { get; }

    /// <summary>
    /// Creates a new memory definition.
    /// </summary>
    /// <param name="name">Name of the memory region.</param>
    /// <param name="type">Aggregate type of the memory region.</param>
    /// <param name="arrayLength">Array length of the region; defaults to 1.</param>
    public MemoryDefinition(string name, AggregateType type, int arrayLength = 1)
        => (Name, Type, ArrayLength) = (name, type, arrayLength);
}
}

View file

@ -6,14 +6,20 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
{ {
private readonly Dictionary<int, BufferDefinition> _constantBuffers; private readonly Dictionary<int, BufferDefinition> _constantBuffers;
private readonly Dictionary<int, BufferDefinition> _storageBuffers; private readonly Dictionary<int, BufferDefinition> _storageBuffers;
private readonly Dictionary<int, MemoryDefinition> _localMemories;
private readonly Dictionary<int, MemoryDefinition> _sharedMemories;
public IReadOnlyDictionary<int, BufferDefinition> ConstantBuffers => _constantBuffers; public IReadOnlyDictionary<int, BufferDefinition> ConstantBuffers => _constantBuffers;
public IReadOnlyDictionary<int, BufferDefinition> StorageBuffers => _storageBuffers; public IReadOnlyDictionary<int, BufferDefinition> StorageBuffers => _storageBuffers;
public IReadOnlyDictionary<int, MemoryDefinition> LocalMemories => _localMemories;
public IReadOnlyDictionary<int, MemoryDefinition> SharedMemories => _sharedMemories;
public ShaderProperties() public ShaderProperties()
{ {
_constantBuffers = new Dictionary<int, BufferDefinition>(); _constantBuffers = new Dictionary<int, BufferDefinition>();
_storageBuffers = new Dictionary<int, BufferDefinition>(); _storageBuffers = new Dictionary<int, BufferDefinition>();
_localMemories = new Dictionary<int, MemoryDefinition>();
_sharedMemories = new Dictionary<int, MemoryDefinition>();
} }
public void AddConstantBuffer(int binding, BufferDefinition definition) public void AddConstantBuffer(int binding, BufferDefinition definition)
@ -25,5 +31,21 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
{ {
_storageBuffers[binding] = definition; _storageBuffers[binding] = definition;
} }
/// <summary>
/// Registers a local memory definition under the next free id.
/// </summary>
/// <param name="definition">Definition to register.</param>
/// <returns>Id assigned to the definition, equal to the number of local memories registered before this call.</returns>
public int AddLocalMemory(MemoryDefinition definition)
{
    int assignedId = _localMemories.Count;

    _localMemories.Add(assignedId, definition);

    return assignedId;
}
/// <summary>
/// Registers a shared memory definition under the next free id.
/// </summary>
/// <param name="definition">Definition to register.</param>
/// <returns>Id assigned to the definition, equal to the number of shared memories registered before this call.</returns>
public int AddSharedMemory(MemoryDefinition definition)
{
    int assignedId = _sharedMemories.Count;

    _sharedMemories.Add(assignedId, definition);

    return assignedId;
}
} }
} }

View file

@ -274,13 +274,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// decide which helper functions are needed on the final generated code. // decide which helper functions are needed on the final generated code.
switch (operation.Inst) switch (operation.Inst)
{ {
case Instruction.AtomicMaxS32:
case Instruction.AtomicMinS32:
if (operation.StorageKind == StorageKind.SharedMemory)
{
context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
}
break;
case Instruction.MultiplyHighS32: case Instruction.MultiplyHighS32:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
break; break;
@ -299,10 +292,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.ShuffleXor: case Instruction.ShuffleXor:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor; context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
break; break;
case Instruction.StoreShared16:
case Instruction.StoreShared8:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
break;
case Instruction.SwizzleAdd: case Instruction.SwizzleAdd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
break; break;

View file

@ -67,6 +67,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value); return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value);
} }
/// <summary>
/// Adds an <see cref="Instruction.AtomicCompareAndSwap"/> operation that addresses its target with a
/// constant binding and a single index operand.
/// </summary>
/// <param name="context">Emitter context the operation is added to.</param>
/// <param name="storageKind">Storage kind of the memory being accessed.</param>
/// <param name="binding">Binding passed as a constant first source.</param>
/// <param name="e0">Index operand following the binding.</param>
/// <param name="compare">Comparison operand.</param>
/// <param name="value">Value operand.</param>
/// <returns>The operand returned by <c>context.Add</c> for the new operation.</returns>
public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand compare, Operand value)
{
    Operand dest = Local();
    Operand bindingId = Const(binding);

    return context.Add(Instruction.AtomicCompareAndSwap, storageKind, dest, bindingId, e0, compare, value);
}
public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value) public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value)
{ {
return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value); return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value);
@ -661,16 +666,6 @@ namespace Ryujinx.Graphics.Shader.Translation
: context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex); : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex);
} }
/// <summary>
/// Adds an <see cref="Instruction.LoadLocal"/> operation with <paramref name="a"/> as its only source
/// and an operand obtained from <c>Local()</c> as destination.
/// </summary>
public static Operand LoadLocal(this EmitterContext context, Operand a)
    => context.Add(Instruction.LoadLocal, Local(), a);
/// <summary>
/// Adds an <see cref="Instruction.LoadShared"/> operation with <paramref name="a"/> as its only source
/// and an operand obtained from <c>Local()</c> as destination.
/// </summary>
public static Operand LoadShared(this EmitterContext context, Operand a)
    => context.Add(Instruction.LoadShared, Local(), a);
public static Operand MemoryBarrier(this EmitterContext context) public static Operand MemoryBarrier(this EmitterContext context)
{ {
return context.Add(Instruction.MemoryBarrier); return context.Add(Instruction.MemoryBarrier);
@ -753,6 +748,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.Store, storageKind, null, e0, e1, value); return context.Add(Instruction.Store, storageKind, null, e0, e1, value);
} }
/// <summary>
/// Adds an <see cref="Instruction.Store"/> operation that addresses its target with a constant
/// binding and a single index operand. No destination operand is supplied.
/// </summary>
/// <param name="context">Emitter context the operation is added to.</param>
/// <param name="storageKind">Storage kind of the memory being written.</param>
/// <param name="binding">Binding passed as a constant first source.</param>
/// <param name="e0">Index operand following the binding.</param>
/// <param name="value">Value to store.</param>
/// <returns>The operand returned by <c>context.Add</c> for the new operation.</returns>
public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand value)
    => context.Add(Instruction.Store, storageKind, null, Const(binding), e0, value);
public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{ {
return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value); return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value);
@ -797,26 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation
: context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value); : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
} }
/// <summary>
/// Adds an <see cref="Instruction.StoreLocal"/> operation with sources <paramref name="a"/> and
/// <paramref name="b"/> and no destination operand.
/// </summary>
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreLocal, null, a, b);
/// <summary>
/// Adds an <see cref="Instruction.StoreShared"/> operation with sources <paramref name="a"/> and
/// <paramref name="b"/> and no destination operand.
/// </summary>
public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared, null, a, b);
/// <summary>
/// Adds an <see cref="Instruction.StoreShared16"/> operation with sources <paramref name="a"/> and
/// <paramref name="b"/> and no destination operand.
/// </summary>
public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared16, null, a, b);
/// <summary>
/// Adds an <see cref="Instruction.StoreShared8"/> operation with sources <paramref name="a"/> and
/// <paramref name="b"/> and no destination operand.
/// </summary>
public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.StoreShared8, null, a, b);
public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a) public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
{ {
return UnpackDouble2x32(context, a, 1); return UnpackDouble2x32(context, a, 1);

View file

@ -9,13 +9,13 @@ namespace Ryujinx.Graphics.Shader.Translation
class HelperFunctionManager class HelperFunctionManager
{ {
private readonly List<Function> _functionList; private readonly List<Function> _functionList;
private readonly Dictionary<HelperFunctionName, int> _functionIds; private readonly Dictionary<int, int> _functionIds;
private readonly ShaderStage _stage; private readonly ShaderStage _stage;
public HelperFunctionManager(List<Function> functionList, ShaderStage stage) public HelperFunctionManager(List<Function> functionList, ShaderStage stage)
{ {
_functionList = functionList; _functionList = functionList;
_functionIds = new Dictionary<HelperFunctionName, int>(); _functionIds = new Dictionary<int, int>();
_stage = stage; _stage = stage;
} }
@ -29,14 +29,30 @@ namespace Ryujinx.Graphics.Shader.Translation
public int GetOrCreateFunctionId(HelperFunctionName functionName) public int GetOrCreateFunctionId(HelperFunctionName functionName)
{ {
if (_functionIds.TryGetValue(functionName, out int functionId)) if (_functionIds.TryGetValue((int)functionName, out int functionId))
{ {
return functionId; return functionId;
} }
Function function = GenerateFunction(functionName); Function function = GenerateFunction(functionName);
functionId = AddFunction(function); functionId = AddFunction(function);
_functionIds.Add(functionName, functionId); _functionIds.Add((int)functionName, functionId);
return functionId;
}
/// <summary>
/// Gets the id of the helper function generated for the given name and memory id,
/// generating and registering the function the first time the pair is requested.
/// </summary>
/// <param name="functionName">Helper function to get or create.</param>
/// <param name="id">Id passed to the function generator; combined with the name to form the cache key.</param>
/// <returns>Id of the helper function.</returns>
public int GetOrCreateFunctionId(HelperFunctionName functionName, int id)
{
    // The name occupies the low bits of the key and the id is placed from bit 16 upwards.
    int cacheKey = (int)functionName | (id << 16);

    if (!_functionIds.TryGetValue(cacheKey, out int functionId))
    {
        Function generated = GenerateFunction(functionName, id);

        functionId = AddFunction(generated);
        _functionIds.Add(cacheKey, functionId);
    }

    return functionId;
}
@ -140,6 +156,67 @@ namespace Ryujinx.Graphics.Shader.Translation
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2); return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2);
} }
/// <summary>
/// Generates the helper function identified by <paramref name="functionName"/> for the memory
/// with the given <paramref name="id"/>.
/// </summary>
/// <param name="functionName">Helper function to generate.</param>
/// <param name="id">Id forwarded to the specific generator.</param>
/// <returns>The generated function.</returns>
/// <exception cref="ArgumentException">The name is not one of the helpers handled here.</exception>
private static Function GenerateFunction(HelperFunctionName functionName, int id)
{
    switch (functionName)
    {
        case HelperFunctionName.SharedAtomicMaxS32:
            return GenerateSharedAtomicSigned(id, isMin: false);
        case HelperFunctionName.SharedAtomicMinS32:
            return GenerateSharedAtomicSigned(id, isMin: true);
        case HelperFunctionName.SharedStore8:
            return GenerateSharedStore8(id);
        case HelperFunctionName.SharedStore16:
            return GenerateSharedStore16(id);
        default:
            throw new ArgumentException($"Invalid function name {functionName}");
    }
}
/// <summary>
/// Generates a helper function performing a signed 32-bit atomic minimum or maximum on shared memory,
/// implemented as a compare-and-swap loop.
/// </summary>
/// <remarks>
/// The generated function takes two arguments: the word offset into the memory (argument 0) and
/// the value to combine with the memory contents (argument 1). It returns the value that was
/// loaded from memory before the operation was applied.
/// </remarks>
/// <param name="id">ID of the shared memory accessed by the generated function</param>
/// <param name="isMin">True to generate a minimum operation, false to generate a maximum operation</param>
/// <returns>The generated function, named "SharedAtomicMin_{id}" or "SharedAtomicMax_{id}"</returns>
private static Function GenerateSharedAtomicSigned(int id, bool isMin)
{
    EmitterContext emitter = new EmitterContext();

    Operand wordOffset = Argument(0);
    Operand operandValue = Argument(1);

    // Computes the value to be written back from the value currently in memory.
    Func<Operand, Operand> combine = current => isMin
        ? emitter.IMinimumS32(current, operandValue)
        : emitter.IMaximumS32(current, operandValue);

    Operand previous = GenerateSharedAtomicCasLoop(emitter, wordOffset, id, combine);

    emitter.Return(previous);

    string opName = isMin ? "Min" : "Max";

    return new Function(ControlFlowGraph.Create(emitter.GetOperations()).Blocks, $"SharedAtomic{opName}_{id}", true, 2, 0);
}
/// <summary>
/// Generates the helper function that stores an 8-bit value into the shared memory with the given ID.
/// </summary>
/// <param name="id">ID of the shared memory written by the generated function</param>
/// <returns>The generated function</returns>
private static Function GenerateSharedStore8(int id) => GenerateSharedStore(id, 8);
/// <summary>
/// Generates the helper function that stores a 16-bit value into the shared memory with the given ID.
/// </summary>
/// <param name="id">ID of the shared memory written by the generated function</param>
/// <returns>The generated function</returns>
private static Function GenerateSharedStore16(int id) => GenerateSharedStore(id, 16);
/// <summary>
/// Generates a helper function that stores a value narrower than 32 bits into shared memory.
/// The containing 32-bit word is updated with a compare-and-swap loop that inserts the new bits
/// into the value currently in memory.
/// </summary>
/// <remarks>
/// The generated function takes two arguments: the byte offset of the store (argument 0) and
/// the value to store (argument 1). It returns no value.
/// </remarks>
/// <param name="id">ID of the shared memory written by the generated function</param>
/// <param name="bitSize">Number of bits inserted into the word</param>
/// <returns>The generated function, named "SharedStore{bitSize}_{id}"</returns>
private static Function GenerateSharedStore(int id, int bitSize)
{
    EmitterContext emitter = new EmitterContext();

    Operand byteOffset = Argument(0);
    Operand storedValue = Argument(1);

    // Index of the 32-bit word holding the target bits, and the bit position inside that word.
    Operand wordIndex = emitter.ShiftRightU32(byteOffset, Const(2));
    Operand shift = GetBitOffset(emitter, byteOffset);

    // Replaces only the target bit range of the word currently in memory.
    Operand InsertBits(Operand current)
    {
        return emitter.BitfieldInsert(current, storedValue, shift, Const(bitSize));
    }

    GenerateSharedAtomicCasLoop(emitter, wordIndex, id, InsertBits);

    emitter.Return();

    return new Function(ControlFlowGraph.Create(emitter.GetOperations()).Blocks, $"SharedStore{bitSize}_{id}", false, 2, 0);
}
private Function GenerateTexelFetchScaleFunction() private Function GenerateTexelFetchScaleFunction()
{ {
EmitterContext context = new EmitterContext(); EmitterContext context = new EmitterContext();
@ -226,5 +303,29 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.IAdd(Const(1), index); return context.IAdd(Const(1), index);
} }
} }
/// <summary>
/// Emits the operations computing <c>(offset &amp; 3) &lt;&lt; 3</c>, that is, the low two bits
/// of <paramref name="offset"/> multiplied by 8.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="offset">Offset whose low two bits select the position</param>
/// <returns>Operand holding the computed bit offset</returns>
public static Operand GetBitOffset(EmitterContext context, Operand offset)
{
    Operand lowBits = context.BitwiseAnd(offset, Const(3));

    return context.ShiftLeft(lowBits, Const(3));
}
/// <summary>
/// Emits a compare-and-swap retry loop on shared memory: the current word is loaded, the callback
/// computes its replacement, and the swap is retried from the load for as long as the value
/// returned by the compare-and-swap differs from the value that was loaded.
/// </summary>
/// <param name="context">Emitter context the operations are added to</param>
/// <param name="wordOffset">Offset of the word, passed unchanged to the load and compare-and-swap operations</param>
/// <param name="id">ID of the shared memory being accessed</param>
/// <param name="opCallback">Callback that receives the loaded value and returns the value to be written</param>
/// <returns>Operand holding the value loaded from memory inside the loop</returns>
private static Operand GenerateSharedAtomicCasLoop(EmitterContext context, Operand wordOffset, int id, Func<Operand, Operand> opCallback)
{
    Operand retryLabel = Label();

    context.MarkLabel(retryLabel);

    Operand loaded = context.Load(StorageKind.SharedMemory, id, wordOffset);
    Operand replacement = opCallback(loaded);

    Operand observed = context.AtomicCompareAndSwap(StorageKind.SharedMemory, id, wordOffset, loaded, replacement);

    // If the value seen by the swap is not the one loaded above, go back and retry.
    Operand mustRetry = context.ICompareNotEqual(observed, loaded);

    context.BranchIfTrue(retryLabel, mustRetry);

    return loaded;
}
} }
} }

View file

@ -4,6 +4,10 @@ namespace Ryujinx.Graphics.Shader.Translation
{ {
ConvertDoubleToFloat, ConvertDoubleToFloat,
ConvertFloatToDouble, ConvertFloatToDouble,
SharedAtomicMaxS32,
SharedAtomicMinS32,
SharedStore8,
SharedStore16,
TexelFetchScale, TexelFetchScale,
TextureSizeUnscale TextureSizeUnscale
} }

View file

@ -244,7 +244,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
node = nextNode; node = nextNode;
} }
} }
else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) else if (operation.Inst == Instruction.Store &&
(operation.StorageKind == StorageKind.SharedMemory ||
operation.StorageKind == StorageKind.LocalMemory))
{ {
// The NVIDIA compiler can sometimes use shared or local memory as temporary // The NVIDIA compiler can sometimes use shared or local memory as temporary
// storage to place the base address and size on, so we need // storage to place the base address and size on, so we need
@ -874,7 +876,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (bitSize < 32) if (bitSize < 32)
{ {
Operand bitOffset = GetBitOffset(context, offset); Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset);
GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) =>
{ {
@ -892,7 +894,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (IsSmallInt(storageKind)) if (IsSmallInt(storageKind))
{ {
Operand bitOffset = GetBitOffset(context, offset); Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset);
switch (storageKind) switch (storageKind)
{ {
@ -921,11 +923,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return true; return true;
} }
private static Operand GetBitOffset(EmitterContext context, Operand offset)
{
return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3));
}
private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback) private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback)
{ {
Operand lblLoopHead = Label(); Operand lblLoopHead = Label();
@ -1070,16 +1067,19 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
baseOffset = null; baseOffset = null;
if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) if (operation.Inst == Instruction.Load || operation.Inst == Instruction.Store)
{
if (operation.StorageKind == StorageKind.SharedMemory)
{ {
type = LsMemoryType.Shared; type = LsMemoryType.Shared;
return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset);
} }
else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) else if (operation.StorageKind == StorageKind.LocalMemory)
{ {
type = LsMemoryType.Local; type = LsMemoryType.Local;
return TryGetLocalMemoryOffset(operation, out constOffset); return TryGetLocalMemoryOffset(operation, out constOffset);
} }
}
type = default; type = default;
constOffset = 0; constOffset = 0;

View file

@ -1,3 +1,4 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.StructuredIr;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@ -22,9 +23,12 @@ namespace Ryujinx.Graphics.Shader.Translation
private readonly HashSet<int> _usedConstantBufferBindings; private readonly HashSet<int> _usedConstantBufferBindings;
/// <summary>
/// ID returned by <c>properties.AddLocalMemory</c> for the "local_memory" definition,
/// or -1 when the constructor was given a local memory size of zero.
/// </summary>
public int LocalMemoryId { get; }
/// <summary>
/// ID returned by <c>properties.AddSharedMemory</c> for the "shared_memory" definition,
/// or -1 when the shared memory size is zero (it is always zero for non-compute stages).
/// </summary>
public int SharedMemoryId { get; }
public ShaderProperties Properties => _properties; public ShaderProperties Properties => _properties;
public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties, int localMemorySize)
{ {
_gpuAccessor = gpuAccessor; _gpuAccessor = gpuAccessor;
_properties = properties; _properties = properties;
@ -41,6 +45,25 @@ namespace Ryujinx.Graphics.Shader.Translation
_usedConstantBufferBindings = new HashSet<int>(); _usedConstantBufferBindings = new HashSet<int>();
properties.AddConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType())); properties.AddConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType()));
LocalMemoryId = -1;
SharedMemoryId = -1;
if (localMemorySize != 0)
{
var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(localMemorySize, sizeof(uint)));
LocalMemoryId = properties.AddLocalMemory(lmem);
}
int sharedMemorySize = stage == ShaderStage.Compute ? gpuAccessor.QueryComputeSharedMemorySize() : 0;
if (sharedMemorySize != 0)
{
var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(sharedMemorySize, sizeof(uint)));
SharedMemoryId = properties.AddSharedMemory(smem);
}
} }
public int GetConstantBufferBinding(int slot) public int GetConstantBufferBinding(int slot)

View file

@ -1,6 +1,8 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.Linq; using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
@ -70,6 +72,15 @@ namespace Ryujinx.Graphics.Shader.Translation
} }
} }
} }
else
{
node = InsertSharedStoreSmallInt(hfm, node);
if (config.Options.TargetLanguage != TargetLanguage.Spirv)
{
node = InsertSharedAtomicSigned(hfm, node);
}
}
} }
} }
} }
@ -171,6 +182,87 @@ namespace Ryujinx.Graphics.Shader.Translation
operation.TurnIntoCopy(result); operation.TurnIntoCopy(result);
} }
/// <summary>
/// Replaces a store to 8-bit or 16-bit shared memory with a call to the matching
/// SharedStore8/SharedStore16 helper function.
/// </summary>
/// <param name="hfm">Helper function manager used to get or create the helper function</param>
/// <param name="node">Node holding the operation to inspect</param>
/// <returns>
/// The node of the newly inserted call when the operation was replaced,
/// otherwise <paramref name="node"/> unchanged
/// </returns>
private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node)
{
    Operation storeOp = (Operation)node.Value;

    // Only stores are of interest here.
    if (storeOp.Inst != Instruction.Store)
    {
        return node;
    }

    bool is8Bit = storeOp.StorageKind == StorageKind.SharedMemory8;
    bool is16Bit = storeOp.StorageKind == StorageKind.SharedMemory16;

    if (!is8Bit && !is16Bit)
    {
        return node;
    }

    HelperFunctionName helperName = is8Bit
        ? HelperFunctionName.SharedStore8
        : HelperFunctionName.SharedStore16;

    Operand memoryId = storeOp.GetSource(0);
    Operand byteOffset = storeOp.GetSource(1);
    Operand value = storeOp.GetSource(2);

    // The memory ID is baked into the helper function, so it has to be a constant.
    Debug.Assert(memoryId.Type == OperandType.Constant);

    int functionId = hfm.GetOrCreateFunctionId(helperName, memoryId.Value);

    Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };

    // The call has no destination; it is inserted before the store, then
    // Utils.DeleteNode is invoked on the original store node.
    Operation call = new Operation(Instruction.Call, 0, (Operand)null, callArgs);
    LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

    Utils.DeleteNode(node, storeOp);

    return callNode;
}
/// <summary>
/// Replaces a signed 32-bit atomic minimum or maximum on shared memory with a call to the
/// matching SharedAtomicMinS32/SharedAtomicMaxS32 helper function.
/// </summary>
/// <param name="hfm">Helper function manager used to get or create the helper function</param>
/// <param name="node">Node holding the operation to inspect</param>
/// <returns>
/// The node of the newly inserted call when the operation was replaced,
/// otherwise <paramref name="node"/> unchanged
/// </returns>
private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node)
{
    Operation atomicOp = (Operation)node.Value;

    // Only operations on shared memory are of interest here.
    if (atomicOp.StorageKind != StorageKind.SharedMemory)
    {
        return node;
    }

    bool isMax = atomicOp.Inst == Instruction.AtomicMaxS32;
    bool isMin = atomicOp.Inst == Instruction.AtomicMinS32;

    if (!isMax && !isMin)
    {
        return node;
    }

    HelperFunctionName helperName = isMax
        ? HelperFunctionName.SharedAtomicMaxS32
        : HelperFunctionName.SharedAtomicMinS32;

    Operand result = atomicOp.Dest;
    Operand memoryId = atomicOp.GetSource(0);
    // Forwarded as is to the helper, which uses its first argument as a word offset.
    Operand offset = atomicOp.GetSource(1);
    Operand value = atomicOp.GetSource(2);

    // The memory ID is baked into the helper function, so it has to be a constant.
    Debug.Assert(memoryId.Type == OperandType.Constant);

    int functionId = hfm.GetOrCreateFunctionId(helperName, memoryId.Value);

    Operand[] callArgs = new Operand[] { Const(functionId), offset, value };

    // The call writes to the destination of the original operation; it is inserted
    // before that operation, then Utils.DeleteNode is invoked on the original node.
    Operation call = new Operation(Instruction.Call, 0, result, callArgs);
    LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

    Utils.DeleteNode(node, atomicOp);

    return callNode;
}
private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
{ {
TextureOperation texOp = (TextureOperation)node.Value; TextureOperation texOp = (TextureOperation)node.Value;

View file

@ -124,7 +124,7 @@ namespace Ryujinx.Graphics.Shader.Translation
private TextureDescriptor[] _cachedTextureDescriptors; private TextureDescriptor[] _cachedTextureDescriptors;
private TextureDescriptor[] _cachedImageDescriptors; private TextureDescriptor[] _cachedImageDescriptors;
public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options) public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize)
{ {
Stage = stage; Stage = stage;
GpuAccessor = gpuAccessor; GpuAccessor = gpuAccessor;
@ -143,7 +143,7 @@ namespace Ryujinx.Graphics.Shader.Translation
_usedTextures = new Dictionary<TextureInfo, TextureMeta>(); _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
_usedImages = new Dictionary<TextureInfo, TextureMeta>(); _usedImages = new Dictionary<TextureInfo, TextureMeta>();
ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties(), localMemorySize);
if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled())
{ {
@ -176,14 +176,17 @@ namespace Ryujinx.Graphics.Shader.Translation
OutputTopology outputTopology, OutputTopology outputTopology,
int maxOutputVertices, int maxOutputVertices,
IGpuAccessor gpuAccessor, IGpuAccessor gpuAccessor,
TranslationOptions options) : this(stage, gpuAccessor, options) TranslationOptions options) : this(stage, gpuAccessor, options, 0)
{ {
ThreadsPerInputPrimitive = 1; ThreadsPerInputPrimitive = 1;
OutputTopology = outputTopology; OutputTopology = outputTopology;
MaxOutputVertices = maxOutputVertices; MaxOutputVertices = maxOutputVertices;
} }
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options) public ShaderConfig(
ShaderHeader header,
IGpuAccessor gpuAccessor,
TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header))
{ {
GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;
ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
@ -197,6 +200,11 @@ namespace Ryujinx.Graphics.Shader.Translation
LastInVertexPipeline = header.Stage < ShaderStage.Fragment; LastInVertexPipeline = header.Stage < ShaderStage.Fragment;
} }
/// <summary>
/// Computes the local memory size for a shader from its header: the sum of the low and high
/// local memory sizes, plus the CRS size divided by <c>ThreadsPerWarp</c>.
/// </summary>
/// <param name="header">Shader header the sizes are read from</param>
/// <returns>The computed local memory size</returns>
private static int GetLocalMemorySize(ShaderHeader header)
{
    int lowAndHighSize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
    int crsShare = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;

    return lowAndHighSize + crsShare;
}
private void EnsureTransformFeedbackInitialized() private void EnsureTransformFeedbackInitialized()
{ {
if (HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null) if (HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null)

View file

@ -107,7 +107,7 @@ namespace Ryujinx.Graphics.Shader.Translation
if (options.Flags.HasFlag(TranslationFlags.Compute)) if (options.Flags.HasFlag(TranslationFlags.Compute))
{ {
config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options); config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize());
program = Decoder.Decode(config, address); program = Decoder.Decode(config, address);
} }