Implement Shader Instructions SUATOM and SURED (#2090)

* Initial Implementation

* Further improvements (no support for float/64-bit types)

* Merge atomic and reduce instructions, add missing format switch

* Fix rebase issues.

* Not used.

* Whoops. Fixed.

* Partial implementation of inc/dec, cleanup and TODOs

* Remove testing path

* Address Feedback
This commit is contained in:
riperiperi 2021-08-31 06:51:57 +01:00 committed by GitHub
parent 416dc8fde4
commit 142cededd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 510 additions and 18 deletions

View file

@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Version of the codegen (to be changed when codegen or guest format change).
/// </summary>
private const ulong ShaderCodeGenVersion = 2605;
private const ulong ShaderCodeGenVersion = 2092;
// Progress reporting helpers
private volatile int _shaderCount;

View file

@ -132,9 +132,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return Call(context, operation);
case Instruction.ImageLoad:
return ImageLoadOrStore(context, operation);
case Instruction.ImageStore:
case Instruction.ImageAtomic:
return ImageLoadOrStore(context, operation);
case Instruction.LoadAttribute:

View file

@ -72,6 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier");
Add(Instruction.ImageLoad, InstType.Special);
Add(Instruction.ImageStore, InstType.Special);
Add(Instruction.ImageAtomic, InstType.Special);
Add(Instruction.IsNan, InstType.CallUnary, "isnan");
Add(Instruction.LoadAttribute, InstType.Special);
Add(Instruction.LoadConstant, InstType.Special);

View file

@ -18,13 +18,39 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
// TODO: Bindless texture support. For now we just return 0/do nothing.
if (isBindless)
{
return texOp.Inst == Instruction.ImageLoad ? NumberFormatter.FormatFloat(0) : "// imageStore(bindless)";
return texOp.Inst switch
{
Instruction.ImageStore => "// imageStore(bindless)",
Instruction.ImageLoad => NumberFormatter.FormatFloat(0),
_ => NumberFormatter.FormatInt(0)
};
}
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
string texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore";
string texCall;
if (texOp.Inst == Instruction.ImageAtomic)
{
texCall = (texOp.Flags & TextureFlags.AtomicMask) switch {
TextureFlags.Add => "imageAtomicAdd",
TextureFlags.Minimum => "imageAtomicMin",
TextureFlags.Maximum => "imageAtomicMax",
TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value.
TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value.
TextureFlags.BitwiseAnd => "imageAtomicAnd",
TextureFlags.BitwiseOr => "imageAtomicOr",
TextureFlags.BitwiseXor => "imageAtomicXor",
TextureFlags.Swap => "imageAtomicExchange",
TextureFlags.CAS => "imageAtomicCompSwap",
_ => "imageAtomicAdd",
};
}
else
{
texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore";
}
int srcIndex = isBindless ? 1 : 0;
@ -95,8 +121,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
if (texOp.Inst == Instruction.ImageStore)
{
int texIndex = context.FindImageDescriptorIndex(texOp);
VariableType type = texOp.Format.GetComponentType();
string[] cElems = new string[4];
@ -128,7 +152,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Append(prefix + "vec4(" + string.Join(", ", cElems) + ")");
}
if (texOp.Inst == Instruction.ImageAtomic)
{
VariableType type = texOp.Format.GetComponentType();
if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
{
Append(Src(type)); // Compare value.
}
string value = (texOp.Flags & TextureFlags.AtomicMask) switch
{
TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value
_ => Src(type)
};
Append(value);
texCall += ")";
if (type != VariableType.S32)
{
texCall = "int(" + texCall + ")";
}
}
else
{
texCall += ")" + (texOp.Inst == Instruction.ImageLoad ? GetMask(texOp.Index) : "");
}
return texCall;
}

View file

@ -362,7 +362,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
else if (operation is AstTextureOperation texOp &&
(texOp.Inst == Instruction.ImageLoad ||
texOp.Inst == Instruction.ImageStore))
texOp.Inst == Instruction.ImageStore ||
texOp.Inst == Instruction.ImageAtomic))
{
return texOp.Format.GetComponentType();
}

View file

@ -0,0 +1,46 @@
using Ryujinx.Graphics.Shader.Instructions;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Decoded fields of the SUATOM (image atomic) shader instruction.
    /// </summary>
    class OpCodeSuatom : OpCodeTextureBase
    {
        // Register operands. The emitter uses Rd as the destination, Ra for the
        // coordinates, Rb for the operand value(s) and Rc for the bindless handle.
        public Register Rd { get; }
        public Register Ra { get; }
        public Register Rb { get; }
        public Register Rc { get; }

        public ReductionType Type { get; }
        public AtomicOp AtomicOp { get; }
        public ImageDimensions Dimensions { get; }
        public ClampMode ClampMode { get; }

        public bool ByteAddress { get; }
        public bool UseType { get; }
        public bool IsBindless { get; }
        public bool CompareAndSwap { get; }

        /// <summary>
        /// Factory used by the opcode table to build a decoded SUATOM instruction.
        /// </summary>
        public new static OpCode Create(InstEmitter emitter, ulong address, long opCode)
        {
            return new OpCodeSuatom(emitter, address, opCode);
        }

        /// <summary>
        /// Extracts every SUATOM field from the raw instruction word.
        /// </summary>
        /// <param name="emitter">Emitter associated with this instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction word</param>
        public OpCodeSuatom(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            // Bit 54 selects between two field layouts; only one of them carries
            // the bindless and "use type" bits.
            bool hasBindlessLayout = opCode.Extract(54);

            if (hasBindlessLayout)
            {
                Type = (ReductionType)opCode.Extract(36, 3);
                IsBindless = !opCode.Extract(51);
                UseType = opCode.Extract(52);
            }
            else
            {
                // Bits 51..53 hold the type here, so the instruction is never
                // bindless and always uses the encoded type.
                Type = (ReductionType)opCode.Extract(51, 3);
                IsBindless = false;
                UseType = true;
            }

            CompareAndSwap = opCode.Extract(55);
            ClampMode = (ClampMode)opCode.Extract(49, 2);
            Dimensions = (ImageDimensions)opCode.Extract(33, 3);

            // Only useful if CAS is not true.
            AtomicOp = (AtomicOp)opCode.Extract(29, 4);
            ByteAddress = opCode.Extract(28);

            Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr);
            Rb = new Register(opCode.Extract(20, 8), RegisterType.Gpr);
            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
            Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
        }
    }
}

View file

@ -0,0 +1,44 @@
using Ryujinx.Graphics.Shader.Instructions;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Values of the two-bit clamp mode field decoded by the image atomic opcodes.
    /// </summary>
    enum ClampMode
    {
        Ignore = 0,
        Trap = 2
    }

    /// <summary>
    /// Decoded fields of the SURED (image reduction) shader instruction.
    /// </summary>
    class OpCodeSured : OpCodeTextureBase
    {
        // Register operands. The emitter uses Ra for the coordinates, Rb for the
        // operand value and Rc for the bindless handle.
        public Register Ra { get; }
        public Register Rb { get; }
        public Register Rc { get; }

        public ReductionType Type { get; }
        public AtomicOp AtomicOp { get; }
        public ImageDimensions Dimensions { get; }
        public ClampMode ClampMode { get; }

        public bool UseType { get; }
        public bool IsBindless { get; }
        public bool ByteAddress { get; }

        /// <summary>
        /// Factory used by the opcode table to build a decoded SURED instruction.
        /// </summary>
        public new static OpCode Create(InstEmitter emitter, ulong address, long opCode)
        {
            return new OpCodeSured(emitter, address, opCode);
        }

        /// <summary>
        /// Extracts every SURED field from the raw instruction word.
        /// </summary>
        /// <param name="emitter">Emitter associated with this instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction word</param>
        public OpCodeSured(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            // Flag bits.
            UseType = opCode.Extract(52);
            IsBindless = !opCode.Extract(51);
            ByteAddress = opCode.Extract(23);

            // Multi-bit fields.
            ClampMode = (ClampMode)opCode.Extract(49, 2);
            Dimensions = (ImageDimensions)opCode.Extract(33, 3);
            AtomicOp = (AtomicOp)opCode.Extract(24, 3);
            Type = (ReductionType)opCode.Extract(20, 3);

            // Note that Rb is taken from the lowest byte and Ra from the one above it.
            Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr);
            Rb = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
        }
    }
}

View file

@ -209,6 +209,11 @@ namespace Ryujinx.Graphics.Shader.Decoders
Set("1110111101011x", InstEmit.Sts, OpCodeMemory.Create);
Set("11101011000xxx", InstEmit.Suld, OpCodeImage.Create);
Set("11101011001xxx", InstEmit.Sust, OpCodeImage.Create);
Set("11101011010xxx", InstEmit.Sured, OpCodeSured.Create);
Set("11101010110xxx", InstEmit.Suatom, OpCodeSuatom.Create);
Set("1110101010xxxx", InstEmit.Suatom, OpCodeSuatom.Create);
Set("11101010011xxx", InstEmit.Suatom, OpCodeSuatom.Create);
Set("1110101000xxxx", InstEmit.Suatom, OpCodeSuatom.Create);
Set("1111000011111x", InstEmit.Sync, OpCodeBranchPop.Create);
Set("110000xxxx111x", InstEmit.Tex, OpCodeTex.Create);
Set("1101111010111x", InstEmit.TexB, OpCodeTexB.Create);

View file

@ -7,6 +7,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
U64 = 2,
FP32FtzRn = 3,
FP16x2FtzRn = 4,
S64 = 5
S64 = 5,
SD32 = 6,
SD64 = 7
}
}

View file

@ -277,6 +277,249 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Add(operation);
}
/// <summary>
/// Emits the IR for SURED: an image atomic operation that has no destination operand.
/// </summary>
/// <param name="context">Emitter context whose current opcode is an <see cref="OpCodeSured"/></param>
public static void Sured(EmitterContext context)
{
    OpCodeSured op = (OpCodeSured)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");

        return;
    }

    bool bindless = op.IsBindless;

    // Ra and Rb each name the first register of a run; every read advances to the next one.
    // Indices above the zero register read as constant 0.
    int nextRa = op.Ra.Index;
    int nextRb = op.Rb.Index;

    Operand ReadRa() => nextRa > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : context.Copy(Register(nextRa++, RegisterType.Gpr));

    Operand ReadRb() => nextRb > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : context.Copy(Register(nextRb++, RegisterType.Gpr));

    List<Operand> operands = new List<Operand>();

    // A bindless operation carries Rc as its first source.
    if (bindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    // Coordinates come next, one Ra register per dimension.
    int remainingCoords = type.GetDimensions();

    while (remainingCoords > 0)
    {
        operands.Add(ReadRa());
        remainingCoords--;
    }

    // 1D images are promoted to 2D with a zero second coordinate when Sample1DAs2D is set.
    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    // Array images take one more Ra register (the type already carries the Array flag).
    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(ReadRa());
    }

    TextureFormat format;

    if (op.UseType)
    {
        if (op.ByteAddress)
        {
            // The first coordinate sits right after the optional bindless source; it is
            // shifted right by log2 of the component size (presumably bytes -> elements).
            int firstCoord = bindless ? 1 : 0;

            operands[firstCoord] = context.ShiftRightS32(
                operands[firstCoord],
                Const(GetComponentSizeInBytesLog2(op.Type)));
        }

        // TODO: FP and 64-bit formats.
        bool formatFromTexture = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = formatFromTexture
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }
    else if (bindless)
    {
        format = TextureFormat.R32Sint;
    }
    else
    {
        format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }

    // The value operand is the last source.
    operands.Add(ReadRb());

    TextureFlags flags = GetAtomicOpFlags(op.AtomicOp);
    int handle = op.HandleOffset;

    if (bindless)
    {
        flags |= TextureFlags.Bindless;
        handle = 0;
    }

    TextureOperation operation = context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        null,
        operands.ToArray());

    context.Add(operation);
}
/// <summary>
/// Emits the IR for SUATOM: an image atomic operation (optionally compare-and-swap)
/// with a destination operand.
/// </summary>
/// <param name="context">Emitter context whose current opcode is an <see cref="OpCodeSuatom"/></param>
public static void Suatom(EmitterContext context)
{
    OpCodeSuatom op = (OpCodeSuatom)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");

        return;
    }

    bool bindless = op.IsBindless;
    bool compareAndSwap = op.CompareAndSwap;

    // Ra, Rb and Rd each name the first register of a run; every read advances to the next one.
    // Indices above the zero register read as constant 0.
    int nextRa = op.Ra.Index;
    int nextRb = op.Rb.Index;
    int nextRd = op.Rd.Index;

    Operand ReadRa() => nextRa > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : context.Copy(Register(nextRa++, RegisterType.Gpr));

    Operand ReadRb() => nextRb > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : context.Copy(Register(nextRb++, RegisterType.Gpr));

    // The destination is used directly, without a copy.
    Operand NextDest() => nextRd > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : Register(nextRd++, RegisterType.Gpr);

    List<Operand> operands = new List<Operand>();

    // A bindless operation carries Rc as its first source.
    if (bindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    // Coordinates come next, one Ra register per dimension.
    int remainingCoords = type.GetDimensions();

    while (remainingCoords > 0)
    {
        operands.Add(ReadRa());
        remainingCoords--;
    }

    // 1D images are promoted to 2D with a zero second coordinate when Sample1DAs2D is set.
    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    // Array images take one more Ra register (the type already carries the Array flag).
    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(ReadRa());
    }

    TextureFormat format;

    if (op.UseType)
    {
        if (op.ByteAddress)
        {
            // The first coordinate sits right after the optional bindless source; it is
            // shifted right by log2 of the component size (presumably bytes -> elements).
            int firstCoord = bindless ? 1 : 0;

            operands[firstCoord] = context.ShiftRightS32(
                operands[firstCoord],
                Const(GetComponentSizeInBytesLog2(op.Type)));
        }

        // TODO: FP and 64-bit formats.
        bool formatFromTexture = op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64;

        format = formatFromTexture
            ? context.Config.GetTextureFormatAtomic(op.HandleOffset)
            : GetTextureFormat(op.Type);
    }
    else if (bindless)
    {
        format = TextureFormat.R32Sint;
    }
    else
    {
        format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }

    // Compare-and-swap consumes two consecutive Rb registers, everything else just one.
    int valueCount = compareAndSwap ? 2 : 1;

    for (int i = 0; i < valueCount; i++)
    {
        operands.Add(ReadRb());
    }

    TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(op.AtomicOp);
    int handle = op.HandleOffset;

    if (bindless)
    {
        flags |= TextureFlags.Bindless;
        handle = 0;
    }

    Operand dest = NextDest();

    TextureOperation operation = context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        dest,
        operands.ToArray());

    context.Add(operation);
}
public static void Tex(EmitterContext context)
{
EmitTextureSample(context, TextureFlags.None);
@ -1332,6 +1575,55 @@ namespace Ryujinx.Graphics.Shader.Instructions
};
}
/// <summary>
/// Gets log2 of the component size, in bytes, for a reduction type.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>3 for the 64-bit types, 2 for every other value</returns>
private static int GetComponentSizeInBytesLog2(ReductionType type)
{
    switch (type)
    {
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return 3;

        default:
            // U32, S32, FP32FtzRn, FP16x2FtzRn, SD32 and any unknown value.
            return 2;
    }
}
/// <summary>
/// Maps a reduction type to the texture format used for the image atomic operation.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>The matching format; <see cref="TextureFormat.R32Uint"/> for unlisted values</returns>
private static TextureFormat GetTextureFormat(ReductionType type)
{
    switch (type)
    {
        case ReductionType.S32:
            return TextureFormat.R32Sint;

        case ReductionType.FP32FtzRn:
            return TextureFormat.R32Float;

        case ReductionType.FP16x2FtzRn:
            return TextureFormat.R16G16Float;

        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return TextureFormat.R32G32Uint;

        default:
            // U32, SD32 and any unknown value.
            return TextureFormat.R32Uint;
    }
}
/// <summary>
/// Maps a decoded atomic operation to the texture flag value that identifies it.
/// </summary>
/// <param name="op">Atomic operation decoded from the instruction</param>
/// <returns>The matching flag; <see cref="TextureFlags.Add"/> for unlisted values</returns>
private static TextureFlags GetAtomicOpFlags(AtomicOp op)
{
    switch (op)
    {
        case AtomicOp.Minimum:
            return TextureFlags.Minimum;

        case AtomicOp.Maximum:
            return TextureFlags.Maximum;

        case AtomicOp.Increment:
            return TextureFlags.Increment;

        case AtomicOp.Decrement:
            return TextureFlags.Decrement;

        case AtomicOp.BitwiseAnd:
            return TextureFlags.BitwiseAnd;

        case AtomicOp.BitwiseOr:
            return TextureFlags.BitwiseOr;

        case AtomicOp.BitwiseExclusiveOr:
            return TextureFlags.BitwiseXor;

        case AtomicOp.Swap:
            return TextureFlags.Swap;

        default:
            // AtomicOp.Add and any unknown value.
            return TextureFlags.Add;
    }
}
private static SamplerType ConvertSamplerType(ImageDimensions target)
{
return target switch

View file

@ -69,6 +69,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
GroupMemoryBarrier,
ImageLoad,
ImageStore,
ImageAtomic,
IsNan,
LoadAttribute,
LoadConstant,

View file

@ -13,6 +13,19 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
LodBias = 1 << 4,
LodLevel = 1 << 5,
Offset = 1 << 6,
Offsets = 1 << 7
Offsets = 1 << 7,
AtomicMask = 15 << 16,
Add = 0 << 16,
Minimum = 1 << 16,
Maximum = 2 << 16,
Increment = 3 << 16,
Decrement = 4 << 16,
BitwiseAnd = 5 << 16,
BitwiseOr = 6 << 16,
BitwiseXor = 7 << 16,
Swap = 8 << 16,
CAS = 9 << 16
}
}

View file

@ -81,6 +81,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.FusedMultiplyAdd, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.ImageLoad, VariableType.F32);
Add(Instruction.ImageStore, VariableType.None);
Add(Instruction.ImageAtomic, VariableType.S32);
Add(Instruction.IsNan, VariableType.Bool, VariableType.F32);
Add(Instruction.LoadAttribute, VariableType.F32, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.LoadConstant, VariableType.F32, VariableType.S32, VariableType.S32);
@ -148,6 +149,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// that can improve the decompiler output.
if (inst == Instruction.ImageLoad ||
inst == Instruction.ImageStore ||
inst == Instruction.ImageAtomic ||
inst == Instruction.Lod ||
inst == Instruction.TextureSample)
{

View file

@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16),
src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16));
}
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
else if (texOp.Inst == Instruction.ImageLoad ||
texOp.Inst == Instruction.ImageStore ||
texOp.Inst == Instruction.ImageAtomic)
{
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
@ -69,7 +71,16 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
int cbufOffset = src0.GetCbufOffset();
int cbufSlot = src0.GetCbufSlot();
if (texOp.Inst == Instruction.ImageAtomic)
{
texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
}
else
{
texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
}
SetHandle(config, texOp, cbufOffset, cbufSlot);
}
}

View file

@ -278,6 +278,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
case Instruction.AtomicSwap:
case Instruction.AtomicXor:
case Instruction.Call:
case Instruction.ImageAtomic:
return true;
}
}

View file

@ -162,6 +162,28 @@ namespace Ryujinx.Graphics.Shader.Translation
return format;
}
/// <summary>
/// Checks whether a texture format is one of the formats accepted for image atomics.
/// </summary>
/// <param name="format">Format to check</param>
/// <returns>True for R32Sint and R32Uint, false otherwise</returns>
private bool FormatSupportsAtomic(TextureFormat format)
{
    switch (format)
    {
        case TextureFormat.R32Sint:
        case TextureFormat.R32Uint:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Queries the format of a texture for use by an image atomic operation.
/// </summary>
/// <remarks>
/// Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
/// so a concrete type must always be specified.
/// </remarks>
/// <param name="handle">Texture handle passed to the GPU accessor query</param>
/// <param name="cbufSlot">Constant buffer slot passed to the GPU accessor query</param>
/// <returns>The queried format when it supports atomics, otherwise R32Sint</returns>
public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
{
    var queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (FormatSupportsAtomic(queried))
    {
        return queried;
    }

    // Report the unusable format, then fall back to the default.
    GpuAccessor.Log($"Unsupported format for texture {handle}: {queried}.");

    return TextureFormat.R32Sint;
}
public void SizeAdd(int size)
{
Size += size;
@ -270,8 +292,8 @@ namespace Ryujinx.Graphics.Shader.Translation
int handle)
{
inst &= Instruction.Mask;
bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore;
bool isWrite = inst == Instruction.ImageStore;
bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
bool accurateType = inst != Instruction.TextureSize && inst != Instruction.Lod;
if (isImage)