Implement Shader Instructions SUATOM and SURED (#2090)
* Initial Implementation
* Further improvements (no support for float/64-bit types)
* Merge atomic and reduce instructions, add missing format switch
* Fix rebase issues.
* Not used.
* Whoops. Fixed.
* Partial implementation of inc/dec, cleanup and TODOs
* Remove testing path
* Address Feedback
This commit is contained in:
parent
416dc8fde4
commit
142cededd4
16 changed files with 510 additions and 18 deletions
|
@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Version of the codegen (to be changed when codegen or guest format change).
|
/// Version of the codegen (to be changed when codegen or guest format change).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private const ulong ShaderCodeGenVersion = 2605;
|
private const ulong ShaderCodeGenVersion = 2092;
|
||||||
|
|
||||||
// Progress reporting helpers
|
// Progress reporting helpers
|
||||||
private volatile int _shaderCount;
|
private volatile int _shaderCount;
|
||||||
|
|
|
@ -132,9 +132,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
return Call(context, operation);
|
return Call(context, operation);
|
||||||
|
|
||||||
case Instruction.ImageLoad:
|
case Instruction.ImageLoad:
|
||||||
return ImageLoadOrStore(context, operation);
|
|
||||||
|
|
||||||
case Instruction.ImageStore:
|
case Instruction.ImageStore:
|
||||||
|
case Instruction.ImageAtomic:
|
||||||
return ImageLoadOrStore(context, operation);
|
return ImageLoadOrStore(context, operation);
|
||||||
|
|
||||||
case Instruction.LoadAttribute:
|
case Instruction.LoadAttribute:
|
||||||
|
|
|
@ -72,6 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier");
|
Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier");
|
||||||
Add(Instruction.ImageLoad, InstType.Special);
|
Add(Instruction.ImageLoad, InstType.Special);
|
||||||
Add(Instruction.ImageStore, InstType.Special);
|
Add(Instruction.ImageStore, InstType.Special);
|
||||||
|
Add(Instruction.ImageAtomic, InstType.Special);
|
||||||
Add(Instruction.IsNan, InstType.CallUnary, "isnan");
|
Add(Instruction.IsNan, InstType.CallUnary, "isnan");
|
||||||
Add(Instruction.LoadAttribute, InstType.Special);
|
Add(Instruction.LoadAttribute, InstType.Special);
|
||||||
Add(Instruction.LoadConstant, InstType.Special);
|
Add(Instruction.LoadConstant, InstType.Special);
|
||||||
|
|
|
@ -18,13 +18,39 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
// TODO: Bindless texture support. For now we just return 0/do nothing.
|
// TODO: Bindless texture support. For now we just return 0/do nothing.
|
||||||
if (isBindless)
|
if (isBindless)
|
||||||
{
|
{
|
||||||
return texOp.Inst == Instruction.ImageLoad ? NumberFormatter.FormatFloat(0) : "// imageStore(bindless)";
|
return texOp.Inst switch
|
||||||
|
{
|
||||||
|
Instruction.ImageStore => "// imageStore(bindless)",
|
||||||
|
Instruction.ImageLoad => NumberFormatter.FormatFloat(0),
|
||||||
|
_ => NumberFormatter.FormatInt(0)
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isArray = (texOp.Type & SamplerType.Array) != 0;
|
bool isArray = (texOp.Type & SamplerType.Array) != 0;
|
||||||
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
|
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
|
||||||
|
|
||||||
string texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore";
|
string texCall;
|
||||||
|
|
||||||
|
if (texOp.Inst == Instruction.ImageAtomic)
|
||||||
|
{
|
||||||
|
texCall = (texOp.Flags & TextureFlags.AtomicMask) switch {
|
||||||
|
TextureFlags.Add => "imageAtomicAdd",
|
||||||
|
TextureFlags.Minimum => "imageAtomicMin",
|
||||||
|
TextureFlags.Maximum => "imageAtomicMax",
|
||||||
|
TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value.
|
||||||
|
TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value.
|
||||||
|
TextureFlags.BitwiseAnd => "imageAtomicAnd",
|
||||||
|
TextureFlags.BitwiseOr => "imageAtomicOr",
|
||||||
|
TextureFlags.BitwiseXor => "imageAtomicXor",
|
||||||
|
TextureFlags.Swap => "imageAtomicExchange",
|
||||||
|
TextureFlags.CAS => "imageAtomicCompSwap",
|
||||||
|
_ => "imageAtomicAdd",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore";
|
||||||
|
}
|
||||||
|
|
||||||
int srcIndex = isBindless ? 1 : 0;
|
int srcIndex = isBindless ? 1 : 0;
|
||||||
|
|
||||||
|
@ -95,8 +121,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
|
|
||||||
if (texOp.Inst == Instruction.ImageStore)
|
if (texOp.Inst == Instruction.ImageStore)
|
||||||
{
|
{
|
||||||
int texIndex = context.FindImageDescriptorIndex(texOp);
|
|
||||||
|
|
||||||
VariableType type = texOp.Format.GetComponentType();
|
VariableType type = texOp.Format.GetComponentType();
|
||||||
|
|
||||||
string[] cElems = new string[4];
|
string[] cElems = new string[4];
|
||||||
|
@ -128,7 +152,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
|
||||||
Append(prefix + "vec4(" + string.Join(", ", cElems) + ")");
|
Append(prefix + "vec4(" + string.Join(", ", cElems) + ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (texOp.Inst == Instruction.ImageAtomic)
|
||||||
|
{
|
||||||
|
VariableType type = texOp.Format.GetComponentType();
|
||||||
|
|
||||||
|
if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
|
||||||
|
{
|
||||||
|
Append(Src(type)); // Compare value.
|
||||||
|
}
|
||||||
|
|
||||||
|
string value = (texOp.Flags & TextureFlags.AtomicMask) switch
|
||||||
|
{
|
||||||
|
TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
|
||||||
|
TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value
|
||||||
|
_ => Src(type)
|
||||||
|
};
|
||||||
|
|
||||||
|
Append(value);
|
||||||
|
|
||||||
|
texCall += ")";
|
||||||
|
|
||||||
|
if (type != VariableType.S32)
|
||||||
|
{
|
||||||
|
texCall = "int(" + texCall + ")";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
texCall += ")" + (texOp.Inst == Instruction.ImageLoad ? GetMask(texOp.Index) : "");
|
texCall += ")" + (texOp.Inst == Instruction.ImageLoad ? GetMask(texOp.Index) : "");
|
||||||
|
}
|
||||||
|
|
||||||
return texCall;
|
return texCall;
|
||||||
}
|
}
|
||||||
|
|
|
@ -362,7 +362,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
}
|
}
|
||||||
else if (operation is AstTextureOperation texOp &&
|
else if (operation is AstTextureOperation texOp &&
|
||||||
(texOp.Inst == Instruction.ImageLoad ||
|
(texOp.Inst == Instruction.ImageLoad ||
|
||||||
texOp.Inst == Instruction.ImageStore))
|
texOp.Inst == Instruction.ImageStore ||
|
||||||
|
texOp.Inst == Instruction.ImageAtomic))
|
||||||
{
|
{
|
||||||
return texOp.Format.GetComponentType();
|
return texOp.Format.GetComponentType();
|
||||||
}
|
}
|
||||||
|
|
46
Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs
Normal file
46
Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
using Ryujinx.Graphics.Shader.Instructions;
|
||||||
|
|
||||||
|
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Decoded form of the SUATOM (image atomic) shader instruction.
    /// </summary>
    class OpCodeSuatom : OpCodeTextureBase
    {
        /// <summary>Register decoded from bits 0-7; used by the emitter as the destination of the atomic result.</summary>
        public Register Rd { get; }

        /// <summary>Register decoded from bits 8-15; first of the consecutive registers the emitter reads coordinates from.</summary>
        public Register Ra { get; }

        /// <summary>Register decoded from bits 20-27; first of the consecutive registers the emitter reads values from.</summary>
        public Register Rb { get; }

        /// <summary>Register decoded from bits 39-46; read by the emitter as the handle source when the operation is bindless.</summary>
        public Register Rc { get; }

        /// <summary>Data type field (3 bits); its position depends on bit 54 of the opcode.</summary>
        public ReductionType Type { get; }

        /// <summary>Atomic operation field (bits 29-32). Only useful if <see cref="CompareAndSwap"/> is not true.</summary>
        public AtomicOp AtomicOp { get; }

        /// <summary>Image dimensions field (bits 33-35).</summary>
        public ImageDimensions Dimensions { get; }

        /// <summary>Clamp mode field (bits 49-50).</summary>
        public ClampMode ClampMode { get; }

        /// <summary>Value of opcode bit 28.</summary>
        public bool ByteAddress { get; }

        /// <summary>True when bit 54 is clear, or when bit 52 is set.</summary>
        public bool UseType { get; }

        /// <summary>True when bit 54 is set and bit 51 is clear.</summary>
        public bool IsBindless { get; }

        /// <summary>Value of opcode bit 55.</summary>
        public bool CompareAndSwap { get; }

        /// <summary>
        /// Factory with the signature used by the opcode table.
        /// </summary>
        /// <param name="emitter">Emitter associated with the instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction encoding</param>
        /// <returns>A new <see cref="OpCodeSuatom"/> instance</returns>
        public new static OpCode Create(InstEmitter emitter, ulong address, long opCode)
        {
            return new OpCodeSuatom(emitter, address, opCode);
        }

        /// <summary>
        /// Decodes the fields of a SUATOM instruction from its raw encoding.
        /// </summary>
        /// <param name="emitter">Emitter associated with the instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction encoding</param>
        public OpCodeSuatom(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            // Bit 54 selects the encoding variant that is able to express a bindless operation.
            bool bindlessCapable = opCode.Extract(54);

            // The type field lives at a different bit offset on each encoding variant.
            int typeOffset = bindlessCapable ? 36 : 51;

            Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
            Rb = new Register(opCode.Extract(20, 8), RegisterType.Gpr);
            Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr);

            Type        = (ReductionType)opCode.Extract(typeOffset, 3);
            ByteAddress = opCode.Extract(28);
            AtomicOp    = (AtomicOp)opCode.Extract(29, 4); // Only useful if CAS is not true.
            Dimensions  = (ImageDimensions)opCode.Extract(33, 3);
            ClampMode   = (ClampMode)opCode.Extract(49, 2);

            IsBindless = bindlessCapable && !opCode.Extract(51);
            UseType    = !bindlessCapable || opCode.Extract(52);

            CompareAndSwap = opCode.Extract(55);
        }
    }
}
|
44
Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs
Normal file
44
Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
using Ryujinx.Graphics.Shader.Instructions;
|
||||||
|
|
||||||
|
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Values of the 2-bit clamp field decoded by the image reduction/atomic opcodes.
    /// </summary>
    enum ClampMode
    {
        Ignore = 0,
        Trap = 2
    }

    /// <summary>
    /// Decoded form of the SURED (image reduction) shader instruction.
    /// </summary>
    class OpCodeSured : OpCodeTextureBase
    {
        /// <summary>Register decoded from bits 8-15; first of the consecutive registers the emitter reads coordinates from.</summary>
        public Register Ra { get; }

        /// <summary>Register decoded from bits 0-7; the emitter reads the reduction value from it.</summary>
        public Register Rb { get; }

        /// <summary>Register decoded from bits 39-46; read by the emitter as the handle source when the operation is bindless.</summary>
        public Register Rc { get; }

        /// <summary>Data type field (bits 20-22).</summary>
        public ReductionType Type { get; }

        /// <summary>Atomic operation field (bits 24-26).</summary>
        public AtomicOp AtomicOp { get; }

        /// <summary>Image dimensions field (bits 33-35).</summary>
        public ImageDimensions Dimensions { get; }

        /// <summary>Clamp mode field (bits 49-50).</summary>
        public ClampMode ClampMode { get; }

        /// <summary>Value of opcode bit 52.</summary>
        public bool UseType { get; }

        /// <summary>True when opcode bit 51 is clear.</summary>
        public bool IsBindless { get; }

        /// <summary>Value of opcode bit 23.</summary>
        public bool ByteAddress { get; }

        /// <summary>
        /// Factory with the signature used by the opcode table.
        /// </summary>
        /// <param name="emitter">Emitter associated with the instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction encoding</param>
        /// <returns>A new <see cref="OpCodeSured"/> instance</returns>
        public new static OpCode Create(InstEmitter emitter, ulong address, long opCode)
        {
            return new OpCodeSured(emitter, address, opCode);
        }

        /// <summary>
        /// Decodes the fields of a SURED instruction from its raw encoding.
        /// </summary>
        /// <param name="emitter">Emitter associated with the instruction</param>
        /// <param name="address">Address of the instruction</param>
        /// <param name="opCode">Raw instruction encoding</param>
        public OpCodeSured(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            // Note that, unlike the other opcodes, Rb sits in the low 8 bits here and Ra follows it.
            Rb = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
            Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr);

            Type        = (ReductionType)opCode.Extract(20, 3);
            ByteAddress = opCode.Extract(23);
            AtomicOp    = (AtomicOp)opCode.Extract(24, 3);
            Dimensions  = (ImageDimensions)opCode.Extract(33, 3);
            ClampMode   = (ClampMode)opCode.Extract(49, 2);

            UseType    = opCode.Extract(52);
            IsBindless = !opCode.Extract(51);
        }
    }
}
|
|
@ -209,6 +209,11 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
Set("1110111101011x", InstEmit.Sts, OpCodeMemory.Create);
|
Set("1110111101011x", InstEmit.Sts, OpCodeMemory.Create);
|
||||||
Set("11101011000xxx", InstEmit.Suld, OpCodeImage.Create);
|
Set("11101011000xxx", InstEmit.Suld, OpCodeImage.Create);
|
||||||
Set("11101011001xxx", InstEmit.Sust, OpCodeImage.Create);
|
Set("11101011001xxx", InstEmit.Sust, OpCodeImage.Create);
|
||||||
|
Set("11101011010xxx", InstEmit.Sured, OpCodeSured.Create);
|
||||||
|
Set("11101010110xxx", InstEmit.Suatom, OpCodeSuatom.Create);
|
||||||
|
Set("1110101010xxxx", InstEmit.Suatom, OpCodeSuatom.Create);
|
||||||
|
Set("11101010011xxx", InstEmit.Suatom, OpCodeSuatom.Create);
|
||||||
|
Set("1110101000xxxx", InstEmit.Suatom, OpCodeSuatom.Create);
|
||||||
Set("1111000011111x", InstEmit.Sync, OpCodeBranchPop.Create);
|
Set("1111000011111x", InstEmit.Sync, OpCodeBranchPop.Create);
|
||||||
Set("110000xxxx111x", InstEmit.Tex, OpCodeTex.Create);
|
Set("110000xxxx111x", InstEmit.Tex, OpCodeTex.Create);
|
||||||
Set("1101111010111x", InstEmit.TexB, OpCodeTexB.Create);
|
Set("1101111010111x", InstEmit.TexB, OpCodeTexB.Create);
|
||||||
|
|
|
@ -7,6 +7,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
U64 = 2,
|
U64 = 2,
|
||||||
FP32FtzRn = 3,
|
FP32FtzRn = 3,
|
||||||
FP16x2FtzRn = 4,
|
FP16x2FtzRn = 4,
|
||||||
S64 = 5
|
S64 = 5,
|
||||||
|
SD32 = 6,
|
||||||
|
SD64 = 7
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -277,6 +277,249 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
context.Add(operation);
|
context.Add(operation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Emits the IR for the SURED (image reduction) instruction: a single
/// <see cref="Instruction.ImageAtomic"/> texture operation with no destination operand.
/// </summary>
/// <param name="context">Emitter context that holds the current opcode and receives the generated operations</param>
public static void Sured(EmitterContext context)
{
    OpCodeSured op = (OpCodeSured)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");

        return;
    }

    // Cursors over the consecutive registers starting at Ra (coordinates) and Rb (value).
    int nextRa = op.Ra.Index;
    int nextRb = op.Rb.Index;

    // Copies the register under the cursor and advances it;
    // a cursor past the zero register index yields constant 0 instead.
    Operand ReadNext(ref int cursor)
    {
        return cursor > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(cursor++, RegisterType.Gpr));
    }

    List<Operand> operands = new List<Operand>();

    // Bindless operations carry the handle register as the very first source.
    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    int remainingCoords = type.GetDimensions();

    while (remainingCoords-- > 0)
    {
        operands.Add(ReadNext(ref nextRa));
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // Promote the 1D access to 2D by supplying a zero second coordinate.
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    // Array types take one more coordinate register (the flag is already present on the type).
    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(ReadNext(ref nextRa));
    }

    TextureFormat format;

    if (op.UseType)
    {
        if (op.ByteAddress)
        {
            // The first coordinate follows the bindless handle, when there is one.
            int firstCoord = op.IsBindless ? 1 : 0;

            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[firstCoord] = context.ShiftRightS32(operands[firstCoord], shift);
        }

        // TODO: FP and 64-bit formats.
        if (op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64)
        {
            format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
        }
        else
        {
            format = GetTextureFormat(op.Type);
        }
    }
    else if (op.IsBindless)
    {
        format = TextureFormat.R32Sint;
    }
    else
    {
        format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }

    // The value operand goes last.
    operands.Add(ReadNext(ref nextRb));

    int handle = op.HandleOffset;

    TextureFlags flags = GetAtomicOpFlags(op.AtomicOp);

    if (op.IsBindless)
    {
        handle = 0;
        flags |= TextureFlags.Bindless;
    }

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        null,
        operands.ToArray()));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Emits the IR for the SUATOM (image atomic) instruction: a single
/// <see cref="Instruction.ImageAtomic"/> texture operation whose destination is taken from Rd.
/// </summary>
/// <param name="context">Emitter context that holds the current opcode and receives the generated operations</param>
public static void Suatom(EmitterContext context)
{
    OpCodeSuatom op = (OpCodeSuatom)context.CurrOp;

    SamplerType type = ConvertSamplerType(op.Dimensions);

    if (type == SamplerType.None)
    {
        context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");

        return;
    }

    // Cursors over the consecutive registers starting at Ra (coordinates) and Rb (values).
    int nextRa = op.Ra.Index;
    int nextRb = op.Rb.Index;

    // Copies the register under the cursor and advances it;
    // a cursor past the zero register index yields constant 0 instead.
    Operand ReadNext(ref int cursor)
    {
        return cursor > RegisterConsts.RegisterZeroIndex
            ? Const(0)
            : context.Copy(Register(cursor++, RegisterType.Gpr));
    }

    List<Operand> operands = new List<Operand>();

    // Bindless operations carry the handle register as the very first source.
    if (op.IsBindless)
    {
        operands.Add(context.Copy(Register(op.Rc)));
    }

    int remainingCoords = type.GetDimensions();

    while (remainingCoords-- > 0)
    {
        operands.Add(ReadNext(ref nextRa));
    }

    if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
    {
        // Promote the 1D access to 2D by supplying a zero second coordinate.
        operands.Add(Const(0));

        type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
    }

    // Array types take one more coordinate register (the flag is already present on the type).
    if ((type & SamplerType.Array) == SamplerType.Array)
    {
        operands.Add(ReadNext(ref nextRa));
    }

    TextureFormat format;

    if (op.UseType)
    {
        if (op.ByteAddress)
        {
            // The first coordinate follows the bindless handle, when there is one.
            int firstCoord = op.IsBindless ? 1 : 0;

            Operand shift = Const(GetComponentSizeInBytesLog2(op.Type));

            operands[firstCoord] = context.ShiftRightS32(operands[firstCoord], shift);
        }

        // TODO: FP and 64-bit formats.
        if (op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64)
        {
            format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
        }
        else
        {
            format = GetTextureFormat(op.Type);
        }
    }
    else if (op.IsBindless)
    {
        format = TextureFormat.R32Sint;
    }
    else
    {
        format = context.Config.GetTextureFormatAtomic(op.HandleOffset);
    }

    // Compare-and-swap consumes two consecutive Rb registers, every other operation only one.
    int valueCount = op.CompareAndSwap ? 2 : 1;

    for (int valueIndex = 0; valueIndex < valueCount; valueIndex++)
    {
        operands.Add(ReadNext(ref nextRb));
    }

    int handle = op.HandleOffset;

    TextureFlags flags;

    if (op.CompareAndSwap)
    {
        flags = TextureFlags.CAS;
    }
    else
    {
        flags = GetAtomicOpFlags(op.AtomicOp);
    }

    if (op.IsBindless)
    {
        handle = 0;
        flags |= TextureFlags.Bindless;
    }

    // Unlike the sources, the destination register is referenced directly rather than copied.
    int rdIndex = op.Rd.Index;

    Operand dest = rdIndex > RegisterConsts.RegisterZeroIndex
        ? Const(0)
        : Register(rdIndex, RegisterType.Gpr);

    context.Add(context.CreateTextureOperation(
        Instruction.ImageAtomic,
        type,
        format,
        flags,
        handle,
        0,
        dest,
        operands.ToArray()));
}
|
||||||
|
|
||||||
public static void Tex(EmitterContext context)
|
public static void Tex(EmitterContext context)
|
||||||
{
|
{
|
||||||
EmitTextureSample(context, TextureFlags.None);
|
EmitTextureSample(context, TextureFlags.None);
|
||||||
|
@ -1332,6 +1575,55 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the base-2 logarithm of the size, in bytes, used for a reduction type.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>3 for the 64-bit types (U64, S64, SD64), 2 for every other value</returns>
private static int GetComponentSizeInBytesLog2(ReductionType type)
{
    switch (type)
    {
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return 3;

        // U32, S32, FP32FtzRn, FP16x2FtzRn, SD32 and any unknown value.
        default:
            return 2;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Maps a reduction type to the texture format used for the image operation.
/// </summary>
/// <param name="type">Reduction type of the instruction</param>
/// <returns>The texture format for the type; R32Uint for unknown values</returns>
private static TextureFormat GetTextureFormat(ReductionType type)
{
    switch (type)
    {
        case ReductionType.S32:
            return TextureFormat.R32Sint;

        case ReductionType.FP32FtzRn:
            return TextureFormat.R32Float;

        case ReductionType.FP16x2FtzRn:
            return TextureFormat.R16G16Float;

        // All 64-bit types share the two-component 32-bit unsigned format.
        case ReductionType.U64:
        case ReductionType.S64:
        case ReductionType.SD64:
            return TextureFormat.R32G32Uint;

        // U32, SD32 and any unknown value.
        default:
            return TextureFormat.R32Uint;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a decoded atomic operation into the matching texture flags value.
/// </summary>
/// <param name="op">Atomic operation decoded from the instruction</param>
/// <returns>The texture flag for the operation; <see cref="TextureFlags.Add"/> for unknown values</returns>
private static TextureFlags GetAtomicOpFlags(AtomicOp op)
{
    switch (op)
    {
        case AtomicOp.Minimum:
            return TextureFlags.Minimum;

        case AtomicOp.Maximum:
            return TextureFlags.Maximum;

        case AtomicOp.Increment:
            return TextureFlags.Increment;

        case AtomicOp.Decrement:
            return TextureFlags.Decrement;

        case AtomicOp.BitwiseAnd:
            return TextureFlags.BitwiseAnd;

        case AtomicOp.BitwiseOr:
            return TextureFlags.BitwiseOr;

        case AtomicOp.BitwiseExclusiveOr:
            return TextureFlags.BitwiseXor;

        case AtomicOp.Swap:
            return TextureFlags.Swap;

        // AtomicOp.Add and any unknown value.
        default:
            return TextureFlags.Add;
    }
}
|
||||||
|
|
||||||
private static SamplerType ConvertSamplerType(ImageDimensions target)
|
private static SamplerType ConvertSamplerType(ImageDimensions target)
|
||||||
{
|
{
|
||||||
return target switch
|
return target switch
|
||||||
|
|
|
@ -69,6 +69,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
|
||||||
GroupMemoryBarrier,
|
GroupMemoryBarrier,
|
||||||
ImageLoad,
|
ImageLoad,
|
||||||
ImageStore,
|
ImageStore,
|
||||||
|
ImageAtomic,
|
||||||
IsNan,
|
IsNan,
|
||||||
LoadAttribute,
|
LoadAttribute,
|
||||||
LoadConstant,
|
LoadConstant,
|
||||||
|
|
|
@ -13,6 +13,19 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
|
||||||
LodBias = 1 << 4,
|
LodBias = 1 << 4,
|
||||||
LodLevel = 1 << 5,
|
LodLevel = 1 << 5,
|
||||||
Offset = 1 << 6,
|
Offset = 1 << 6,
|
||||||
Offsets = 1 << 7
|
Offsets = 1 << 7,
|
||||||
|
|
||||||
|
AtomicMask = 15 << 16,
|
||||||
|
|
||||||
|
Add = 0 << 16,
|
||||||
|
Minimum = 1 << 16,
|
||||||
|
Maximum = 2 << 16,
|
||||||
|
Increment = 3 << 16,
|
||||||
|
Decrement = 4 << 16,
|
||||||
|
BitwiseAnd = 5 << 16,
|
||||||
|
BitwiseOr = 6 << 16,
|
||||||
|
BitwiseXor = 7 << 16,
|
||||||
|
Swap = 8 << 16,
|
||||||
|
CAS = 9 << 16
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -81,6 +81,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
||||||
Add(Instruction.FusedMultiplyAdd, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
|
Add(Instruction.FusedMultiplyAdd, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
|
||||||
Add(Instruction.ImageLoad, VariableType.F32);
|
Add(Instruction.ImageLoad, VariableType.F32);
|
||||||
Add(Instruction.ImageStore, VariableType.None);
|
Add(Instruction.ImageStore, VariableType.None);
|
||||||
|
Add(Instruction.ImageAtomic, VariableType.S32);
|
||||||
Add(Instruction.IsNan, VariableType.Bool, VariableType.F32);
|
Add(Instruction.IsNan, VariableType.Bool, VariableType.F32);
|
||||||
Add(Instruction.LoadAttribute, VariableType.F32, VariableType.S32, VariableType.S32, VariableType.S32);
|
Add(Instruction.LoadAttribute, VariableType.F32, VariableType.S32, VariableType.S32, VariableType.S32);
|
||||||
Add(Instruction.LoadConstant, VariableType.F32, VariableType.S32, VariableType.S32);
|
Add(Instruction.LoadConstant, VariableType.F32, VariableType.S32, VariableType.S32);
|
||||||
|
@ -148,6 +149,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
||||||
// that can improve the decompiler output.
|
// that can improve the decompiler output.
|
||||||
if (inst == Instruction.ImageLoad ||
|
if (inst == Instruction.ImageLoad ||
|
||||||
inst == Instruction.ImageStore ||
|
inst == Instruction.ImageStore ||
|
||||||
|
inst == Instruction.ImageAtomic ||
|
||||||
inst == Instruction.Lod ||
|
inst == Instruction.Lod ||
|
||||||
inst == Instruction.TextureSample)
|
inst == Instruction.TextureSample)
|
||||||
{
|
{
|
||||||
|
|
|
@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16),
|
src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16),
|
||||||
src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16));
|
src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16));
|
||||||
}
|
}
|
||||||
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
|
else if (texOp.Inst == Instruction.ImageLoad ||
|
||||||
|
texOp.Inst == Instruction.ImageStore ||
|
||||||
|
texOp.Inst == Instruction.ImageAtomic)
|
||||||
{
|
{
|
||||||
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
|
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
|
||||||
|
|
||||||
|
@ -69,7 +71,16 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
{
|
{
|
||||||
int cbufOffset = src0.GetCbufOffset();
|
int cbufOffset = src0.GetCbufOffset();
|
||||||
int cbufSlot = src0.GetCbufSlot();
|
int cbufSlot = src0.GetCbufSlot();
|
||||||
|
|
||||||
|
if (texOp.Inst == Instruction.ImageAtomic)
|
||||||
|
{
|
||||||
|
texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
|
texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
|
||||||
|
}
|
||||||
|
|
||||||
SetHandle(config, texOp, cbufOffset, cbufSlot);
|
SetHandle(config, texOp, cbufOffset, cbufSlot);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -278,6 +278,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
case Instruction.AtomicSwap:
|
case Instruction.AtomicSwap:
|
||||||
case Instruction.AtomicXor:
|
case Instruction.AtomicXor:
|
||||||
case Instruction.Call:
|
case Instruction.Call:
|
||||||
|
case Instruction.ImageAtomic:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -162,6 +162,28 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||||
return format;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks if a texture format is one of the formats accepted for image atomic operations.
/// </summary>
/// <param name="format">Texture format to check</param>
/// <returns>True for R32Sint and R32Uint, false for any other format</returns>
private bool FormatSupportsAtomic(TextureFormat format)
{
    switch (format)
    {
        case TextureFormat.R32Sint:
        case TextureFormat.R32Uint:
            return true;

        default:
            return false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Queries the format of a texture that is going to be used by an image atomic operation.
/// </summary>
/// <remarks>
/// Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
/// and must have a type specified, so R32Sint is used when the queried format can't be used.
/// </remarks>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot forwarded to the format query (-1 by default)</param>
/// <returns>The queried format if it supports atomics, otherwise R32Sint</returns>
public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
{
    TextureFormat queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (FormatSupportsAtomic(queried))
    {
        return queried;
    }

    GpuAccessor.Log($"Unsupported format for texture {handle}: {queried}.");

    return TextureFormat.R32Sint;
}
|
||||||
|
|
||||||
public void SizeAdd(int size)
|
public void SizeAdd(int size)
|
||||||
{
|
{
|
||||||
Size += size;
|
Size += size;
|
||||||
|
@ -270,8 +292,8 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||||
int handle)
|
int handle)
|
||||||
{
|
{
|
||||||
inst &= Instruction.Mask;
|
inst &= Instruction.Mask;
|
||||||
bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore;
|
bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
|
||||||
bool isWrite = inst == Instruction.ImageStore;
|
bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
|
||||||
bool accurateType = inst != Instruction.TextureSize && inst != Instruction.Lod;
|
bool accurateType = inst != Instruction.TextureSize && inst != Instruction.Lod;
|
||||||
|
|
||||||
if (isImage)
|
if (isImage)
|
||||||
|
|
Loading…
Reference in a new issue