diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs
index 859535670..20d567fe5 100644
--- a/src/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/src/ARMeilleure/Decoders/OpCodeTable.cs
@@ -746,6 +746,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, OpCode32AluRsImm.Create);
             SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
             SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create);
+            SetA32("<<<<01100010xxxxxxxx11110001xxxx", InstName.Qadd16, InstEmit32.Qadd16, OpCode32AluReg.Create);
             SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, OpCode32AluReg.Create);
             SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, OpCode32AluReg.Create);
             SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, OpCode32AluReg.Create);
@@ -1034,6 +1035,7 @@ namespace ARMeilleure.Decoders
             SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
             SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32);
             SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding.
+            SetAsimd("111100111x11<<10xxxx001100x0xxxx", InstName.Vshll, InstEmit32.Vshll2, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); // A2 encoding.
             SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
             SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32);
             SetAsimd("111100111x>>>xxxxxxx0101>xx1xxxx", InstName.Vsli, InstEmit32.Vsli_I, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32);
diff --git a/src/ARMeilleure/Instructions/InstEmitAlu32.cs b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
index 9f419ba99..8eabe093e 100644
--- a/src/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/src/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -292,6 +292,16 @@ namespace ARMeilleure.Instructions
             EmitAluStore(context, res);
         }
 
+        public static void Qadd16(ArmEmitterContext context)
+        {
+            OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
+
+            SetIntA32(context, op.Rd, EmitSigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
+            {
+                EmitSaturateRange(context, d, context.Add(n, m), 16, unsigned: false, setQ: false);
+            }));
+        }
+
         public static void Rbit(ArmEmitterContext context)
         {
             Operand m = GetAluM(context);
@@ -976,6 +986,94 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        private static void EmitSaturateRange(ArmEmitterContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
+        {
+            Debug.Assert(saturateTo <= 32);
+            Debug.Assert(!unsigned || saturateTo < 32);
+
+            if (!unsigned && saturateTo == 32)
+            {
+                // No saturation possible for this case.
+
+                context.Copy(result, value);
+
+                return;
+            }
+            else if (saturateTo == 0)
+            {
+                // Result is always zero if we saturate 0 bits.
+
+                context.Copy(result, Const(0));
+
+                return;
+            }
+
+            Operand satValue;
+
+            if (unsigned)
+            {
+                // Negative values always saturate (to zero).
+                // So we must always ignore the sign bit when masking, so that the truncated value will differ from the original one.
+
+                satValue = context.BitwiseAnd(value, Const((int)(uint.MaxValue >> (32 - (int)saturateTo))));
+            }
+            else
+            {
+                satValue = context.ShiftLeft(value, Const(32 - (int)saturateTo));
+                satValue = context.ShiftRightSI(satValue, Const(32 - (int)saturateTo));
+            }
+
+            // If the result is 0, the values are equal and we don't need saturation.
+            Operand lblNoSat = Label();
+            context.BranchIfFalse(lblNoSat, context.Subtract(value, satValue));
+
+            // Saturate and set Q flag.
+            if (unsigned)
+            {
+                if (saturateTo == 31)
+                {
+                    // Only saturation case possible when going from 32 bits signed to 32 or 31 bits unsigned
+                    // is when the signed input is negative, as all positive values are representable on a 31 bits range.
+
+                    satValue = Const(0);
+                }
+                else
+                {
+                    satValue = context.ShiftRightSI(value, Const(31));
+                    satValue = context.BitwiseNot(satValue);
+                    satValue = context.ShiftRightUI(satValue, Const(32 - (int)saturateTo));
+                }
+            }
+            else
+            {
+                if (saturateTo == 1)
+                {
+                    satValue = context.ShiftRightSI(value, Const(31));
+                }
+                else
+                {
+                    satValue = Const(uint.MaxValue >> (33 - (int)saturateTo));
+                    satValue = context.BitwiseExclusiveOr(satValue, context.ShiftRightSI(value, Const(31)));
+                }
+            }
+
+            if (setQ)
+            {
+                SetFlag(context, PState.QFlag, Const(1));
+            }
+
+            context.Copy(result, satValue);
+
+            Operand lblExit = Label();
+            context.Branch(lblExit);
+
+            context.MarkLabel(lblNoSat);
+
+            context.Copy(result, value);
+
+            context.MarkLabel(lblExit);
+        }
+
         private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
         {
             Debug.Assert(saturateTo <= 32);
@@ -1053,6 +1151,21 @@ namespace ARMeilleure.Instructions
             context.MarkLabel(lblExit);
         }
 
+        private static Operand EmitSigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
+        {
+            Operand tempD = context.AllocateLocal(OperandType.I32);
+
+            Operand tempN = context.SignExtend16(OperandType.I32, rn);
+            Operand tempM = context.SignExtend16(OperandType.I32, rm);
+            elementAction(tempD, tempN, tempM);
+            Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD);
+
+            tempN = context.ShiftRightSI(rn, Const(16));
+            tempM = context.ShiftRightSI(rm, Const(16));
+            elementAction(tempD, tempN, tempM);
+            return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16)));
+        }
+
         private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
         {
             Operand tempD = context.AllocateLocal(OperandType.I32);
diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index e9e3b52b9..eb28a0c5a 100644
--- a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -106,6 +106,38 @@ namespace ARMeilleure.Instructions
             context.Copy(GetVecA32(op.Qd), res);
         }
 
+        public static void Vshll2(ArmEmitterContext context)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);
+
+                if (op.Size == 2)
+                {
+                    if (op.U)
+                    {
+                        me = context.ZeroExtend32(OperandType.I64, me);
+                    }
+                    else
+                    {
+                        me = context.SignExtend32(OperandType.I64, me);
+                    }
+                }
+
+                me = context.ShiftLeft(me, Const(8 << op.Size));
+
+                res = EmitVectorInsert(context, res, me, index, op.Size + 1);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         public static void Vshr(ArmEmitterContext context)
         {
             OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs
index ac85412d1..74c33155b 100644
--- a/src/ARMeilleure/Instructions/InstName.cs
+++ b/src/ARMeilleure/Instructions/InstName.cs
@@ -527,6 +527,7 @@ namespace ARMeilleure.Instructions
         Pld,
         Pop,
         Push,
+        Qadd16,
         Rev,
         Revsh,
         Rsb,
diff --git a/src/ARMeilleure/Translation/DelegateInfo.cs b/src/ARMeilleure/Translation/DelegateInfo.cs
index 27479a003..706625437 100644
--- a/src/ARMeilleure/Translation/DelegateInfo.cs
+++ b/src/ARMeilleure/Translation/DelegateInfo.cs
@@ -1,5 +1,4 @@
 using System;
-using System.Runtime.InteropServices;
 
 namespace ARMeilleure.Translation
 {
@@ -11,11 +10,10 @@ namespace ARMeilleure.Translation
 
         public IntPtr FuncPtr { get; }
 
-        public DelegateInfo(Delegate dlg)
+        public DelegateInfo(Delegate dlg, IntPtr funcPtr)
         {
             _dlg = dlg;
-
-            FuncPtr = Marshal.GetFunctionPointerForDelegate(dlg);
+            FuncPtr = funcPtr;
         }
     }
 }
diff --git a/src/ARMeilleure/Translation/Delegates.cs b/src/ARMeilleure/Translation/Delegates.cs
index 63db789df..66412b8e6 100644
--- a/src/ARMeilleure/Translation/Delegates.cs
+++ b/src/ARMeilleure/Translation/Delegates.cs
@@ -3,6 +3,7 @@ using ARMeilleure.State;
 using System;
 using System.Collections.Generic;
 using System.Reflection;
+using System.Runtime.InteropServices;
 
 namespace ARMeilleure.Translation
 {
@@ -64,11 +65,11 @@ namespace ARMeilleure.Translation
             return index;
         }
 
-        private static void SetDelegateInfo(Delegate dlg)
+        private static void SetDelegateInfo(Delegate dlg, IntPtr funcPtr)
         {
             string key = GetKey(dlg.Method);
 
-            _delegates.Add(key, new DelegateInfo(dlg)); // ArgumentException (key).
+            _delegates.Add(key, new DelegateInfo(dlg, funcPtr)); // ArgumentException (key).
} private static string GetKey(MethodInfo info) @@ -82,179 +83,353 @@ namespace ARMeilleure.Translation { _delegates = new SortedList(); - SetDelegateInfo(new MathAbs(Math.Abs)); - SetDelegateInfo(new MathCeiling(Math.Ceiling)); - SetDelegateInfo(new MathFloor(Math.Floor)); - SetDelegateInfo(new MathRound(Math.Round)); - SetDelegateInfo(new MathTruncate(Math.Truncate)); + var dlgMathAbs = new MathAbs(Math.Abs); + var dlgMathCeiling = new MathCeiling(Math.Ceiling); + var dlgMathFloor = new MathFloor(Math.Floor); + var dlgMathRound = new MathRound(Math.Round); + var dlgMathTruncate = new MathTruncate(Math.Truncate); - SetDelegateInfo(new MathFAbs(MathF.Abs)); - SetDelegateInfo(new MathFCeiling(MathF.Ceiling)); - SetDelegateInfo(new MathFFloor(MathF.Floor)); - SetDelegateInfo(new MathFRound(MathF.Round)); - SetDelegateInfo(new MathFTruncate(MathF.Truncate)); + var dlgMathFAbs = new MathFAbs(MathF.Abs); + var dlgMathFCeiling = new MathFCeiling(MathF.Ceiling); + var dlgMathFFloor = new MathFFloor(MathF.Floor); + var dlgMathFRound = new MathFRound(MathF.Round); + var dlgMathFTruncate = new MathFTruncate(MathF.Truncate); - SetDelegateInfo(new NativeInterfaceBreak(NativeInterface.Break)); - SetDelegateInfo(new NativeInterfaceCheckSynchronization(NativeInterface.CheckSynchronization)); - SetDelegateInfo(new NativeInterfaceEnqueueForRejit(NativeInterface.EnqueueForRejit)); - SetDelegateInfo(new NativeInterfaceGetCntfrqEl0(NativeInterface.GetCntfrqEl0)); - SetDelegateInfo(new NativeInterfaceGetCntpctEl0(NativeInterface.GetCntpctEl0)); - SetDelegateInfo(new NativeInterfaceGetCntvctEl0(NativeInterface.GetCntvctEl0)); - SetDelegateInfo(new NativeInterfaceGetCtrEl0(NativeInterface.GetCtrEl0)); - SetDelegateInfo(new NativeInterfaceGetDczidEl0(NativeInterface.GetDczidEl0)); - SetDelegateInfo(new NativeInterfaceGetFunctionAddress(NativeInterface.GetFunctionAddress)); - SetDelegateInfo(new NativeInterfaceInvalidateCacheLine(NativeInterface.InvalidateCacheLine)); - SetDelegateInfo(new 
NativeInterfaceReadByte(NativeInterface.ReadByte)); - SetDelegateInfo(new NativeInterfaceReadUInt16(NativeInterface.ReadUInt16)); - SetDelegateInfo(new NativeInterfaceReadUInt32(NativeInterface.ReadUInt32)); - SetDelegateInfo(new NativeInterfaceReadUInt64(NativeInterface.ReadUInt64)); - SetDelegateInfo(new NativeInterfaceReadVector128(NativeInterface.ReadVector128)); - SetDelegateInfo(new NativeInterfaceSignalMemoryTracking(NativeInterface.SignalMemoryTracking)); - SetDelegateInfo(new NativeInterfaceSupervisorCall(NativeInterface.SupervisorCall)); - SetDelegateInfo(new NativeInterfaceThrowInvalidMemoryAccess(NativeInterface.ThrowInvalidMemoryAccess)); - SetDelegateInfo(new NativeInterfaceUndefined(NativeInterface.Undefined)); - SetDelegateInfo(new NativeInterfaceWriteByte(NativeInterface.WriteByte)); - SetDelegateInfo(new NativeInterfaceWriteUInt16(NativeInterface.WriteUInt16)); - SetDelegateInfo(new NativeInterfaceWriteUInt32(NativeInterface.WriteUInt32)); - SetDelegateInfo(new NativeInterfaceWriteUInt64(NativeInterface.WriteUInt64)); - SetDelegateInfo(new NativeInterfaceWriteVector128(NativeInterface.WriteVector128)); + var dlgNativeInterfaceBreak = new NativeInterfaceBreak(NativeInterface.Break); + var dlgNativeInterfaceCheckSynchronization = new NativeInterfaceCheckSynchronization(NativeInterface.CheckSynchronization); + var dlgNativeInterfaceEnqueueForRejit = new NativeInterfaceEnqueueForRejit(NativeInterface.EnqueueForRejit); + var dlgNativeInterfaceGetCntfrqEl0 = new NativeInterfaceGetCntfrqEl0(NativeInterface.GetCntfrqEl0); + var dlgNativeInterfaceGetCntpctEl0 = new NativeInterfaceGetCntpctEl0(NativeInterface.GetCntpctEl0); + var dlgNativeInterfaceGetCntvctEl0 = new NativeInterfaceGetCntvctEl0(NativeInterface.GetCntvctEl0); + var dlgNativeInterfaceGetCtrEl0 = new NativeInterfaceGetCtrEl0(NativeInterface.GetCtrEl0); + var dlgNativeInterfaceGetDczidEl0 = new NativeInterfaceGetDczidEl0(NativeInterface.GetDczidEl0); + var dlgNativeInterfaceGetFunctionAddress = 
new NativeInterfaceGetFunctionAddress(NativeInterface.GetFunctionAddress); + var dlgNativeInterfaceInvalidateCacheLine = new NativeInterfaceInvalidateCacheLine(NativeInterface.InvalidateCacheLine); + var dlgNativeInterfaceReadByte = new NativeInterfaceReadByte(NativeInterface.ReadByte); + var dlgNativeInterfaceReadUInt16 = new NativeInterfaceReadUInt16(NativeInterface.ReadUInt16); + var dlgNativeInterfaceReadUInt32 = new NativeInterfaceReadUInt32(NativeInterface.ReadUInt32); + var dlgNativeInterfaceReadUInt64 = new NativeInterfaceReadUInt64(NativeInterface.ReadUInt64); + var dlgNativeInterfaceReadVector128 = new NativeInterfaceReadVector128(NativeInterface.ReadVector128); + var dlgNativeInterfaceSignalMemoryTracking = new NativeInterfaceSignalMemoryTracking(NativeInterface.SignalMemoryTracking); + var dlgNativeInterfaceSupervisorCall = new NativeInterfaceSupervisorCall(NativeInterface.SupervisorCall); + var dlgNativeInterfaceThrowInvalidMemoryAccess = new NativeInterfaceThrowInvalidMemoryAccess(NativeInterface.ThrowInvalidMemoryAccess); + var dlgNativeInterfaceUndefined = new NativeInterfaceUndefined(NativeInterface.Undefined); + var dlgNativeInterfaceWriteByte = new NativeInterfaceWriteByte(NativeInterface.WriteByte); + var dlgNativeInterfaceWriteUInt16 = new NativeInterfaceWriteUInt16(NativeInterface.WriteUInt16); + var dlgNativeInterfaceWriteUInt32 = new NativeInterfaceWriteUInt32(NativeInterface.WriteUInt32); + var dlgNativeInterfaceWriteUInt64 = new NativeInterfaceWriteUInt64(NativeInterface.WriteUInt64); + var dlgNativeInterfaceWriteVector128 = new NativeInterfaceWriteVector128(NativeInterface.WriteVector128); - SetDelegateInfo(new SoftFallbackCountLeadingSigns(SoftFallback.CountLeadingSigns)); - SetDelegateInfo(new SoftFallbackCountLeadingZeros(SoftFallback.CountLeadingZeros)); - SetDelegateInfo(new SoftFallbackCrc32b(SoftFallback.Crc32b)); - SetDelegateInfo(new SoftFallbackCrc32cb(SoftFallback.Crc32cb)); - SetDelegateInfo(new 
SoftFallbackCrc32ch(SoftFallback.Crc32ch)); - SetDelegateInfo(new SoftFallbackCrc32cw(SoftFallback.Crc32cw)); - SetDelegateInfo(new SoftFallbackCrc32cx(SoftFallback.Crc32cx)); - SetDelegateInfo(new SoftFallbackCrc32h(SoftFallback.Crc32h)); - SetDelegateInfo(new SoftFallbackCrc32w(SoftFallback.Crc32w)); - SetDelegateInfo(new SoftFallbackCrc32x(SoftFallback.Crc32x)); - SetDelegateInfo(new SoftFallbackDecrypt(SoftFallback.Decrypt)); - SetDelegateInfo(new SoftFallbackEncrypt(SoftFallback.Encrypt)); - SetDelegateInfo(new SoftFallbackFixedRotate(SoftFallback.FixedRotate)); - SetDelegateInfo(new SoftFallbackHashChoose(SoftFallback.HashChoose)); - SetDelegateInfo(new SoftFallbackHashLower(SoftFallback.HashLower)); - SetDelegateInfo(new SoftFallbackHashMajority(SoftFallback.HashMajority)); - SetDelegateInfo(new SoftFallbackHashParity(SoftFallback.HashParity)); - SetDelegateInfo(new SoftFallbackHashUpper(SoftFallback.HashUpper)); - SetDelegateInfo(new SoftFallbackInverseMixColumns(SoftFallback.InverseMixColumns)); - SetDelegateInfo(new SoftFallbackMixColumns(SoftFallback.MixColumns)); - SetDelegateInfo(new SoftFallbackPolynomialMult64_128(SoftFallback.PolynomialMult64_128)); - SetDelegateInfo(new SoftFallbackSatF32ToS32(SoftFallback.SatF32ToS32)); - SetDelegateInfo(new SoftFallbackSatF32ToS64(SoftFallback.SatF32ToS64)); - SetDelegateInfo(new SoftFallbackSatF32ToU32(SoftFallback.SatF32ToU32)); - SetDelegateInfo(new SoftFallbackSatF32ToU64(SoftFallback.SatF32ToU64)); - SetDelegateInfo(new SoftFallbackSatF64ToS32(SoftFallback.SatF64ToS32)); - SetDelegateInfo(new SoftFallbackSatF64ToS64(SoftFallback.SatF64ToS64)); - SetDelegateInfo(new SoftFallbackSatF64ToU32(SoftFallback.SatF64ToU32)); - SetDelegateInfo(new SoftFallbackSatF64ToU64(SoftFallback.SatF64ToU64)); - SetDelegateInfo(new SoftFallbackSha1SchedulePart1(SoftFallback.Sha1SchedulePart1)); - SetDelegateInfo(new SoftFallbackSha1SchedulePart2(SoftFallback.Sha1SchedulePart2)); - SetDelegateInfo(new 
SoftFallbackSha256SchedulePart1(SoftFallback.Sha256SchedulePart1)); - SetDelegateInfo(new SoftFallbackSha256SchedulePart2(SoftFallback.Sha256SchedulePart2)); - SetDelegateInfo(new SoftFallbackSignedShrImm64(SoftFallback.SignedShrImm64)); - SetDelegateInfo(new SoftFallbackTbl1(SoftFallback.Tbl1)); - SetDelegateInfo(new SoftFallbackTbl2(SoftFallback.Tbl2)); - SetDelegateInfo(new SoftFallbackTbl3(SoftFallback.Tbl3)); - SetDelegateInfo(new SoftFallbackTbl4(SoftFallback.Tbl4)); - SetDelegateInfo(new SoftFallbackTbx1(SoftFallback.Tbx1)); - SetDelegateInfo(new SoftFallbackTbx2(SoftFallback.Tbx2)); - SetDelegateInfo(new SoftFallbackTbx3(SoftFallback.Tbx3)); - SetDelegateInfo(new SoftFallbackTbx4(SoftFallback.Tbx4)); - SetDelegateInfo(new SoftFallbackUnsignedShrImm64(SoftFallback.UnsignedShrImm64)); + var dlgSoftFallbackCountLeadingSigns = new SoftFallbackCountLeadingSigns(SoftFallback.CountLeadingSigns); + var dlgSoftFallbackCountLeadingZeros = new SoftFallbackCountLeadingZeros(SoftFallback.CountLeadingZeros); + var dlgSoftFallbackCrc32b = new SoftFallbackCrc32b(SoftFallback.Crc32b); + var dlgSoftFallbackCrc32cb = new SoftFallbackCrc32cb(SoftFallback.Crc32cb); + var dlgSoftFallbackCrc32ch = new SoftFallbackCrc32ch(SoftFallback.Crc32ch); + var dlgSoftFallbackCrc32cw = new SoftFallbackCrc32cw(SoftFallback.Crc32cw); + var dlgSoftFallbackCrc32cx = new SoftFallbackCrc32cx(SoftFallback.Crc32cx); + var dlgSoftFallbackCrc32h = new SoftFallbackCrc32h(SoftFallback.Crc32h); + var dlgSoftFallbackCrc32w = new SoftFallbackCrc32w(SoftFallback.Crc32w); + var dlgSoftFallbackCrc32x = new SoftFallbackCrc32x(SoftFallback.Crc32x); + var dlgSoftFallbackDecrypt = new SoftFallbackDecrypt(SoftFallback.Decrypt); + var dlgSoftFallbackEncrypt = new SoftFallbackEncrypt(SoftFallback.Encrypt); + var dlgSoftFallbackFixedRotate = new SoftFallbackFixedRotate(SoftFallback.FixedRotate); + var dlgSoftFallbackHashChoose = new SoftFallbackHashChoose(SoftFallback.HashChoose); + var dlgSoftFallbackHashLower = new 
SoftFallbackHashLower(SoftFallback.HashLower); + var dlgSoftFallbackHashMajority = new SoftFallbackHashMajority(SoftFallback.HashMajority); + var dlgSoftFallbackHashParity = new SoftFallbackHashParity(SoftFallback.HashParity); + var dlgSoftFallbackHashUpper = new SoftFallbackHashUpper(SoftFallback.HashUpper); + var dlgSoftFallbackInverseMixColumns = new SoftFallbackInverseMixColumns(SoftFallback.InverseMixColumns); + var dlgSoftFallbackMixColumns = new SoftFallbackMixColumns(SoftFallback.MixColumns); + var dlgSoftFallbackPolynomialMult64_128 = new SoftFallbackPolynomialMult64_128(SoftFallback.PolynomialMult64_128); + var dlgSoftFallbackSatF32ToS32 = new SoftFallbackSatF32ToS32(SoftFallback.SatF32ToS32); + var dlgSoftFallbackSatF32ToS64 = new SoftFallbackSatF32ToS64(SoftFallback.SatF32ToS64); + var dlgSoftFallbackSatF32ToU32 = new SoftFallbackSatF32ToU32(SoftFallback.SatF32ToU32); + var dlgSoftFallbackSatF32ToU64 = new SoftFallbackSatF32ToU64(SoftFallback.SatF32ToU64); + var dlgSoftFallbackSatF64ToS32 = new SoftFallbackSatF64ToS32(SoftFallback.SatF64ToS32); + var dlgSoftFallbackSatF64ToS64 = new SoftFallbackSatF64ToS64(SoftFallback.SatF64ToS64); + var dlgSoftFallbackSatF64ToU32 = new SoftFallbackSatF64ToU32(SoftFallback.SatF64ToU32); + var dlgSoftFallbackSatF64ToU64 = new SoftFallbackSatF64ToU64(SoftFallback.SatF64ToU64); + var dlgSoftFallbackSha1SchedulePart1 = new SoftFallbackSha1SchedulePart1(SoftFallback.Sha1SchedulePart1); + var dlgSoftFallbackSha1SchedulePart2 = new SoftFallbackSha1SchedulePart2(SoftFallback.Sha1SchedulePart2); + var dlgSoftFallbackSha256SchedulePart1 = new SoftFallbackSha256SchedulePart1(SoftFallback.Sha256SchedulePart1); + var dlgSoftFallbackSha256SchedulePart2 = new SoftFallbackSha256SchedulePart2(SoftFallback.Sha256SchedulePart2); + var dlgSoftFallbackSignedShrImm64 = new SoftFallbackSignedShrImm64(SoftFallback.SignedShrImm64); + var dlgSoftFallbackTbl1 = new SoftFallbackTbl1(SoftFallback.Tbl1); + var dlgSoftFallbackTbl2 = new 
SoftFallbackTbl2(SoftFallback.Tbl2); + var dlgSoftFallbackTbl3 = new SoftFallbackTbl3(SoftFallback.Tbl3); + var dlgSoftFallbackTbl4 = new SoftFallbackTbl4(SoftFallback.Tbl4); + var dlgSoftFallbackTbx1 = new SoftFallbackTbx1(SoftFallback.Tbx1); + var dlgSoftFallbackTbx2 = new SoftFallbackTbx2(SoftFallback.Tbx2); + var dlgSoftFallbackTbx3 = new SoftFallbackTbx3(SoftFallback.Tbx3); + var dlgSoftFallbackTbx4 = new SoftFallbackTbx4(SoftFallback.Tbx4); + var dlgSoftFallbackUnsignedShrImm64 = new SoftFallbackUnsignedShrImm64(SoftFallback.UnsignedShrImm64); - SetDelegateInfo(new SoftFloat16_32FPConvert(SoftFloat16_32.FPConvert)); - SetDelegateInfo(new SoftFloat16_64FPConvert(SoftFloat16_64.FPConvert)); + var dlgSoftFloat16_32FPConvert = new SoftFloat16_32FPConvert(SoftFloat16_32.FPConvert); + var dlgSoftFloat16_64FPConvert = new SoftFloat16_64FPConvert(SoftFloat16_64.FPConvert); - SetDelegateInfo(new SoftFloat32FPAdd(SoftFloat32.FPAdd)); - SetDelegateInfo(new SoftFloat32FPAddFpscr(SoftFloat32.FPAddFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPCompare(SoftFloat32.FPCompare)); - SetDelegateInfo(new SoftFloat32FPCompareEQ(SoftFloat32.FPCompareEQ)); - SetDelegateInfo(new SoftFloat32FPCompareEQFpscr(SoftFloat32.FPCompareEQFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPCompareGE(SoftFloat32.FPCompareGE)); - SetDelegateInfo(new SoftFloat32FPCompareGEFpscr(SoftFloat32.FPCompareGEFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPCompareGT(SoftFloat32.FPCompareGT)); - SetDelegateInfo(new SoftFloat32FPCompareGTFpscr(SoftFloat32.FPCompareGTFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPCompareLE(SoftFloat32.FPCompareLE)); - SetDelegateInfo(new SoftFloat32FPCompareLEFpscr(SoftFloat32.FPCompareLEFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPCompareLT(SoftFloat32.FPCompareLT)); - SetDelegateInfo(new SoftFloat32FPCompareLTFpscr(SoftFloat32.FPCompareLTFpscr)); // A32 only. 
- SetDelegateInfo(new SoftFloat32FPDiv(SoftFloat32.FPDiv)); - SetDelegateInfo(new SoftFloat32FPMax(SoftFloat32.FPMax)); - SetDelegateInfo(new SoftFloat32FPMaxFpscr(SoftFloat32.FPMaxFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMaxNum(SoftFloat32.FPMaxNum)); - SetDelegateInfo(new SoftFloat32FPMaxNumFpscr(SoftFloat32.FPMaxNumFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMin(SoftFloat32.FPMin)); - SetDelegateInfo(new SoftFloat32FPMinFpscr(SoftFloat32.FPMinFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMinNum(SoftFloat32.FPMinNum)); - SetDelegateInfo(new SoftFloat32FPMinNumFpscr(SoftFloat32.FPMinNumFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMul(SoftFloat32.FPMul)); - SetDelegateInfo(new SoftFloat32FPMulFpscr(SoftFloat32.FPMulFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMulAdd(SoftFloat32.FPMulAdd)); - SetDelegateInfo(new SoftFloat32FPMulAddFpscr(SoftFloat32.FPMulAddFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMulSub(SoftFloat32.FPMulSub)); - SetDelegateInfo(new SoftFloat32FPMulSubFpscr(SoftFloat32.FPMulSubFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPMulX(SoftFloat32.FPMulX)); - SetDelegateInfo(new SoftFloat32FPNegMulAdd(SoftFloat32.FPNegMulAdd)); - SetDelegateInfo(new SoftFloat32FPNegMulSub(SoftFloat32.FPNegMulSub)); - SetDelegateInfo(new SoftFloat32FPRecipEstimate(SoftFloat32.FPRecipEstimate)); - SetDelegateInfo(new SoftFloat32FPRecipEstimateFpscr(SoftFloat32.FPRecipEstimateFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPRecipStep(SoftFloat32.FPRecipStep)); // A32 only. - SetDelegateInfo(new SoftFloat32FPRecipStepFused(SoftFloat32.FPRecipStepFused)); - SetDelegateInfo(new SoftFloat32FPRecpX(SoftFloat32.FPRecpX)); - SetDelegateInfo(new SoftFloat32FPRSqrtEstimate(SoftFloat32.FPRSqrtEstimate)); - SetDelegateInfo(new SoftFloat32FPRSqrtEstimateFpscr(SoftFloat32.FPRSqrtEstimateFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat32FPRSqrtStep(SoftFloat32.FPRSqrtStep)); // A32 only. 
- SetDelegateInfo(new SoftFloat32FPRSqrtStepFused(SoftFloat32.FPRSqrtStepFused)); - SetDelegateInfo(new SoftFloat32FPSqrt(SoftFloat32.FPSqrt)); - SetDelegateInfo(new SoftFloat32FPSub(SoftFloat32.FPSub)); + var dlgSoftFloat32FPAdd = new SoftFloat32FPAdd(SoftFloat32.FPAdd); + var dlgSoftFloat32FPAddFpscr = new SoftFloat32FPAddFpscr(SoftFloat32.FPAddFpscr); // A32 only. + var dlgSoftFloat32FPCompare = new SoftFloat32FPCompare(SoftFloat32.FPCompare); + var dlgSoftFloat32FPCompareEQ = new SoftFloat32FPCompareEQ(SoftFloat32.FPCompareEQ); + var dlgSoftFloat32FPCompareEQFpscr = new SoftFloat32FPCompareEQFpscr(SoftFloat32.FPCompareEQFpscr); // A32 only. + var dlgSoftFloat32FPCompareGE = new SoftFloat32FPCompareGE(SoftFloat32.FPCompareGE); + var dlgSoftFloat32FPCompareGEFpscr = new SoftFloat32FPCompareGEFpscr(SoftFloat32.FPCompareGEFpscr); // A32 only. + var dlgSoftFloat32FPCompareGT = new SoftFloat32FPCompareGT(SoftFloat32.FPCompareGT); + var dlgSoftFloat32FPCompareGTFpscr = new SoftFloat32FPCompareGTFpscr(SoftFloat32.FPCompareGTFpscr); // A32 only. + var dlgSoftFloat32FPCompareLE = new SoftFloat32FPCompareLE(SoftFloat32.FPCompareLE); + var dlgSoftFloat32FPCompareLEFpscr = new SoftFloat32FPCompareLEFpscr(SoftFloat32.FPCompareLEFpscr); // A32 only. + var dlgSoftFloat32FPCompareLT = new SoftFloat32FPCompareLT(SoftFloat32.FPCompareLT); + var dlgSoftFloat32FPCompareLTFpscr = new SoftFloat32FPCompareLTFpscr(SoftFloat32.FPCompareLTFpscr); // A32 only. + var dlgSoftFloat32FPDiv = new SoftFloat32FPDiv(SoftFloat32.FPDiv); + var dlgSoftFloat32FPMax = new SoftFloat32FPMax(SoftFloat32.FPMax); + var dlgSoftFloat32FPMaxFpscr = new SoftFloat32FPMaxFpscr(SoftFloat32.FPMaxFpscr); // A32 only. + var dlgSoftFloat32FPMaxNum = new SoftFloat32FPMaxNum(SoftFloat32.FPMaxNum); + var dlgSoftFloat32FPMaxNumFpscr = new SoftFloat32FPMaxNumFpscr(SoftFloat32.FPMaxNumFpscr); // A32 only. 
+ var dlgSoftFloat32FPMin = new SoftFloat32FPMin(SoftFloat32.FPMin); + var dlgSoftFloat32FPMinFpscr = new SoftFloat32FPMinFpscr(SoftFloat32.FPMinFpscr); // A32 only. + var dlgSoftFloat32FPMinNum = new SoftFloat32FPMinNum(SoftFloat32.FPMinNum); + var dlgSoftFloat32FPMinNumFpscr = new SoftFloat32FPMinNumFpscr(SoftFloat32.FPMinNumFpscr); // A32 only. + var dlgSoftFloat32FPMul = new SoftFloat32FPMul(SoftFloat32.FPMul); + var dlgSoftFloat32FPMulFpscr = new SoftFloat32FPMulFpscr(SoftFloat32.FPMulFpscr); // A32 only. + var dlgSoftFloat32FPMulAdd = new SoftFloat32FPMulAdd(SoftFloat32.FPMulAdd); + var dlgSoftFloat32FPMulAddFpscr = new SoftFloat32FPMulAddFpscr(SoftFloat32.FPMulAddFpscr); // A32 only. + var dlgSoftFloat32FPMulSub = new SoftFloat32FPMulSub(SoftFloat32.FPMulSub); + var dlgSoftFloat32FPMulSubFpscr = new SoftFloat32FPMulSubFpscr(SoftFloat32.FPMulSubFpscr); // A32 only. + var dlgSoftFloat32FPMulX = new SoftFloat32FPMulX(SoftFloat32.FPMulX); + var dlgSoftFloat32FPNegMulAdd = new SoftFloat32FPNegMulAdd(SoftFloat32.FPNegMulAdd); + var dlgSoftFloat32FPNegMulSub = new SoftFloat32FPNegMulSub(SoftFloat32.FPNegMulSub); + var dlgSoftFloat32FPRecipEstimate = new SoftFloat32FPRecipEstimate(SoftFloat32.FPRecipEstimate); + var dlgSoftFloat32FPRecipEstimateFpscr = new SoftFloat32FPRecipEstimateFpscr(SoftFloat32.FPRecipEstimateFpscr); // A32 only. + var dlgSoftFloat32FPRecipStep = new SoftFloat32FPRecipStep(SoftFloat32.FPRecipStep); // A32 only. + var dlgSoftFloat32FPRecipStepFused = new SoftFloat32FPRecipStepFused(SoftFloat32.FPRecipStepFused); + var dlgSoftFloat32FPRecpX = new SoftFloat32FPRecpX(SoftFloat32.FPRecpX); + var dlgSoftFloat32FPRSqrtEstimate = new SoftFloat32FPRSqrtEstimate(SoftFloat32.FPRSqrtEstimate); + var dlgSoftFloat32FPRSqrtEstimateFpscr = new SoftFloat32FPRSqrtEstimateFpscr(SoftFloat32.FPRSqrtEstimateFpscr); // A32 only. + var dlgSoftFloat32FPRSqrtStep = new SoftFloat32FPRSqrtStep(SoftFloat32.FPRSqrtStep); // A32 only. 
+ var dlgSoftFloat32FPRSqrtStepFused = new SoftFloat32FPRSqrtStepFused(SoftFloat32.FPRSqrtStepFused); + var dlgSoftFloat32FPSqrt = new SoftFloat32FPSqrt(SoftFloat32.FPSqrt); + var dlgSoftFloat32FPSub = new SoftFloat32FPSub(SoftFloat32.FPSub); - SetDelegateInfo(new SoftFloat32_16FPConvert(SoftFloat32_16.FPConvert)); + var dlgSoftFloat32_16FPConvert = new SoftFloat32_16FPConvert(SoftFloat32_16.FPConvert); - SetDelegateInfo(new SoftFloat64FPAdd(SoftFloat64.FPAdd)); - SetDelegateInfo(new SoftFloat64FPAddFpscr(SoftFloat64.FPAddFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPCompare(SoftFloat64.FPCompare)); - SetDelegateInfo(new SoftFloat64FPCompareEQ(SoftFloat64.FPCompareEQ)); - SetDelegateInfo(new SoftFloat64FPCompareEQFpscr(SoftFloat64.FPCompareEQFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPCompareGE(SoftFloat64.FPCompareGE)); - SetDelegateInfo(new SoftFloat64FPCompareGEFpscr(SoftFloat64.FPCompareGEFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPCompareGT(SoftFloat64.FPCompareGT)); - SetDelegateInfo(new SoftFloat64FPCompareGTFpscr(SoftFloat64.FPCompareGTFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPCompareLE(SoftFloat64.FPCompareLE)); - SetDelegateInfo(new SoftFloat64FPCompareLEFpscr(SoftFloat64.FPCompareLEFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPCompareLT(SoftFloat64.FPCompareLT)); - SetDelegateInfo(new SoftFloat64FPCompareLTFpscr(SoftFloat64.FPCompareLTFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPDiv(SoftFloat64.FPDiv)); - SetDelegateInfo(new SoftFloat64FPMax(SoftFloat64.FPMax)); - SetDelegateInfo(new SoftFloat64FPMaxFpscr(SoftFloat64.FPMaxFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMaxNum(SoftFloat64.FPMaxNum)); - SetDelegateInfo(new SoftFloat64FPMaxNumFpscr(SoftFloat64.FPMaxNumFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMin(SoftFloat64.FPMin)); - SetDelegateInfo(new SoftFloat64FPMinFpscr(SoftFloat64.FPMinFpscr)); // A32 only. 
- SetDelegateInfo(new SoftFloat64FPMinNum(SoftFloat64.FPMinNum)); - SetDelegateInfo(new SoftFloat64FPMinNumFpscr(SoftFloat64.FPMinNumFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMul(SoftFloat64.FPMul)); - SetDelegateInfo(new SoftFloat64FPMulFpscr(SoftFloat64.FPMulFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMulAdd(SoftFloat64.FPMulAdd)); - SetDelegateInfo(new SoftFloat64FPMulAddFpscr(SoftFloat64.FPMulAddFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMulSub(SoftFloat64.FPMulSub)); - SetDelegateInfo(new SoftFloat64FPMulSubFpscr(SoftFloat64.FPMulSubFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPMulX(SoftFloat64.FPMulX)); - SetDelegateInfo(new SoftFloat64FPNegMulAdd(SoftFloat64.FPNegMulAdd)); - SetDelegateInfo(new SoftFloat64FPNegMulSub(SoftFloat64.FPNegMulSub)); - SetDelegateInfo(new SoftFloat64FPRecipEstimate(SoftFloat64.FPRecipEstimate)); - SetDelegateInfo(new SoftFloat64FPRecipEstimateFpscr(SoftFloat64.FPRecipEstimateFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPRecipStep(SoftFloat64.FPRecipStep)); // A32 only. - SetDelegateInfo(new SoftFloat64FPRecipStepFused(SoftFloat64.FPRecipStepFused)); - SetDelegateInfo(new SoftFloat64FPRecpX(SoftFloat64.FPRecpX)); - SetDelegateInfo(new SoftFloat64FPRSqrtEstimate(SoftFloat64.FPRSqrtEstimate)); - SetDelegateInfo(new SoftFloat64FPRSqrtEstimateFpscr(SoftFloat64.FPRSqrtEstimateFpscr)); // A32 only. - SetDelegateInfo(new SoftFloat64FPRSqrtStep(SoftFloat64.FPRSqrtStep)); // A32 only. - SetDelegateInfo(new SoftFloat64FPRSqrtStepFused(SoftFloat64.FPRSqrtStepFused)); - SetDelegateInfo(new SoftFloat64FPSqrt(SoftFloat64.FPSqrt)); - SetDelegateInfo(new SoftFloat64FPSub(SoftFloat64.FPSub)); + var dlgSoftFloat64FPAdd = new SoftFloat64FPAdd(SoftFloat64.FPAdd); + var dlgSoftFloat64FPAddFpscr = new SoftFloat64FPAddFpscr(SoftFloat64.FPAddFpscr); // A32 only. 
+ var dlgSoftFloat64FPCompare = new SoftFloat64FPCompare(SoftFloat64.FPCompare); + var dlgSoftFloat64FPCompareEQ = new SoftFloat64FPCompareEQ(SoftFloat64.FPCompareEQ); + var dlgSoftFloat64FPCompareEQFpscr = new SoftFloat64FPCompareEQFpscr(SoftFloat64.FPCompareEQFpscr); // A32 only. + var dlgSoftFloat64FPCompareGE = new SoftFloat64FPCompareGE(SoftFloat64.FPCompareGE); + var dlgSoftFloat64FPCompareGEFpscr = new SoftFloat64FPCompareGEFpscr(SoftFloat64.FPCompareGEFpscr); // A32 only. + var dlgSoftFloat64FPCompareGT = new SoftFloat64FPCompareGT(SoftFloat64.FPCompareGT); + var dlgSoftFloat64FPCompareGTFpscr = new SoftFloat64FPCompareGTFpscr(SoftFloat64.FPCompareGTFpscr); // A32 only. + var dlgSoftFloat64FPCompareLE = new SoftFloat64FPCompareLE(SoftFloat64.FPCompareLE); + var dlgSoftFloat64FPCompareLEFpscr = new SoftFloat64FPCompareLEFpscr(SoftFloat64.FPCompareLEFpscr); // A32 only. + var dlgSoftFloat64FPCompareLT = new SoftFloat64FPCompareLT(SoftFloat64.FPCompareLT); + var dlgSoftFloat64FPCompareLTFpscr = new SoftFloat64FPCompareLTFpscr(SoftFloat64.FPCompareLTFpscr); // A32 only. + var dlgSoftFloat64FPDiv = new SoftFloat64FPDiv(SoftFloat64.FPDiv); + var dlgSoftFloat64FPMax = new SoftFloat64FPMax(SoftFloat64.FPMax); + var dlgSoftFloat64FPMaxFpscr = new SoftFloat64FPMaxFpscr(SoftFloat64.FPMaxFpscr); // A32 only. + var dlgSoftFloat64FPMaxNum = new SoftFloat64FPMaxNum(SoftFloat64.FPMaxNum); + var dlgSoftFloat64FPMaxNumFpscr = new SoftFloat64FPMaxNumFpscr(SoftFloat64.FPMaxNumFpscr); // A32 only. + var dlgSoftFloat64FPMin = new SoftFloat64FPMin(SoftFloat64.FPMin); + var dlgSoftFloat64FPMinFpscr = new SoftFloat64FPMinFpscr(SoftFloat64.FPMinFpscr); // A32 only. + var dlgSoftFloat64FPMinNum = new SoftFloat64FPMinNum(SoftFloat64.FPMinNum); + var dlgSoftFloat64FPMinNumFpscr = new SoftFloat64FPMinNumFpscr(SoftFloat64.FPMinNumFpscr); // A32 only. 
+ var dlgSoftFloat64FPMul = new SoftFloat64FPMul(SoftFloat64.FPMul); + var dlgSoftFloat64FPMulFpscr = new SoftFloat64FPMulFpscr(SoftFloat64.FPMulFpscr); // A32 only. + var dlgSoftFloat64FPMulAdd = new SoftFloat64FPMulAdd(SoftFloat64.FPMulAdd); + var dlgSoftFloat64FPMulAddFpscr = new SoftFloat64FPMulAddFpscr(SoftFloat64.FPMulAddFpscr); // A32 only. + var dlgSoftFloat64FPMulSub = new SoftFloat64FPMulSub(SoftFloat64.FPMulSub); + var dlgSoftFloat64FPMulSubFpscr = new SoftFloat64FPMulSubFpscr(SoftFloat64.FPMulSubFpscr); // A32 only. + var dlgSoftFloat64FPMulX = new SoftFloat64FPMulX(SoftFloat64.FPMulX); + var dlgSoftFloat64FPNegMulAdd = new SoftFloat64FPNegMulAdd(SoftFloat64.FPNegMulAdd); + var dlgSoftFloat64FPNegMulSub = new SoftFloat64FPNegMulSub(SoftFloat64.FPNegMulSub); + var dlgSoftFloat64FPRecipEstimate = new SoftFloat64FPRecipEstimate(SoftFloat64.FPRecipEstimate); + var dlgSoftFloat64FPRecipEstimateFpscr = new SoftFloat64FPRecipEstimateFpscr(SoftFloat64.FPRecipEstimateFpscr); // A32 only. + var dlgSoftFloat64FPRecipStep = new SoftFloat64FPRecipStep(SoftFloat64.FPRecipStep); // A32 only. + var dlgSoftFloat64FPRecipStepFused = new SoftFloat64FPRecipStepFused(SoftFloat64.FPRecipStepFused); + var dlgSoftFloat64FPRecpX = new SoftFloat64FPRecpX(SoftFloat64.FPRecpX); + var dlgSoftFloat64FPRSqrtEstimate = new SoftFloat64FPRSqrtEstimate(SoftFloat64.FPRSqrtEstimate); + var dlgSoftFloat64FPRSqrtEstimateFpscr = new SoftFloat64FPRSqrtEstimateFpscr(SoftFloat64.FPRSqrtEstimateFpscr); // A32 only. + var dlgSoftFloat64FPRSqrtStep = new SoftFloat64FPRSqrtStep(SoftFloat64.FPRSqrtStep); // A32 only. 
+ var dlgSoftFloat64FPRSqrtStepFused = new SoftFloat64FPRSqrtStepFused(SoftFloat64.FPRSqrtStepFused); + var dlgSoftFloat64FPSqrt = new SoftFloat64FPSqrt(SoftFloat64.FPSqrt); + var dlgSoftFloat64FPSub = new SoftFloat64FPSub(SoftFloat64.FPSub); - SetDelegateInfo(new SoftFloat64_16FPConvert(SoftFloat64_16.FPConvert)); + var dlgSoftFloat64_16FPConvert = new SoftFloat64_16FPConvert(SoftFloat64_16.FPConvert); + + SetDelegateInfo(dlgMathAbs, Marshal.GetFunctionPointerForDelegate(dlgMathAbs)); + SetDelegateInfo(dlgMathCeiling, Marshal.GetFunctionPointerForDelegate(dlgMathCeiling)); + SetDelegateInfo(dlgMathFloor, Marshal.GetFunctionPointerForDelegate(dlgMathFloor)); + SetDelegateInfo(dlgMathRound, Marshal.GetFunctionPointerForDelegate(dlgMathRound)); + SetDelegateInfo(dlgMathTruncate, Marshal.GetFunctionPointerForDelegate(dlgMathTruncate)); + + SetDelegateInfo(dlgMathFAbs, Marshal.GetFunctionPointerForDelegate(dlgMathFAbs)); + SetDelegateInfo(dlgMathFCeiling, Marshal.GetFunctionPointerForDelegate(dlgMathFCeiling)); + SetDelegateInfo(dlgMathFFloor, Marshal.GetFunctionPointerForDelegate(dlgMathFFloor)); + SetDelegateInfo(dlgMathFRound, Marshal.GetFunctionPointerForDelegate(dlgMathFRound)); + SetDelegateInfo(dlgMathFTruncate, Marshal.GetFunctionPointerForDelegate(dlgMathFTruncate)); + + SetDelegateInfo(dlgNativeInterfaceBreak, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceBreak)); + SetDelegateInfo(dlgNativeInterfaceCheckSynchronization, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceCheckSynchronization)); + SetDelegateInfo(dlgNativeInterfaceEnqueueForRejit, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceEnqueueForRejit)); + SetDelegateInfo(dlgNativeInterfaceGetCntfrqEl0, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetCntfrqEl0)); + SetDelegateInfo(dlgNativeInterfaceGetCntpctEl0, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetCntpctEl0)); + SetDelegateInfo(dlgNativeInterfaceGetCntvctEl0, 
Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetCntvctEl0)); + SetDelegateInfo(dlgNativeInterfaceGetCtrEl0, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetCtrEl0)); + SetDelegateInfo(dlgNativeInterfaceGetDczidEl0, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetDczidEl0)); + SetDelegateInfo(dlgNativeInterfaceGetFunctionAddress, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceGetFunctionAddress)); + SetDelegateInfo(dlgNativeInterfaceInvalidateCacheLine, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceInvalidateCacheLine)); + SetDelegateInfo(dlgNativeInterfaceReadByte, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceReadByte)); + SetDelegateInfo(dlgNativeInterfaceReadUInt16, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceReadUInt16)); + SetDelegateInfo(dlgNativeInterfaceReadUInt32, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceReadUInt32)); + SetDelegateInfo(dlgNativeInterfaceReadUInt64, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceReadUInt64)); + SetDelegateInfo(dlgNativeInterfaceReadVector128, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceReadVector128)); + SetDelegateInfo(dlgNativeInterfaceSignalMemoryTracking, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceSignalMemoryTracking)); + SetDelegateInfo(dlgNativeInterfaceSupervisorCall, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceSupervisorCall)); + SetDelegateInfo(dlgNativeInterfaceThrowInvalidMemoryAccess, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceThrowInvalidMemoryAccess)); + SetDelegateInfo(dlgNativeInterfaceUndefined, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceUndefined)); + SetDelegateInfo(dlgNativeInterfaceWriteByte, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceWriteByte)); + SetDelegateInfo(dlgNativeInterfaceWriteUInt16, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceWriteUInt16)); + SetDelegateInfo(dlgNativeInterfaceWriteUInt32, 
Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceWriteUInt32)); + SetDelegateInfo(dlgNativeInterfaceWriteUInt64, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceWriteUInt64)); + SetDelegateInfo(dlgNativeInterfaceWriteVector128, Marshal.GetFunctionPointerForDelegate(dlgNativeInterfaceWriteVector128)); + + SetDelegateInfo(dlgSoftFallbackCountLeadingSigns, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCountLeadingSigns)); + SetDelegateInfo(dlgSoftFallbackCountLeadingZeros, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCountLeadingZeros)); + SetDelegateInfo(dlgSoftFallbackCrc32b, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32b)); + SetDelegateInfo(dlgSoftFallbackCrc32cb, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32cb)); + SetDelegateInfo(dlgSoftFallbackCrc32ch, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32ch)); + SetDelegateInfo(dlgSoftFallbackCrc32cw, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32cw)); + SetDelegateInfo(dlgSoftFallbackCrc32cx, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32cx)); + SetDelegateInfo(dlgSoftFallbackCrc32h, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32h)); + SetDelegateInfo(dlgSoftFallbackCrc32w, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32w)); + SetDelegateInfo(dlgSoftFallbackCrc32x, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackCrc32x)); + SetDelegateInfo(dlgSoftFallbackDecrypt, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackDecrypt)); + SetDelegateInfo(dlgSoftFallbackEncrypt, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackEncrypt)); + SetDelegateInfo(dlgSoftFallbackFixedRotate, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackFixedRotate)); + SetDelegateInfo(dlgSoftFallbackHashChoose, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackHashChoose)); + SetDelegateInfo(dlgSoftFallbackHashLower, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackHashLower)); + 
SetDelegateInfo(dlgSoftFallbackHashMajority, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackHashMajority)); + SetDelegateInfo(dlgSoftFallbackHashParity, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackHashParity)); + SetDelegateInfo(dlgSoftFallbackHashUpper, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackHashUpper)); + SetDelegateInfo(dlgSoftFallbackInverseMixColumns, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackInverseMixColumns)); + SetDelegateInfo(dlgSoftFallbackMixColumns, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackMixColumns)); + SetDelegateInfo(dlgSoftFallbackPolynomialMult64_128, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackPolynomialMult64_128)); + SetDelegateInfo(dlgSoftFallbackSatF32ToS32, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF32ToS32)); + SetDelegateInfo(dlgSoftFallbackSatF32ToS64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF32ToS64)); + SetDelegateInfo(dlgSoftFallbackSatF32ToU32, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF32ToU32)); + SetDelegateInfo(dlgSoftFallbackSatF32ToU64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF32ToU64)); + SetDelegateInfo(dlgSoftFallbackSatF64ToS32, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF64ToS32)); + SetDelegateInfo(dlgSoftFallbackSatF64ToS64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF64ToS64)); + SetDelegateInfo(dlgSoftFallbackSatF64ToU32, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF64ToU32)); + SetDelegateInfo(dlgSoftFallbackSatF64ToU64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSatF64ToU64)); + SetDelegateInfo(dlgSoftFallbackSha1SchedulePart1, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSha1SchedulePart1)); + SetDelegateInfo(dlgSoftFallbackSha1SchedulePart2, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSha1SchedulePart2)); + SetDelegateInfo(dlgSoftFallbackSha256SchedulePart1, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSha256SchedulePart1)); 
+ SetDelegateInfo(dlgSoftFallbackSha256SchedulePart2, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSha256SchedulePart2)); + SetDelegateInfo(dlgSoftFallbackSignedShrImm64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackSignedShrImm64)); + SetDelegateInfo(dlgSoftFallbackTbl1, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbl1)); + SetDelegateInfo(dlgSoftFallbackTbl2, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbl2)); + SetDelegateInfo(dlgSoftFallbackTbl3, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbl3)); + SetDelegateInfo(dlgSoftFallbackTbl4, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbl4)); + SetDelegateInfo(dlgSoftFallbackTbx1, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbx1)); + SetDelegateInfo(dlgSoftFallbackTbx2, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbx2)); + SetDelegateInfo(dlgSoftFallbackTbx3, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbx3)); + SetDelegateInfo(dlgSoftFallbackTbx4, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackTbx4)); + SetDelegateInfo(dlgSoftFallbackUnsignedShrImm64, Marshal.GetFunctionPointerForDelegate(dlgSoftFallbackUnsignedShrImm64)); + + SetDelegateInfo(dlgSoftFloat16_32FPConvert, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat16_32FPConvert)); + SetDelegateInfo(dlgSoftFloat16_64FPConvert, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat16_64FPConvert)); + + SetDelegateInfo(dlgSoftFloat32FPAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPAdd)); + SetDelegateInfo(dlgSoftFloat32FPAddFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPAddFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPCompare, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompare)); + SetDelegateInfo(dlgSoftFloat32FPCompareEQ, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareEQ)); + SetDelegateInfo(dlgSoftFloat32FPCompareEQFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareEQFpscr)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat32FPCompareGE, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareGE)); + SetDelegateInfo(dlgSoftFloat32FPCompareGEFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareGEFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPCompareGT, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareGT)); + SetDelegateInfo(dlgSoftFloat32FPCompareGTFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareGTFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPCompareLE, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareLE)); + SetDelegateInfo(dlgSoftFloat32FPCompareLEFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareLEFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPCompareLT, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareLT)); + SetDelegateInfo(dlgSoftFloat32FPCompareLTFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPCompareLTFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPDiv, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPDiv)); + SetDelegateInfo(dlgSoftFloat32FPMax, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMax)); + SetDelegateInfo(dlgSoftFloat32FPMaxFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMaxFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMaxNum, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMaxNum)); + SetDelegateInfo(dlgSoftFloat32FPMaxNumFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMaxNumFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMin, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMin)); + SetDelegateInfo(dlgSoftFloat32FPMinFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMinFpscr)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat32FPMinNum, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMinNum)); + SetDelegateInfo(dlgSoftFloat32FPMinNumFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMinNumFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMul, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMul)); + SetDelegateInfo(dlgSoftFloat32FPMulFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMulAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulAdd)); + SetDelegateInfo(dlgSoftFloat32FPMulAddFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulAddFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMulSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulSub)); + SetDelegateInfo(dlgSoftFloat32FPMulSubFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulSubFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPMulX, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPMulX)); + SetDelegateInfo(dlgSoftFloat32FPNegMulAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPNegMulAdd)); + SetDelegateInfo(dlgSoftFloat32FPNegMulSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPNegMulSub)); + SetDelegateInfo(dlgSoftFloat32FPRecipEstimate, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRecipEstimate)); + SetDelegateInfo(dlgSoftFloat32FPRecipEstimateFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRecipEstimateFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPRecipStep, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRecipStep)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat32FPRecipStepFused, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRecipStepFused)); + SetDelegateInfo(dlgSoftFloat32FPRecpX, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRecpX)); + SetDelegateInfo(dlgSoftFloat32FPRSqrtEstimate, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRSqrtEstimate)); + SetDelegateInfo(dlgSoftFloat32FPRSqrtEstimateFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRSqrtEstimateFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPRSqrtStep, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRSqrtStep)); // A32 only. + SetDelegateInfo(dlgSoftFloat32FPRSqrtStepFused, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPRSqrtStepFused)); + SetDelegateInfo(dlgSoftFloat32FPSqrt, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPSqrt)); + SetDelegateInfo(dlgSoftFloat32FPSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32FPSub)); + + SetDelegateInfo(dlgSoftFloat32_16FPConvert, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat32_16FPConvert)); + + SetDelegateInfo(dlgSoftFloat64FPAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPAdd)); + SetDelegateInfo(dlgSoftFloat64FPAddFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPAddFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPCompare, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompare)); + SetDelegateInfo(dlgSoftFloat64FPCompareEQ, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareEQ)); + SetDelegateInfo(dlgSoftFloat64FPCompareEQFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareEQFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPCompareGE, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareGE)); + SetDelegateInfo(dlgSoftFloat64FPCompareGEFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareGEFpscr)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat64FPCompareGT, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareGT)); + SetDelegateInfo(dlgSoftFloat64FPCompareGTFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareGTFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPCompareLE, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareLE)); + SetDelegateInfo(dlgSoftFloat64FPCompareLEFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareLEFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPCompareLT, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareLT)); + SetDelegateInfo(dlgSoftFloat64FPCompareLTFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPCompareLTFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPDiv, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPDiv)); + SetDelegateInfo(dlgSoftFloat64FPMax, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMax)); + SetDelegateInfo(dlgSoftFloat64FPMaxFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMaxFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMaxNum, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMaxNum)); + SetDelegateInfo(dlgSoftFloat64FPMaxNumFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMaxNumFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMin, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMin)); + SetDelegateInfo(dlgSoftFloat64FPMinFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMinFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMinNum, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMinNum)); + SetDelegateInfo(dlgSoftFloat64FPMinNumFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMinNumFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMul, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMul)); + SetDelegateInfo(dlgSoftFloat64FPMulFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulFpscr)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat64FPMulAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulAdd)); + SetDelegateInfo(dlgSoftFloat64FPMulAddFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulAddFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMulSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulSub)); + SetDelegateInfo(dlgSoftFloat64FPMulSubFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulSubFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPMulX, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPMulX)); + SetDelegateInfo(dlgSoftFloat64FPNegMulAdd, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPNegMulAdd)); + SetDelegateInfo(dlgSoftFloat64FPNegMulSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPNegMulSub)); + SetDelegateInfo(dlgSoftFloat64FPRecipEstimate, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRecipEstimate)); + SetDelegateInfo(dlgSoftFloat64FPRecipEstimateFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRecipEstimateFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPRecipStep, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRecipStep)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPRecipStepFused, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRecipStepFused)); + SetDelegateInfo(dlgSoftFloat64FPRecpX, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRecpX)); + SetDelegateInfo(dlgSoftFloat64FPRSqrtEstimate, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRSqrtEstimate)); + SetDelegateInfo(dlgSoftFloat64FPRSqrtEstimateFpscr, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRSqrtEstimateFpscr)); // A32 only. + SetDelegateInfo(dlgSoftFloat64FPRSqrtStep, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRSqrtStep)); // A32 only. 
+ SetDelegateInfo(dlgSoftFloat64FPRSqrtStepFused, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPRSqrtStepFused)); + SetDelegateInfo(dlgSoftFloat64FPSqrt, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPSqrt)); + SetDelegateInfo(dlgSoftFloat64FPSub, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64FPSub)); + + SetDelegateInfo(dlgSoftFloat64_16FPConvert, Marshal.GetFunctionPointerForDelegate(dlgSoftFloat64_16FPConvert)); } private delegate double MathAbs(double value); diff --git a/src/Ryujinx.Graphics.GAL/IImageArray.cs b/src/Ryujinx.Graphics.GAL/IImageArray.cs index d119aa9fb..d87314eb8 100644 --- a/src/Ryujinx.Graphics.GAL/IImageArray.cs +++ b/src/Ryujinx.Graphics.GAL/IImageArray.cs @@ -4,7 +4,6 @@ namespace Ryujinx.Graphics.GAL { public interface IImageArray : IDisposable { - void SetFormats(int index, Format[] imageFormats); void SetImages(int index, ITexture[] images); } } diff --git a/src/Ryujinx.Graphics.GAL/IPipeline.cs b/src/Ryujinx.Graphics.GAL/IPipeline.cs index 8c71adb9d..1ecbb402c 100644 --- a/src/Ryujinx.Graphics.GAL/IPipeline.cs +++ b/src/Ryujinx.Graphics.GAL/IPipeline.cs @@ -58,7 +58,7 @@ namespace Ryujinx.Graphics.GAL void SetIndexBuffer(BufferRange buffer, IndexType type); - void SetImage(ShaderStage stage, int binding, ITexture texture, Format imageFormat); + void SetImage(ShaderStage stage, int binding, ITexture texture); void SetImageArray(ShaderStage stage, int binding, IImageArray array); void SetImageArraySeparate(ShaderStage stage, int setIndex, IImageArray array); diff --git a/src/Ryujinx.Graphics.GAL/ITexture.cs b/src/Ryujinx.Graphics.GAL/ITexture.cs index 2d9c6b799..2aa4053ff 100644 --- a/src/Ryujinx.Graphics.GAL/ITexture.cs +++ b/src/Ryujinx.Graphics.GAL/ITexture.cs @@ -1,4 +1,4 @@ -using System.Buffers; +using Ryujinx.Common.Memory; namespace Ryujinx.Graphics.GAL { @@ -18,30 +18,30 @@ namespace Ryujinx.Graphics.GAL PinnedSpan GetData(int layer, int level); /// - /// Sets the texture data. 
The data passed as a will be disposed when + /// Sets the texture data. The data passed as a will be disposed when /// the operation completes. /// /// Texture data bytes - void SetData(IMemoryOwner data); + void SetData(MemoryOwner data); /// - /// Sets the texture data. The data passed as a will be disposed when + /// Sets the texture data. The data passed as a will be disposed when /// the operation completes. /// /// Texture data bytes /// Target layer /// Target level - void SetData(IMemoryOwner data, int layer, int level); + void SetData(MemoryOwner data, int layer, int level); /// - /// Sets the texture data. The data passed as a will be disposed when + /// Sets the texture data. The data passed as a will be disposed when /// the operation completes. /// /// Texture data bytes /// Target layer /// Target level /// Target sub-region of the texture to update - void SetData(IMemoryOwner data, int layer, int level, Rectangle region); + void SetData(MemoryOwner data, int layer, int level, Rectangle region); void SetStorage(BufferRange buffer); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index ef227d4a5..a1e6db971 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -67,7 +67,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading Register(CommandType.CounterEventFlush); Register(CommandType.ImageArrayDispose); - Register(CommandType.ImageArraySetFormats); Register(CommandType.ImageArraySetImages); Register(CommandType.ProgramDispose); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index cf3f5d6c1..348c8e462 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -27,7 +27,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading CounterEventFlush, 
ImageArrayDispose, - ImageArraySetFormats, ImageArraySetImages, ProgramDispose, diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/ImageArray/ImageArraySetFormatsCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/ImageArray/ImageArraySetFormatsCommand.cs deleted file mode 100644 index 8e3ba88a8..000000000 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/ImageArray/ImageArraySetFormatsCommand.cs +++ /dev/null @@ -1,26 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.ImageArray -{ - struct ImageArraySetFormatsCommand : IGALCommand, IGALCommand - { - public readonly CommandType CommandType => CommandType.ImageArraySetFormats; - private TableRef _imageArray; - private int _index; - private TableRef _imageFormats; - - public void Set(TableRef imageArray, int index, TableRef imageFormats) - { - _imageArray = imageArray; - _index = index; - _imageFormats = imageFormats; - } - - public static void Run(ref ImageArraySetFormatsCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - ThreadedImageArray imageArray = command._imageArray.Get(threaded); - imageArray.Base.SetFormats(command._index, command._imageFormats.Get(threaded)); - } - } -} diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/SetImageCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/SetImageCommand.cs index 243480a81..2ba9db527 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/SetImageCommand.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/SetImageCommand.cs @@ -10,19 +10,17 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Commands private ShaderStage _stage; private int _binding; private TableRef _texture; - private Format _imageFormat; - public void Set(ShaderStage stage, int binding, TableRef texture, Format imageFormat) + public void Set(ShaderStage stage, int binding, TableRef texture) { _stage = 
stage; _binding = binding; _texture = texture; - _imageFormat = imageFormat; } public static void Run(ref SetImageCommand command, ThreadedRenderer threaded, IRenderer renderer) { - renderer.Pipeline.SetImage(command._stage, command._binding, command._texture.GetAs(threaded)?.Base, command._imageFormat); + renderer.Pipeline.SetImage(command._stage, command._binding, command._texture.GetAs(threaded)?.Base); } } } diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataCommand.cs index 3aba004df..4449566a7 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataCommand.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataCommand.cs @@ -1,6 +1,6 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; -using System.Buffers; namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { @@ -8,9 +8,9 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { public readonly CommandType CommandType => CommandType.TextureSetData; private TableRef _texture; - private TableRef> _data; + private TableRef> _data; - public void Set(TableRef texture, TableRef> data) + public void Set(TableRef texture, TableRef> data) { _texture = texture; _data = data; diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceCommand.cs index 7ad709a75..3619149e9 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceCommand.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceCommand.cs @@ -1,6 +1,6 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; -using System.Buffers; 
namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { @@ -8,11 +8,11 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { public readonly CommandType CommandType => CommandType.TextureSetDataSlice; private TableRef _texture; - private TableRef> _data; + private TableRef> _data; private int _layer; private int _level; - public void Set(TableRef texture, TableRef> data, int layer, int level) + public void Set(TableRef texture, TableRef> data, int layer, int level) { _texture = texture; _data = data; diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceRegionCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceRegionCommand.cs index c211931bc..6c6a53636 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceRegionCommand.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Texture/TextureSetDataSliceRegionCommand.cs @@ -1,6 +1,6 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; -using System.Buffers; namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { @@ -8,12 +8,12 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Texture { public readonly CommandType CommandType => CommandType.TextureSetDataSliceRegion; private TableRef _texture; - private TableRef> _data; + private TableRef> _data; private int _layer; private int _level; private Rectangle _region; - public void Set(TableRef texture, TableRef> data, int layer, int level, Rectangle region) + public void Set(TableRef texture, TableRef> data, int layer, int level, Rectangle region) { _texture = texture; _data = data; diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedImageArray.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedImageArray.cs index 19bc6f233..82587c189 100644 --- 
a/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedImageArray.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedImageArray.cs @@ -27,12 +27,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources _renderer.QueueCommand(); } - public void SetFormats(int index, Format[] imageFormats) - { - _renderer.New().Set(Ref(this), index, Ref(imageFormats)); - _renderer.QueueCommand(); - } - public void SetImages(int index, ITexture[] images) { _renderer.New().Set(Ref(this), index, Ref(images)); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs index 80003b844..fa71d20b3 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs @@ -1,6 +1,6 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture; using Ryujinx.Graphics.GAL.Multithreading.Model; -using System.Buffers; namespace Ryujinx.Graphics.GAL.Multithreading.Resources { @@ -111,21 +111,21 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources } /// - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { _renderer.New().Set(Ref(this), Ref(data)); _renderer.QueueCommand(); } /// - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { _renderer.New().Set(Ref(this), Ref(data), layer, level); _renderer.QueueCommand(); } /// - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { _renderer.New().Set(Ref(this), Ref(data), layer, level, region); _renderer.QueueCommand(); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs index 8931e1056..cee3480fe 100644 --- 
a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs @@ -177,9 +177,9 @@ namespace Ryujinx.Graphics.GAL.Multithreading _renderer.QueueCommand(); } - public void SetImage(ShaderStage stage, int binding, ITexture texture, Format imageFormat) + public void SetImage(ShaderStage stage, int binding, ITexture texture) { - _renderer.New().Set(stage, binding, Ref(texture), imageFormat); + _renderer.New().Set(stage, binding, Ref(texture)); _renderer.QueueCommand(); } diff --git a/src/Ryujinx.Graphics.GAL/UpscaleType.cs b/src/Ryujinx.Graphics.GAL/UpscaleType.cs index ca24199c4..e2482faef 100644 --- a/src/Ryujinx.Graphics.GAL/UpscaleType.cs +++ b/src/Ryujinx.Graphics.GAL/UpscaleType.cs @@ -5,5 +5,6 @@ namespace Ryujinx.Graphics.GAL Bilinear, Nearest, Fsr, + Area, } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs index 218db15cf..cdeae0040 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs @@ -1,10 +1,10 @@ using Ryujinx.Common; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.Device; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Texture; using System; -using System.Buffers; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -276,8 +276,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma dstBaseOffset += dstStride * (yCount - 1); } - ReadOnlySpan srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true); - // If remapping is disabled, we always copy the components directly, in order. // If it's enabled, but the mapping is just XYZW, we also copy them in order. 
bool isIdentityRemap = !remap || @@ -289,6 +287,52 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount); bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount); + // Check if the source texture exists on the GPU, if it does, do a GPU side copy. + // Otherwise, we would need to flush the source texture which is costly. + // We don't expect the source to be linear in such cases, as linear source usually indicates buffer or CPU written data. + + if (completeSource && completeDest && !srcLinear && isIdentityRemap) + { + var source = memoryManager.Physical.TextureCache.FindTexture( + memoryManager, + srcGpuVa, + srcBpp, + srcStride, + src.Height, + xCount, + yCount, + srcLinear, + src.MemoryLayout.UnpackGobBlocksInY(), + src.MemoryLayout.UnpackGobBlocksInZ()); + + if (source != null && source.Height == yCount) + { + source.SynchronizeMemory(); + + var target = memoryManager.Physical.TextureCache.FindOrCreateTexture( + memoryManager, + source.Info.FormatInfo, + dstGpuVa, + xCount, + yCount, + dstStride, + dstLinear, + dst.MemoryLayout.UnpackGobBlocksInY(), + dst.MemoryLayout.UnpackGobBlocksInZ()); + + if (source.ScaleFactor != target.ScaleFactor) + { + target.PropagateScale(source); + } + + source.HostTexture.CopyTo(target.HostTexture, 0, 0); + target.SignalModified(); + return; + } + } + + ReadOnlySpan srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true); + // Try to set the texture data directly, // but only if we are doing a complete copy, // and not for block linear to linear copies, since those are typically accessed from the CPU. 
@@ -309,7 +353,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma if (target != null) { - IMemoryOwner data; + MemoryOwner data; if (srcLinear) { data = LayoutConverter.ConvertLinearStridedToLinear( diff --git a/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs index 7bff1c4b8..bdb34180e 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/ShaderTexture.cs @@ -1,5 +1,6 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; namespace Ryujinx.Graphics.Gpu.Engine @@ -61,51 +62,51 @@ namespace Ryujinx.Graphics.Gpu.Engine /// /// Shader image format /// Texture format - public static Format GetFormat(TextureFormat format) + public static FormatInfo GetFormatInfo(TextureFormat format) { return format switch { #pragma warning disable IDE0055 // Disable formatting - TextureFormat.R8Unorm => Format.R8Unorm, - TextureFormat.R8Snorm => Format.R8Snorm, - TextureFormat.R8Uint => Format.R8Uint, - TextureFormat.R8Sint => Format.R8Sint, - TextureFormat.R16Float => Format.R16Float, - TextureFormat.R16Unorm => Format.R16Unorm, - TextureFormat.R16Snorm => Format.R16Snorm, - TextureFormat.R16Uint => Format.R16Uint, - TextureFormat.R16Sint => Format.R16Sint, - TextureFormat.R32Float => Format.R32Float, - TextureFormat.R32Uint => Format.R32Uint, - TextureFormat.R32Sint => Format.R32Sint, - TextureFormat.R8G8Unorm => Format.R8G8Unorm, - TextureFormat.R8G8Snorm => Format.R8G8Snorm, - TextureFormat.R8G8Uint => Format.R8G8Uint, - TextureFormat.R8G8Sint => Format.R8G8Sint, - TextureFormat.R16G16Float => Format.R16G16Float, - TextureFormat.R16G16Unorm => Format.R16G16Unorm, - TextureFormat.R16G16Snorm => Format.R16G16Snorm, - TextureFormat.R16G16Uint => Format.R16G16Uint, - TextureFormat.R16G16Sint => Format.R16G16Sint, - TextureFormat.R32G32Float => Format.R32G32Float, - TextureFormat.R32G32Uint => Format.R32G32Uint, - 
TextureFormat.R32G32Sint => Format.R32G32Sint, - TextureFormat.R8G8B8A8Unorm => Format.R8G8B8A8Unorm, - TextureFormat.R8G8B8A8Snorm => Format.R8G8B8A8Snorm, - TextureFormat.R8G8B8A8Uint => Format.R8G8B8A8Uint, - TextureFormat.R8G8B8A8Sint => Format.R8G8B8A8Sint, - TextureFormat.R16G16B16A16Float => Format.R16G16B16A16Float, - TextureFormat.R16G16B16A16Unorm => Format.R16G16B16A16Unorm, - TextureFormat.R16G16B16A16Snorm => Format.R16G16B16A16Snorm, - TextureFormat.R16G16B16A16Uint => Format.R16G16B16A16Uint, - TextureFormat.R16G16B16A16Sint => Format.R16G16B16A16Sint, - TextureFormat.R32G32B32A32Float => Format.R32G32B32A32Float, - TextureFormat.R32G32B32A32Uint => Format.R32G32B32A32Uint, - TextureFormat.R32G32B32A32Sint => Format.R32G32B32A32Sint, - TextureFormat.R10G10B10A2Unorm => Format.R10G10B10A2Unorm, - TextureFormat.R10G10B10A2Uint => Format.R10G10B10A2Uint, - TextureFormat.R11G11B10Float => Format.R11G11B10Float, - _ => 0, + TextureFormat.R8Unorm => new(Format.R8Unorm, 1, 1, 1, 1), + TextureFormat.R8Snorm => new(Format.R8Snorm, 1, 1, 1, 1), + TextureFormat.R8Uint => new(Format.R8Uint, 1, 1, 1, 1), + TextureFormat.R8Sint => new(Format.R8Sint, 1, 1, 1, 1), + TextureFormat.R16Float => new(Format.R16Float, 1, 1, 2, 1), + TextureFormat.R16Unorm => new(Format.R16Unorm, 1, 1, 2, 1), + TextureFormat.R16Snorm => new(Format.R16Snorm, 1, 1, 2, 1), + TextureFormat.R16Uint => new(Format.R16Uint, 1, 1, 2, 1), + TextureFormat.R16Sint => new(Format.R16Sint, 1, 1, 2, 1), + TextureFormat.R32Float => new(Format.R32Float, 1, 1, 4, 1), + TextureFormat.R32Uint => new(Format.R32Uint, 1, 1, 4, 1), + TextureFormat.R32Sint => new(Format.R32Sint, 1, 1, 4, 1), + TextureFormat.R8G8Unorm => new(Format.R8G8Unorm, 1, 1, 2, 2), + TextureFormat.R8G8Snorm => new(Format.R8G8Snorm, 1, 1, 2, 2), + TextureFormat.R8G8Uint => new(Format.R8G8Uint, 1, 1, 2, 2), + TextureFormat.R8G8Sint => new(Format.R8G8Sint, 1, 1, 2, 2), + TextureFormat.R16G16Float => new(Format.R16G16Float, 1, 1, 4, 2), + 
TextureFormat.R16G16Unorm => new(Format.R16G16Unorm, 1, 1, 4, 2), + TextureFormat.R16G16Snorm => new(Format.R16G16Snorm, 1, 1, 4, 2), + TextureFormat.R16G16Uint => new(Format.R16G16Uint, 1, 1, 4, 2), + TextureFormat.R16G16Sint => new(Format.R16G16Sint, 1, 1, 4, 2), + TextureFormat.R32G32Float => new(Format.R32G32Float, 1, 1, 8, 2), + TextureFormat.R32G32Uint => new(Format.R32G32Uint, 1, 1, 8, 2), + TextureFormat.R32G32Sint => new(Format.R32G32Sint, 1, 1, 8, 2), + TextureFormat.R8G8B8A8Unorm => new(Format.R8G8B8A8Unorm, 1, 1, 4, 4), + TextureFormat.R8G8B8A8Snorm => new(Format.R8G8B8A8Snorm, 1, 1, 4, 4), + TextureFormat.R8G8B8A8Uint => new(Format.R8G8B8A8Uint, 1, 1, 4, 4), + TextureFormat.R8G8B8A8Sint => new(Format.R8G8B8A8Sint, 1, 1, 4, 4), + TextureFormat.R16G16B16A16Float => new(Format.R16G16B16A16Float, 1, 1, 8, 4), + TextureFormat.R16G16B16A16Unorm => new(Format.R16G16B16A16Unorm, 1, 1, 8, 4), + TextureFormat.R16G16B16A16Snorm => new(Format.R16G16B16A16Snorm, 1, 1, 8, 4), + TextureFormat.R16G16B16A16Uint => new(Format.R16G16B16A16Uint, 1, 1, 8, 4), + TextureFormat.R16G16B16A16Sint => new(Format.R16G16B16A16Sint, 1, 1, 8, 4), + TextureFormat.R32G32B32A32Float => new(Format.R32G32B32A32Float, 1, 1, 16, 4), + TextureFormat.R32G32B32A32Uint => new(Format.R32G32B32A32Uint, 1, 1, 16, 4), + TextureFormat.R32G32B32A32Sint => new(Format.R32G32B32A32Sint, 1, 1, 16, 4), + TextureFormat.R10G10B10A2Unorm => new(Format.R10G10B10A2Unorm, 1, 1, 4, 4), + TextureFormat.R10G10B10A2Uint => new(Format.R10G10B10A2Uint, 1, 1, 4, 4), + TextureFormat.R11G11B10Float => new(Format.R11G11B10Float, 1, 1, 4, 3), + _ => FormatInfo.Invalid, #pragma warning restore IDE0055 }; } diff --git a/src/Ryujinx.Graphics.Gpu/Image/FormatInfo.cs b/src/Ryujinx.Graphics.Gpu/Image/FormatInfo.cs index 8a9f37bb0..b95c684e4 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/FormatInfo.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/FormatInfo.cs @@ -7,6 +7,11 @@ namespace Ryujinx.Graphics.Gpu.Image /// readonly struct 
FormatInfo { + /// + /// An invalid texture format. + /// + public static FormatInfo Invalid { get; } = new(0, 0, 0, 0, 0); + /// /// A default, generic RGBA8 texture format. /// @@ -23,7 +28,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// /// Must be 1 for non-compressed formats. /// - public int BlockWidth { get; } + public byte BlockWidth { get; } /// /// The block height for compressed formats. @@ -31,17 +36,17 @@ namespace Ryujinx.Graphics.Gpu.Image /// /// Must be 1 for non-compressed formats. /// - public int BlockHeight { get; } + public byte BlockHeight { get; } /// /// The number of bytes occupied by a single pixel in memory of the texture data. /// - public int BytesPerPixel { get; } + public byte BytesPerPixel { get; } /// /// The maximum number of components this format has defined (in RGBA order). /// - public int Components { get; } + public byte Components { get; } /// /// Whenever or not the texture format is a compressed format. Determined from block size. @@ -57,10 +62,10 @@ namespace Ryujinx.Graphics.Gpu.Image /// The number of bytes occupied by a single pixel in memory of the texture data public FormatInfo( Format format, - int blockWidth, - int blockHeight, - int bytesPerPixel, - int components) + byte blockWidth, + byte blockHeight, + byte bytesPerPixel, + byte components) { Format = format; BlockWidth = blockWidth; diff --git a/src/Ryujinx.Graphics.Gpu/Image/Sampler.cs b/src/Ryujinx.Graphics.Gpu/Image/Sampler.cs index d6a3d975b..b007c1591 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/Sampler.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/Sampler.cs @@ -13,6 +13,11 @@ namespace Ryujinx.Graphics.Gpu.Image /// public bool IsDisposed { get; private set; } + /// + /// True if the sampler has sRGB conversion enabled, false otherwise. + /// + public bool IsSrgb { get; } + /// /// Host sampler object. 
/// @@ -30,6 +35,8 @@ namespace Ryujinx.Graphics.Gpu.Image /// The Maxwell sampler descriptor public Sampler(GpuContext context, SamplerDescriptor descriptor) { + IsSrgb = descriptor.UnpackSrgb(); + MinFilter minFilter = descriptor.UnpackMinFilter(); MagFilter magFilter = descriptor.UnpackMagFilter(); diff --git a/src/Ryujinx.Graphics.Gpu/Image/SamplerDescriptor.cs b/src/Ryujinx.Graphics.Gpu/Image/SamplerDescriptor.cs index e04c31dfa..836a3260c 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/SamplerDescriptor.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/SamplerDescriptor.cs @@ -113,6 +113,15 @@ namespace Ryujinx.Graphics.Gpu.Image return (CompareOp)(((Word0 >> 10) & 7) + 1); } + /// + /// Unpacks the sampler sRGB format flag. + /// + /// True if the sampler has sRGB conversion enabled, false otherwise + public readonly bool UnpackSrgb() + { + return (Word0 & (1 << 13)) != 0; + } + /// /// Unpacks and converts the maximum anisotropy value used for texture anisotropic filtering. /// diff --git a/src/Ryujinx.Graphics.Gpu/Image/Texture.cs b/src/Ryujinx.Graphics.Gpu/Image/Texture.cs index 3b6c407cc..7ee2e5cf0 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -7,7 +7,6 @@ using Ryujinx.Graphics.Texture.Astc; using Ryujinx.Memory; using Ryujinx.Memory.Range; using System; -using System.Buffers; using System.Collections.Generic; using System.Diagnostics; using System.Linq; @@ -662,7 +661,7 @@ namespace Ryujinx.Graphics.Gpu.Image } } - IMemoryOwner result = ConvertToHostCompatibleFormat(data); + MemoryOwner result = ConvertToHostCompatibleFormat(data); if (ScaleFactor != 1f && AllowScaledSetData()) { @@ -685,7 +684,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Uploads new texture data to the host GPU.
/// /// New data - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { BlacklistScale(); @@ -704,7 +703,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// New data /// Target layer /// Target level - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { BlacklistScale(); @@ -722,7 +721,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Target layer /// Target level /// Target sub-region of the texture to update - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { BlacklistScale(); @@ -740,7 +739,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Mip level to convert /// True to convert a single slice /// Converted data - public IMemoryOwner ConvertToHostCompatibleFormat(ReadOnlySpan data, int level = 0, bool single = false) + public MemoryOwner ConvertToHostCompatibleFormat(ReadOnlySpan data, int level = 0, bool single = false) { int width = Info.Width; int height = Info.Height; @@ -755,7 +754,7 @@ namespace Ryujinx.Graphics.Gpu.Image int sliceDepth = single ? 1 : depth; - IMemoryOwner linear; + MemoryOwner linear; if (Info.IsLinear) { @@ -788,7 +787,7 @@ namespace Ryujinx.Graphics.Gpu.Image data); } - IMemoryOwner result = linear; + MemoryOwner result = linear; // Handle compressed cases not supported by the host: // - ASTC is usually not supported on desktop cards. 
@@ -832,19 +831,19 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.Etc2RgbaUnorm: using (result) { - return ETC2Decoder.DecodeRgba(result.Memory.Span, width, height, sliceDepth, levels, layers); + return ETC2Decoder.DecodeRgba(result.Span, width, height, sliceDepth, levels, layers); } case Format.Etc2RgbPtaSrgb: case Format.Etc2RgbPtaUnorm: using (result) { - return ETC2Decoder.DecodePta(result.Memory.Span, width, height, sliceDepth, levels, layers); + return ETC2Decoder.DecodePta(result.Span, width, height, sliceDepth, levels, layers); } case Format.Etc2RgbSrgb: case Format.Etc2RgbUnorm: using (result) { - return ETC2Decoder.DecodeRgb(result.Memory.Span, width, height, sliceDepth, levels, layers); + return ETC2Decoder.DecodeRgb(result.Span, width, height, sliceDepth, levels, layers); } } } @@ -856,43 +855,43 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.Bc1RgbaUnorm: using (result) { - return BCnDecoder.DecodeBC1(result.Memory.Span, width, height, sliceDepth, levels, layers); + return BCnDecoder.DecodeBC1(result.Span, width, height, sliceDepth, levels, layers); } case Format.Bc2Srgb: case Format.Bc2Unorm: using (result) { - return BCnDecoder.DecodeBC2(result.Memory.Span, width, height, sliceDepth, levels, layers); + return BCnDecoder.DecodeBC2(result.Span, width, height, sliceDepth, levels, layers); } case Format.Bc3Srgb: case Format.Bc3Unorm: using (result) { - return BCnDecoder.DecodeBC3(result.Memory.Span, width, height, sliceDepth, levels, layers); + return BCnDecoder.DecodeBC3(result.Span, width, height, sliceDepth, levels, layers); } case Format.Bc4Snorm: case Format.Bc4Unorm: using (result) { - return BCnDecoder.DecodeBC4(result.Memory.Span, width, height, sliceDepth, levels, layers, Format == Format.Bc4Snorm); + return BCnDecoder.DecodeBC4(result.Span, width, height, sliceDepth, levels, layers, Format == Format.Bc4Snorm); } case Format.Bc5Snorm: case Format.Bc5Unorm: using (result) { - return BCnDecoder.DecodeBC5(result.Memory.Span, width, 
height, sliceDepth, levels, layers, Format == Format.Bc5Snorm); + return BCnDecoder.DecodeBC5(result.Span, width, height, sliceDepth, levels, layers, Format == Format.Bc5Snorm); } case Format.Bc6HSfloat: case Format.Bc6HUfloat: using (result) { - return BCnDecoder.DecodeBC6(result.Memory.Span, width, height, sliceDepth, levels, layers, Format == Format.Bc6HSfloat); + return BCnDecoder.DecodeBC6(result.Span, width, height, sliceDepth, levels, layers, Format == Format.Bc6HSfloat); } case Format.Bc7Srgb: case Format.Bc7Unorm: using (result) { - return BCnDecoder.DecodeBC7(result.Memory.Span, width, height, sliceDepth, levels, layers); + return BCnDecoder.DecodeBC7(result.Span, width, height, sliceDepth, levels, layers); } } } @@ -900,7 +899,7 @@ namespace Ryujinx.Graphics.Gpu.Image { using (result) { - var converted = PixelConverter.ConvertR4G4ToR4G4B4A4(result.Memory.Span, width); + var converted = PixelConverter.ConvertR4G4ToR4G4B4A4(result.Span, width); if (_context.Capabilities.SupportsR4G4B4A4Format) { @@ -910,7 +909,7 @@ namespace Ryujinx.Graphics.Gpu.Image { using (converted) { - return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(converted.Memory.Span, width); + return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(converted.Span, width); } } } @@ -921,7 +920,7 @@ namespace Ryujinx.Graphics.Gpu.Image { using (result) { - return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result.Memory.Span, width); + return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result.Span, width); } } } @@ -933,24 +932,24 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.R5G6B5Unorm: using (result) { - return PixelConverter.ConvertR5G6B5ToR8G8B8A8(result.Memory.Span, width); + return PixelConverter.ConvertR5G6B5ToR8G8B8A8(result.Span, width); } case Format.B5G5R5A1Unorm: case Format.R5G5B5X1Unorm: case Format.R5G5B5A1Unorm: using (result) { - return PixelConverter.ConvertR5G5B5ToR8G8B8A8(result.Memory.Span, width, Format == Format.R5G5B5X1Unorm); + return 
PixelConverter.ConvertR5G5B5ToR8G8B8A8(result.Span, width, Format == Format.R5G5B5X1Unorm); } case Format.A1B5G5R5Unorm: using (result) { - return PixelConverter.ConvertA1B5G5R5ToR8G8B8A8(result.Memory.Span, width); + return PixelConverter.ConvertA1B5G5R5ToR8G8B8A8(result.Span, width); } case Format.R4G4B4A4Unorm: using (result) { - return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result.Memory.Span, width); + return PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result.Span, width); } } } diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingInfo.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingInfo.cs index 31abc21e8..e9930405b 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingInfo.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingInfo.cs @@ -17,7 +17,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// /// For images, indicates the format specified on the shader. /// - public Format Format { get; } + public FormatInfo FormatInfo { get; } /// /// Shader texture host set index. @@ -58,17 +58,17 @@ namespace Ryujinx.Graphics.Gpu.Image /// Constructs the texture binding information structure. /// /// The shader sampler target type - /// Format of the image as declared on the shader + /// Format of the image as declared on the shader /// Shader texture host set index /// The shader texture binding point /// For array of textures, this indicates the length of the array. 
A value of one indicates it is not an array /// Constant buffer slot where the texture handle is located /// The shader texture handle (read index into the texture constant buffer) /// The texture's usage flags, indicating how it is used in the shader - public TextureBindingInfo(Target target, Format format, int set, int binding, int arrayLength, int cbufSlot, int handle, TextureUsageFlags flags) + public TextureBindingInfo(Target target, FormatInfo formatInfo, int set, int binding, int arrayLength, int cbufSlot, int handle, TextureUsageFlags flags) { Target = target; - Format = format; + FormatInfo = formatInfo; Set = set; Binding = binding; ArrayLength = arrayLength; @@ -96,7 +96,7 @@ namespace Ryujinx.Graphics.Gpu.Image int cbufSlot, int handle, TextureUsageFlags flags, - bool isSamplerOnly) : this(target, 0, set, binding, arrayLength, cbufSlot, handle, flags) + bool isSamplerOnly) : this(target, FormatInfo.Invalid, set, binding, arrayLength, cbufSlot, handle, flags) { IsSamplerOnly = isSamplerOnly; } diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs index 8b9243b1e..72bac75e5 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs @@ -659,7 +659,6 @@ namespace Ryujinx.Graphics.Gpu.Image int length = (isSampler ? samplerPool.MaximumId : texturePool.MaximumId) + 1; length = Math.Min(length, bindingInfo.ArrayLength); - Format[] formats = isImage ? new Format[bindingInfo.ArrayLength] : null; ISampler[] samplers = isImage ? 
null : new ISampler[bindingInfo.ArrayLength]; ITexture[] textures = new ITexture[bindingInfo.ArrayLength]; @@ -674,7 +673,7 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(index, out texture); + ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(index, bindingInfo.FormatInfo, out texture); if (texture != null) { @@ -697,8 +696,6 @@ namespace Ryujinx.Graphics.Gpu.Image ITexture hostTexture = texture?.GetTargetTexture(bindingInfo.Target); ISampler hostSampler = sampler?.GetHostSampler(texture); - Format format = bindingInfo.Format; - if (hostTexture != null && texture.Target == Target.TextureBuffer) { // Ensure that the buffer texture is using the correct buffer as storage. @@ -706,26 +703,15 @@ namespace Ryujinx.Graphics.Gpu.Image // to ensure we're not using a old buffer that was already deleted. if (isImage) { - if (format == 0 && texture != null) - { - format = texture.Format; - } - - _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index); } else { - _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index); } } else if (isImage) { - if (format == 0 && texture != null) - { - format = texture.Format; - } - - formats[index] = format; textures[index] = hostTexture; } else @@ -737,7 +723,6 @@ namespace Ryujinx.Graphics.Gpu.Image if (isImage) { - entry.ImageArray.SetFormats(0, formats); entry.ImageArray.SetImages(0, textures); SetImageArray(stage, bindingInfo, entry.ImageArray); @@ -863,7 +848,6 @@ namespace Ryujinx.Graphics.Gpu.Image entry.UpdateData(cachedTextureBuffer, 
cachedSamplerBuffer, separateSamplerBuffer); - Format[] formats = isImage ? new Format[bindingInfo.ArrayLength] : null; ISampler[] samplers = isImage ? null : new ISampler[bindingInfo.ArrayLength]; ITexture[] textures = new ITexture[bindingInfo.ArrayLength]; @@ -883,7 +867,7 @@ namespace Ryujinx.Graphics.Gpu.Image samplerId = TextureHandle.UnpackSamplerId(packedId); } - ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(textureId, out Texture texture); + ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(textureId, bindingInfo.FormatInfo, out Texture texture); if (texture != null) { @@ -916,8 +900,6 @@ namespace Ryujinx.Graphics.Gpu.Image hostSampler = sampler?.GetHostSampler(texture); } - Format format = bindingInfo.Format; - if (hostTexture != null && texture.Target == Target.TextureBuffer) { // Ensure that the buffer texture is using the correct buffer as storage. @@ -925,26 +907,15 @@ namespace Ryujinx.Graphics.Gpu.Image // to ensure we're not using a old buffer that was already deleted. 
if (isImage) { - if (format == 0 && texture != null) - { - format = texture.Format; - } - - _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index); } else { - _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index); } } else if (isImage) { - if (format == 0 && texture != null) - { - format = texture.Format; - } - - formats[index] = format; textures[index] = hostTexture; } else @@ -956,7 +927,6 @@ namespace Ryujinx.Graphics.Gpu.Image if (isImage) { - entry.ImageArray.SetFormats(0, formats); entry.ImageArray.SetImages(0, textures); SetImageArray(stage, bindingInfo, entry.ImageArray); diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs index 9f1f60d95..f96ddfb1b 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs @@ -187,7 +187,9 @@ namespace Ryujinx.Graphics.Gpu.Image { (TexturePool texturePool, SamplerPool samplerPool) = GetPools(); - return (texturePool.Get(textureId), samplerPool.Get(samplerId)); + Sampler sampler = samplerPool?.Get(samplerId); + + return (texturePool.Get(textureId, sampler?.IsSrgb ?? true), sampler); } /// @@ -508,12 +510,12 @@ namespace Ryujinx.Graphics.Gpu.Image state.TextureHandle = textureId; state.SamplerHandle = samplerId; - ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(textureId, out Texture texture); + Sampler sampler = samplerPool?.Get(samplerId); + + ref readonly TextureDescriptor descriptor = ref texturePool.GetForBinding(textureId, sampler?.IsSrgb ?? 
true, out Texture texture); specStateMatches &= specState.MatchesTexture(stage, index, descriptor); - Sampler sampler = samplerPool?.Get(samplerId); - ITexture hostTexture = texture?.GetTargetTexture(bindingInfo.Target); ISampler hostSampler = sampler?.GetHostSampler(texture); @@ -522,7 +524,7 @@ namespace Ryujinx.Graphics.Gpu.Image // Ensure that the buffer texture is using the correct buffer as storage. // Buffers are frequently re-created to accommodate larger data, so we need to re-bind // to ensure we're not using a old buffer that was already deleted. - _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range, bindingInfo, bindingInfo.Format, false); + _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range, bindingInfo, false); // Cache is not used for buffer texture, it must always rebind. state.CachedTexture = null; @@ -616,6 +618,7 @@ namespace Ryujinx.Graphics.Gpu.Image if (!poolModified && state.TextureHandle == textureId && + state.ImageFormat == bindingInfo.FormatInfo.Format && state.CachedTexture != null && state.CachedTexture.InvalidatedSequence == state.InvalidatedSequence) { @@ -629,26 +632,22 @@ namespace Ryujinx.Graphics.Gpu.Image cachedTexture.SignalModified(); } - Format format = bindingInfo.Format == 0 ? 
cachedTexture.Format : bindingInfo.Format; - - if (state.ImageFormat != format || - ((usageFlags & TextureUsageFlags.NeedsScaleValue) != 0 && - UpdateScale(state.CachedTexture, usageFlags, scaleIndex, stage))) + if ((usageFlags & TextureUsageFlags.NeedsScaleValue) != 0 && UpdateScale(state.CachedTexture, usageFlags, scaleIndex, stage)) { ITexture hostTextureRebind = state.CachedTexture.GetTargetTexture(bindingInfo.Target); state.Texture = hostTextureRebind; - state.ImageFormat = format; - _context.Renderer.Pipeline.SetImage(stage, bindingInfo.Binding, hostTextureRebind, format); + _context.Renderer.Pipeline.SetImage(stage, bindingInfo.Binding, hostTextureRebind); } continue; } state.TextureHandle = textureId; + state.ImageFormat = bindingInfo.FormatInfo.Format; - ref readonly TextureDescriptor descriptor = ref pool.GetForBinding(textureId, out Texture texture); + ref readonly TextureDescriptor descriptor = ref pool.GetForBinding(textureId, bindingInfo.FormatInfo, out Texture texture); specStateMatches &= specState.MatchesImage(stage, index, descriptor); @@ -660,14 +659,7 @@ namespace Ryujinx.Graphics.Gpu.Image // Buffers are frequently re-created to accommodate larger data, so we need to re-bind // to ensure we're not using a old buffer that was already deleted. - Format format = bindingInfo.Format; - - if (format == 0 && texture != null) - { - format = texture.Format; - } - - _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range, bindingInfo, format, true); + _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range, bindingInfo, true); // Cache is not used for buffer texture, it must always rebind. 
state.CachedTexture = null; @@ -689,16 +681,7 @@ namespace Ryujinx.Graphics.Gpu.Image { state.Texture = hostTexture; - Format format = bindingInfo.Format; - - if (format == 0 && texture != null) - { - format = texture.Format; - } - - state.ImageFormat = format; - - _context.Renderer.Pipeline.SetImage(stage, bindingInfo.Binding, hostTexture, format); + _context.Renderer.Pipeline.SetImage(stage, bindingInfo.Binding, hostTexture); } state.CachedTexture = texture; diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs index b6fa842e3..5a3319b06 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs @@ -347,6 +347,53 @@ namespace Ryujinx.Graphics.Gpu.Image return texture; } + /// + /// Tries to find an existing texture, or create a new one if not found. + /// + /// GPU memory manager where the texture is mapped + /// Format of the texture + /// GPU virtual address of the texture + /// Texture width in bytes + /// Texture height + /// Texture stride if linear, otherwise ignored + /// Indicates if the texture is linear or block linear + /// GOB blocks in Y for block linear textures + /// GOB blocks in Z for 3D block linear textures + /// The texture + public Texture FindOrCreateTexture( + MemoryManager memoryManager, + FormatInfo formatInfo, + ulong gpuAddress, + int xCount, + int yCount, + int stride, + bool isLinear, + int gobBlocksInY, + int gobBlocksInZ) + { + TextureInfo info = new( + gpuAddress, + xCount / formatInfo.BytesPerPixel, + yCount, + 1, + 1, + 1, + 1, + stride, + isLinear, + gobBlocksInY, + gobBlocksInZ, + 1, + Target.Texture2D, + formatInfo); + + Texture texture = FindOrCreateTexture(memoryManager, TextureSearchFlags.ForCopy, info, 0, sizeHint: new Size(xCount, yCount, 1)); + + texture?.SynchronizeMemory(); + + return texture; + } + /// /// Tries to find an existing texture, or create a new one if not found. 
/// diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs index 3cdeac9c5..8bed6363b 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs @@ -739,7 +739,8 @@ namespace Ryujinx.Graphics.Gpu.Image } return (lhsFormat.Format == Format.R8G8B8A8Unorm && rhsFormat.Format == Format.R32G32B32A32Float) || - (lhsFormat.Format == Format.R8Unorm && rhsFormat.Format == Format.R8G8B8A8Unorm); + (lhsFormat.Format == Format.R8Unorm && rhsFormat.Format == Format.R8G8B8A8Unorm) || + (lhsFormat.Format == Format.R8Unorm && rhsFormat.Format == Format.R32Uint); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs index 06ca2c599..526fc0c24 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Texture; @@ -5,7 +6,6 @@ using Ryujinx.Memory; using Ryujinx.Memory.Range; using Ryujinx.Memory.Tracking; using System; -using System.Buffers; using System.Collections.Generic; using System.Runtime.CompilerServices; @@ -445,7 +445,7 @@ namespace Ryujinx.Graphics.Gpu.Image ReadOnlySpan data = dataSpan[(offset - spanBase)..]; - IMemoryOwner result = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel + level, true); + MemoryOwner result = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel + level, true); Storage.SetData(result, info.BaseLayer + layer, info.BaseLevel + level); } diff --git a/src/Ryujinx.Graphics.Gpu/Image/TexturePool.cs b/src/Ryujinx.Graphics.Gpu/Image/TexturePool.cs index 4ed0a93c1..be7cb0b89 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TexturePool.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TexturePool.cs @@ -75,6 +75,76 @@ namespace Ryujinx.Graphics.Gpu.Image private readonly 
ConcurrentQueue _dereferenceQueue = new(); private TextureDescriptor _defaultDescriptor; + /// + /// List of textures that shares the same memory region, but have different formats. + /// + private class TextureAliasList + { + /// + /// Alias texture. + /// + /// Texture format + /// Texture + private readonly record struct Alias(Format Format, Texture Texture); + + /// + /// List of texture aliases. + /// + private readonly List _aliases; + + /// + /// Creates a new instance of the texture alias list. + /// + public TextureAliasList() + { + _aliases = new List(); + } + + /// + /// Adds a new texture alias. + /// + /// Alias format + /// Alias texture + public void Add(Format format, Texture texture) + { + _aliases.Add(new Alias(format, texture)); + texture.IncrementReferenceCount(); + } + + /// + /// Finds a texture with the requested format, or returns null if not found. + /// + /// Format to find + /// Texture with the requested format, or null if not found + public Texture Find(Format format) + { + foreach (var alias in _aliases) + { + if (alias.Format == format) + { + return alias.Texture; + } + } + + return null; + } + + /// + /// Removes all alias textures. + /// + public void Destroy() + { + foreach (var entry in _aliases) + { + entry.Texture.DecrementReferenceCount(); + } + + _aliases.Clear(); + } + } + + private readonly Dictionary _aliasLists; + /// /// Linked list node used on the texture pool cache. /// @@ -95,6 +165,7 @@ namespace Ryujinx.Graphics.Gpu.Image public TexturePool(GpuContext context, GpuChannel channel, ulong address, int maximumId) : base(context, channel.MemoryManager.Physical, address, maximumId) { _channel = channel; + _aliasLists = new Dictionary(); } /// @@ -115,14 +186,13 @@ namespace Ryujinx.Graphics.Gpu.Image if (texture == null) { - TextureInfo info = GetInfo(descriptor, out int layerSize); - // The dereference queue can put our texture back on the cache. 
if ((texture = ProcessDereferenceQueue(id)) != null) { return ref descriptor; } + TextureInfo info = GetInfo(descriptor, out int layerSize); texture = PhysicalMemory.TextureCache.FindOrCreateTexture(_channel.MemoryManager, TextureSearchFlags.ForSampler, info, layerSize); // If this happens, then the texture address is invalid, we can't add it to the cache. @@ -157,6 +227,17 @@ namespace Ryujinx.Graphics.Gpu.Image /// ID of the texture. This is effectively a zero-based index /// The texture with the given ID public override Texture Get(int id) + { + return Get(id, srgbSampler: true); + } + + /// + /// Gets the texture with the given ID. + /// + /// ID of the texture. This is effectively a zero-based index + /// Whether the texture is being accessed with a sampler that has sRGB conversion enabled + /// The texture with the given ID + public Texture Get(int id, bool srgbSampler) { if ((uint)id >= Items.Length) { @@ -170,7 +251,7 @@ namespace Ryujinx.Graphics.Gpu.Image SynchronizeMemory(); } - GetInternal(id, out Texture texture); + GetForBinding(id, srgbSampler, out Texture texture); return texture; } @@ -182,9 +263,10 @@ namespace Ryujinx.Graphics.Gpu.Image /// This method assumes that the pool has been manually synchronized before doing binding. /// /// ID of the texture. This is effectively a zero-based index + /// Whether the texture is being accessed with a sampler that has sRGB conversion enabled /// The texture with the given ID /// The texture descriptor with the given ID - public ref readonly TextureDescriptor GetForBinding(int id, out Texture texture) + public ref readonly TextureDescriptor GetForBinding(int id, bool srgbSampler, out Texture texture) { if ((uint)id >= Items.Length) { @@ -194,9 +276,66 @@ namespace Ryujinx.Graphics.Gpu.Image // When getting for binding, assume the pool has already been synchronized. + if (!srgbSampler) + { + // If the sampler does not have the sRGB bit enabled, then the texture can't use a sRGB format. 
+ ref readonly TextureDescriptor tempDescriptor = ref GetDescriptorRef(id); + + if (tempDescriptor.UnpackSrgb() && FormatTable.TryGetTextureFormat(tempDescriptor.UnpackFormat(), isSrgb: false, out FormatInfo formatInfo)) + { + // Get a view of the texture with the right format. + return ref GetForBinding(id, formatInfo, out texture); + } + } + return ref GetInternal(id, out texture); } + /// + /// Gets the texture descriptor and texture with the given ID. + /// + /// + /// This method assumes that the pool has been manually synchronized before doing binding. + /// + /// ID of the texture. This is effectively a zero-based index + /// Texture format information + /// The texture with the given ID + /// The texture descriptor with the given ID + public ref readonly TextureDescriptor GetForBinding(int id, FormatInfo formatInfo, out Texture texture) + { + if ((uint)id >= Items.Length) + { + texture = null; + return ref _defaultDescriptor; + } + + ref readonly TextureDescriptor descriptor = ref GetInternal(id, out texture); + + if (texture != null && formatInfo.Format != 0 && texture.Format != formatInfo.Format) + { + if (!_aliasLists.TryGetValue(texture, out TextureAliasList aliasList)) + { + _aliasLists.Add(texture, aliasList = new TextureAliasList()); + } + + texture = aliasList.Find(formatInfo.Format); + + if (texture == null) + { + TextureInfo info = GetInfo(descriptor, out int layerSize); + info = ChangeFormat(info, formatInfo); + texture = PhysicalMemory.TextureCache.FindOrCreateTexture(_channel.MemoryManager, TextureSearchFlags.ForSampler, info, layerSize); + + if (texture != null) + { + aliasList.Add(formatInfo.Format, texture); + } + } + } + + return ref descriptor; + } + /// /// Checks if the pool was modified, and returns the last sequence number where a modification was detected. 
/// @@ -234,6 +373,7 @@ namespace Ryujinx.Graphics.Gpu.Image else { texture.DecrementReferenceCount(); + RemoveAliasList(texture); } } @@ -327,6 +467,8 @@ namespace Ryujinx.Graphics.Gpu.Image { texture.DecrementReferenceCount(); } + + RemoveAliasList(texture); } return null; @@ -369,6 +511,7 @@ namespace Ryujinx.Graphics.Gpu.Image if (Interlocked.Exchange(ref Items[id], null) != null) { texture.DecrementReferenceCount(this, id); + RemoveAliasList(texture); } } } @@ -622,6 +765,57 @@ namespace Ryujinx.Graphics.Gpu.Image component == SwizzleComponent.Green; } + /// + /// Changes the format on the texture information structure, and also adjusts the width for the new format if needed. + /// + /// Texture information + /// New format + /// Texture information with the new format + private static TextureInfo ChangeFormat(in TextureInfo info, FormatInfo dstFormat) + { + int width = info.Width; + + if (info.FormatInfo.BytesPerPixel != dstFormat.BytesPerPixel) + { + int stride = width * info.FormatInfo.BytesPerPixel; + width = stride / dstFormat.BytesPerPixel; + } + + return new TextureInfo( + info.GpuAddress, + width, + info.Height, + info.DepthOrLayers, + info.Levels, + info.SamplesInX, + info.SamplesInY, + info.Stride, + info.IsLinear, + info.GobBlocksInY, + info.GobBlocksInZ, + info.GobBlocksInTileX, + info.Target, + dstFormat, + info.DepthStencilMode, + info.SwizzleR, + info.SwizzleG, + info.SwizzleB, + info.SwizzleA); + } + + /// + /// Removes all aliases for a texture. + /// + /// Texture to have the aliases removed + private void RemoveAliasList(Texture texture) + { + if (_aliasLists.TryGetValue(texture, out TextureAliasList aliasList)) + { + _aliasLists.Remove(texture); + aliasList.Destroy(); + } + } + /// /// Decrements the reference count of the texture. /// This indicates that the texture pool is not using it anymore. 
@@ -629,7 +823,11 @@ namespace Ryujinx.Graphics.Gpu.Image /// The texture to be deleted protected override void Delete(Texture item) { - item?.DecrementReferenceCount(this); + if (item != null) + { + item.DecrementReferenceCount(this); + RemoveAliasList(item); + } } public override void Dispose() diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 26d9501c6..409867e09 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -509,7 +509,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (binding.IsImage) { - _context.Renderer.Pipeline.SetImage(binding.Stage, binding.BindingInfo.Binding, binding.Texture, binding.Format); + _context.Renderer.Pipeline.SetImage(binding.Stage, binding.BindingInfo.Binding, binding.Texture); } else { @@ -873,12 +873,11 @@ namespace Ryujinx.Graphics.Gpu.Memory ITexture texture, MultiRange range, TextureBindingInfo bindingInfo, - Format format, bool isImage) { _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); - _bufferTextures.Add(new BufferTextureBinding(stage, texture, range, bindingInfo, format, isImage)); + _bufferTextures.Add(new BufferTextureBinding(stage, texture, range, bindingInfo, isImage)); } /// @@ -897,12 +896,11 @@ namespace Ryujinx.Graphics.Gpu.Memory ITexture texture, MultiRange range, TextureBindingInfo bindingInfo, - int index, - Format format) + int index) { _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); - _bufferTextureArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); + _bufferTextureArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index)); } /// @@ -921,12 +919,11 @@ namespace Ryujinx.Graphics.Gpu.Memory ITexture texture, MultiRange range, TextureBindingInfo bindingInfo, - int 
index, - Format format) + int index) { _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); - _bufferImageArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); + _bufferImageArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index)); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureArrayBinding.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureArrayBinding.cs index fa79e4f92..a5338fa55 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureArrayBinding.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureArrayBinding.cs @@ -34,33 +34,26 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public int Index { get; } - /// - /// The image format for the binding. - /// - public Format Format { get; } - /// /// Create a new buffer texture binding. /// + /// Array /// Buffer texture /// Physical ranges of memory where the buffer texture data is located /// Binding info /// Index of the binding on the array - /// Binding format public BufferTextureArrayBinding( T array, ITexture texture, MultiRange range, TextureBindingInfo bindingInfo, - int index, - Format format) + int index) { Array = array; Texture = texture; Range = range; BindingInfo = bindingInfo; Index = index; - Format = format; } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs index bf0beffa2..1a3fde5b6 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs @@ -30,11 +30,6 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public TextureBindingInfo BindingInfo { get; } - /// - /// The image format for the binding. - /// - public Format Format { get; } - /// /// Whether the binding is for an image or a sampler. 
/// @@ -47,21 +42,18 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Buffer texture /// Physical ranges of memory where the buffer texture data is located /// Binding info - /// Binding format /// Whether the binding is for an image or a sampler public BufferTextureBinding( ShaderStage stage, ITexture texture, MultiRange range, TextureBindingInfo bindingInfo, - Format format, bool isImage) { Stage = stage; Texture = texture; Range = range; BindingInfo = bindingInfo; - Format = format; IsImage = isImage; } } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs index 51be00b6e..018c5fdc0 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderBindings.cs @@ -86,11 +86,11 @@ namespace Ryujinx.Graphics.Gpu.Shader ImageBindings[i] = stage.Info.Images.Select(descriptor => { Target target = ShaderTexture.GetTarget(descriptor.Type); - Format format = ShaderTexture.GetFormat(descriptor.Format); + FormatInfo formatInfo = ShaderTexture.GetFormatInfo(descriptor.Format); var result = new TextureBindingInfo( target, - format, + formatInfo, descriptor.Set, descriptor.Binding, descriptor.ArrayLength, diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index c1f592011..a5c5abd4b 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 7131; + private const uint CodeGenVersion = 7331; private const string SharedTocFileName = "shared.toc"; private const string 
SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Gpu/Window.cs b/src/Ryujinx.Graphics.Gpu/Window.cs index 3b2368537..59cd4c8a6 100644 --- a/src/Ryujinx.Graphics.Gpu/Window.cs +++ b/src/Ryujinx.Graphics.Gpu/Window.cs @@ -131,7 +131,7 @@ namespace Ryujinx.Graphics.Gpu bool isLinear, int gobBlocksInY, Format format, - int bytesPerPixel, + byte bytesPerPixel, ImageCrop crop, Action acquireCallback, Action releaseCallback, diff --git a/src/Ryujinx.Graphics.OpenGL/Effects/AreaScalingFilter.cs b/src/Ryujinx.Graphics.OpenGL/Effects/AreaScalingFilter.cs new file mode 100644 index 000000000..9b19f2f26 --- /dev/null +++ b/src/Ryujinx.Graphics.OpenGL/Effects/AreaScalingFilter.cs @@ -0,0 +1,106 @@ +using OpenTK.Graphics.OpenGL; +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.OpenGL.Image; +using System; +using static Ryujinx.Graphics.OpenGL.Effects.ShaderHelper; + +namespace Ryujinx.Graphics.OpenGL.Effects +{ + internal class AreaScalingFilter : IScalingFilter + { + private readonly OpenGLRenderer _renderer; + private int _inputUniform; + private int _outputUniform; + private int _srcX0Uniform; + private int _srcX1Uniform; + private int _srcY0Uniform; + private int _scalingShaderProgram; + private int _srcY1Uniform; + private int _dstX0Uniform; + private int _dstX1Uniform; + private int _dstY0Uniform; + private int _dstY1Uniform; + + public float Level { get; set; } + + public AreaScalingFilter(OpenGLRenderer renderer) + { + Initialize(); + + _renderer = renderer; + } + + public void Dispose() + { + if (_scalingShaderProgram != 0) + { + GL.DeleteProgram(_scalingShaderProgram); + } + } + + private void Initialize() + { + var scalingShader = EmbeddedResources.ReadAllText("Ryujinx.Graphics.OpenGL/Effects/Shaders/area_scaling.glsl"); + + _scalingShaderProgram = CompileProgram(scalingShader, ShaderType.ComputeShader); + + _inputUniform = GL.GetUniformLocation(_scalingShaderProgram, "Source"); + _outputUniform = 
GL.GetUniformLocation(_scalingShaderProgram, "imgOutput"); + + _srcX0Uniform = GL.GetUniformLocation(_scalingShaderProgram, "srcX0"); + _srcX1Uniform = GL.GetUniformLocation(_scalingShaderProgram, "srcX1"); + _srcY0Uniform = GL.GetUniformLocation(_scalingShaderProgram, "srcY0"); + _srcY1Uniform = GL.GetUniformLocation(_scalingShaderProgram, "srcY1"); + _dstX0Uniform = GL.GetUniformLocation(_scalingShaderProgram, "dstX0"); + _dstX1Uniform = GL.GetUniformLocation(_scalingShaderProgram, "dstX1"); + _dstY0Uniform = GL.GetUniformLocation(_scalingShaderProgram, "dstY0"); + _dstY1Uniform = GL.GetUniformLocation(_scalingShaderProgram, "dstY1"); + } + + public void Run( + TextureView view, + TextureView destinationTexture, + int width, + int height, + Extents2D source, + Extents2D destination) + { + int previousProgram = GL.GetInteger(GetPName.CurrentProgram); + int previousUnit = GL.GetInteger(GetPName.ActiveTexture); + GL.ActiveTexture(TextureUnit.Texture0); + int previousTextureBinding = GL.GetInteger(GetPName.TextureBinding2D); + + GL.BindImageTexture(0, destinationTexture.Handle, 0, false, 0, TextureAccess.ReadWrite, SizedInternalFormat.Rgba8); + + int threadGroupWorkRegionDim = 16; + int dispatchX = (width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchY = (height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + // Scaling pass + GL.UseProgram(_scalingShaderProgram); + view.Bind(0); + GL.Uniform1(_inputUniform, 0); + GL.Uniform1(_outputUniform, 0); + GL.Uniform1(_srcX0Uniform, (float)source.X1); + GL.Uniform1(_srcX1Uniform, (float)source.X2); + GL.Uniform1(_srcY0Uniform, (float)source.Y1); + GL.Uniform1(_srcY1Uniform, (float)source.Y2); + GL.Uniform1(_dstX0Uniform, (float)destination.X1); + GL.Uniform1(_dstX1Uniform, (float)destination.X2); + GL.Uniform1(_dstY0Uniform, (float)destination.Y1); + GL.Uniform1(_dstY1Uniform, (float)destination.Y2); + GL.DispatchCompute(dispatchX, dispatchY, 1); + + 
GL.UseProgram(previousProgram); + GL.MemoryBarrier(MemoryBarrierFlags.ShaderImageAccessBarrierBit); + + (_renderer.Pipeline as Pipeline).RestoreImages1And2(); + + GL.ActiveTexture(TextureUnit.Texture0); + GL.BindTexture(TextureTarget.Texture2D, previousTextureBinding); + + GL.ActiveTexture((TextureUnit)previousUnit); + } + } +} diff --git a/src/Ryujinx.Graphics.OpenGL/Effects/FsrScalingFilter.cs b/src/Ryujinx.Graphics.OpenGL/Effects/FsrScalingFilter.cs index 1a130bebb..0522e28e0 100644 --- a/src/Ryujinx.Graphics.OpenGL/Effects/FsrScalingFilter.cs +++ b/src/Ryujinx.Graphics.OpenGL/Effects/FsrScalingFilter.cs @@ -18,7 +18,7 @@ namespace Ryujinx.Graphics.OpenGL.Effects private int _srcY0Uniform; private int _scalingShaderProgram; private int _sharpeningShaderProgram; - private float _scale = 1; + private float _sharpeningLevel = 1; private int _srcY1Uniform; private int _dstX0Uniform; private int _dstX1Uniform; @@ -30,10 +30,10 @@ namespace Ryujinx.Graphics.OpenGL.Effects public float Level { - get => _scale; + get => _sharpeningLevel; set { - _scale = MathF.Max(0.01f, value); + _sharpeningLevel = MathF.Max(0.01f, value); } } diff --git a/src/Ryujinx.Graphics.OpenGL/Effects/ShaderHelper.cs b/src/Ryujinx.Graphics.OpenGL/Effects/ShaderHelper.cs index c25fe5b25..637b2fba8 100644 --- a/src/Ryujinx.Graphics.OpenGL/Effects/ShaderHelper.cs +++ b/src/Ryujinx.Graphics.OpenGL/Effects/ShaderHelper.cs @@ -1,4 +1,5 @@ using OpenTK.Graphics.OpenGL; +using Ryujinx.Common.Logging; namespace Ryujinx.Graphics.OpenGL.Effects { @@ -6,18 +7,7 @@ namespace Ryujinx.Graphics.OpenGL.Effects { public static int CompileProgram(string shaderCode, ShaderType shaderType) { - var shader = GL.CreateShader(shaderType); - GL.ShaderSource(shader, shaderCode); - GL.CompileShader(shader); - - var program = GL.CreateProgram(); - GL.AttachShader(program, shader); - GL.LinkProgram(program); - - GL.DetachShader(program, shader); - GL.DeleteShader(shader); - - return program; + return CompileProgram(new 
string[] { shaderCode }, shaderType); } public static int CompileProgram(string[] shaders, ShaderType shaderType) @@ -26,6 +16,15 @@ namespace Ryujinx.Graphics.OpenGL.Effects GL.ShaderSource(shader, shaders.Length, shaders, (int[])null); GL.CompileShader(shader); + GL.GetShader(shader, ShaderParameter.CompileStatus, out int isCompiled); + if (isCompiled == 0) + { + string log = GL.GetShaderInfoLog(shader); + Logger.Error?.Print(LogClass.Gpu, $"Failed to compile effect shader:\n\n{log}\n"); + GL.DeleteShader(shader); + return 0; + } + var program = GL.CreateProgram(); GL.AttachShader(program, shader); GL.LinkProgram(program); diff --git a/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/area_scaling.glsl b/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/area_scaling.glsl new file mode 100644 index 000000000..0fe20d3f9 --- /dev/null +++ b/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/area_scaling.glsl @@ -0,0 +1,119 @@ +#version 430 core +precision mediump float; +layout (local_size_x = 16, local_size_y = 16) in; +layout(rgba8, binding = 0, location=0) uniform image2D imgOutput; +layout( location=1 ) uniform sampler2D Source; +layout( location=2 ) uniform float srcX0; +layout( location=3 ) uniform float srcX1; +layout( location=4 ) uniform float srcY0; +layout( location=5 ) uniform float srcY1; +layout( location=6 ) uniform float dstX0; +layout( location=7 ) uniform float dstX1; +layout( location=8 ) uniform float dstY0; +layout( location=9 ) uniform float dstY1; + +/***** Area Sampling *****/ + +// By Sam Belliveau and Filippo Tarpini. Public Domain license. +// Effectively a more accurate sharp bilinear filter when upscaling, +// that also works as a mathematically perfect downscale filter. +// https://entropymine.com/imageworsener/pixelmixing/ +// https://github.com/obsproject/obs-studio/pull/1715 +// https://legacy.imagemagick.org/Usage/filter/ +vec4 AreaSampling(vec2 xy) +{ + // Determine the sizes of the source and target images. 
+ vec2 source_size = vec2(abs(srcX1 - srcX0), abs(srcY1 - srcY0)); + vec2 target_size = vec2(abs(dstX1 - dstX0), abs(dstY1 - dstY0)); + vec2 inverted_target_size = vec2(1.0) / target_size; + + // Compute the top-left and bottom-right corners of the target pixel box. + vec2 t_beg = floor(xy - vec2(dstX0 < dstX1 ? dstX0 : dstX1, dstY0 < dstY1 ? dstY0 : dstY1)); + vec2 t_end = t_beg + vec2(1.0, 1.0); + + // Convert the target pixel box to source pixel box. + vec2 beg = t_beg * inverted_target_size * source_size; + vec2 end = t_end * inverted_target_size * source_size; + + // Compute the top-left and bottom-right corners of the pixel box. + ivec2 f_beg = ivec2(beg); + ivec2 f_end = ivec2(end); + + // Compute how much of the start and end pixels are covered horizontally & vertically. + float area_w = 1.0 - fract(beg.x); + float area_n = 1.0 - fract(beg.y); + float area_e = fract(end.x); + float area_s = fract(end.y); + + // Compute the areas of the corner pixels in the pixel box. + float area_nw = area_n * area_w; + float area_ne = area_n * area_e; + float area_sw = area_s * area_w; + float area_se = area_s * area_e; + + // Initialize the color accumulator. + vec4 avg_color = vec4(0.0, 0.0, 0.0, 0.0); + + // Accumulate corner pixels. + avg_color += area_nw * texelFetch(Source, ivec2(f_beg.x, f_beg.y), 0); + avg_color += area_ne * texelFetch(Source, ivec2(f_end.x, f_beg.y), 0); + avg_color += area_sw * texelFetch(Source, ivec2(f_beg.x, f_end.y), 0); + avg_color += area_se * texelFetch(Source, ivec2(f_end.x, f_end.y), 0); + + // Determine the size of the pixel box. + int x_range = int(f_end.x - f_beg.x - 0.5); + int y_range = int(f_end.y - f_beg.y - 0.5); + + // Accumulate top and bottom edge pixels. + for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) + { + avg_color += area_n * texelFetch(Source, ivec2(x, f_beg.y), 0); + avg_color += area_s * texelFetch(Source, ivec2(x, f_end.y), 0); + } + + // Accumulate left and right edge pixels and all the pixels in between. 
+ for (int y = f_beg.y + 1; y <= f_beg.y + y_range; ++y) + { + avg_color += area_w * texelFetch(Source, ivec2(f_beg.x, y), 0); + avg_color += area_e * texelFetch(Source, ivec2(f_end.x, y), 0); + + for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) + { + avg_color += texelFetch(Source, ivec2(x, y), 0); + } + } + + // Compute the area of the pixel box that was sampled. + float area_corners = area_nw + area_ne + area_sw + area_se; + float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e); + float area_center = float(x_range) * float(y_range); + + // Return the normalized average color. + return avg_color / (area_corners + area_edges + area_center); +} + +float insideBox(vec2 v, vec2 bLeft, vec2 tRight) { + vec2 s = step(bLeft, v) - step(tRight, v); + return s.x * s.y; +} + +vec2 translateDest(vec2 pos) { + vec2 translatedPos = vec2(pos.x, pos.y); + translatedPos.x = dstX1 < dstX0 ? dstX1 - translatedPos.x : translatedPos.x; + translatedPos.y = dstY0 > dstY1 ? dstY0 + dstY1 - translatedPos.y - 1 : translatedPos.y; + return translatedPos; +} + +void main() +{ + vec2 bLeft = vec2(dstX0 < dstX1 ? dstX0 : dstX1, dstY0 < dstY1 ? dstY0 : dstY1); + vec2 tRight = vec2(dstX1 > dstX0 ? dstX1 : dstX0, dstY1 > dstY0 ? 
dstY1 : dstY0); + ivec2 loc = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y); + if (insideBox(loc, bLeft, tRight) == 0) { + imageStore(imgOutput, loc, vec4(0, 0, 0, 1)); + return; + } + + vec4 outColor = AreaSampling(loc); + imageStore(imgOutput, ivec2(translateDest(loc)), vec4(outColor.rgb, 1)); +} diff --git a/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/fsr_scaling.glsl b/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/fsr_scaling.glsl index 8e8755db2..3c7d485b1 100644 --- a/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/fsr_scaling.glsl +++ b/src/Ryujinx.Graphics.OpenGL/Effects/Shaders/fsr_scaling.glsl @@ -85,4 +85,4 @@ void main() { CurrFilter(gxy); gxy.x -= 8u; CurrFilter(gxy); -} \ No newline at end of file +} diff --git a/src/Ryujinx.Graphics.OpenGL/Image/ImageArray.cs b/src/Ryujinx.Graphics.OpenGL/Image/ImageArray.cs index 6198823d9..3486f29df 100644 --- a/src/Ryujinx.Graphics.OpenGL/Image/ImageArray.cs +++ b/src/Ryujinx.Graphics.OpenGL/Image/ImageArray.cs @@ -1,6 +1,5 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Graphics.GAL; -using System; namespace Ryujinx.Graphics.OpenGL.Image { @@ -19,14 +18,6 @@ namespace Ryujinx.Graphics.OpenGL.Image _images = new TextureRef[size]; } - public void SetFormats(int index, GAL.Format[] imageFormats) - { - for (int i = 0; i < imageFormats.Length; i++) - { - _images[index + i].Format = imageFormats[i]; - } - } - public void SetImages(int index, ITexture[] images) { for (int i = 0; i < images.Length; i++) @@ -36,6 +27,7 @@ namespace Ryujinx.Graphics.OpenGL.Image if (image is TextureBase imageBase) { _images[index + i].Handle = imageBase.Handle; + _images[index + i].Format = imageBase.Format; } else { diff --git a/src/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs b/src/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs index a8196541a..22f4c04cd 100644 --- a/src/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs +++ b/src/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs @@ -1,7 +1,7 @@ using OpenTK.Graphics.OpenGL; +using 
Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using System; -using System.Buffers; namespace Ryujinx.Graphics.OpenGL.Image { @@ -55,9 +55,9 @@ namespace Ryujinx.Graphics.OpenGL.Image } /// - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { - var dataSpan = data.Memory.Span; + var dataSpan = data.Span; Buffer.SetData(_buffer, _bufferOffset, dataSpan[..Math.Min(dataSpan.Length, _bufferSize)]); @@ -65,13 +65,13 @@ namespace Ryujinx.Graphics.OpenGL.Image } /// - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { throw new NotSupportedException(); } /// - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { throw new NotSupportedException(); } diff --git a/src/Ryujinx.Graphics.OpenGL/Image/TextureView.cs b/src/Ryujinx.Graphics.OpenGL/Image/TextureView.cs index 946eb755c..b0859c49e 100644 --- a/src/Ryujinx.Graphics.OpenGL/Image/TextureView.cs +++ b/src/Ryujinx.Graphics.OpenGL/Image/TextureView.cs @@ -1,8 +1,8 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Common; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using System; -using System.Buffers; using System.Diagnostics; namespace Ryujinx.Graphics.OpenGL.Image @@ -448,13 +448,13 @@ namespace Ryujinx.Graphics.OpenGL.Image } } - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { using (data = EnsureDataFormat(data)) { unsafe { - var dataSpan = data.Memory.Span; + var dataSpan = data.Span; fixed (byte* ptr = dataSpan) { ReadFrom((IntPtr)ptr, dataSpan.Length); @@ -463,13 +463,13 @@ namespace Ryujinx.Graphics.OpenGL.Image } } - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { using (data = EnsureDataFormat(data)) { unsafe { - fixed (byte* ptr = data.Memory.Span) + fixed (byte* ptr = data.Span) { 
int width = Math.Max(Info.Width >> level, 1); int height = Math.Max(Info.Height >> level, 1); @@ -480,7 +480,7 @@ namespace Ryujinx.Graphics.OpenGL.Image } } - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { using (data = EnsureDataFormat(data)) { @@ -489,7 +489,7 @@ namespace Ryujinx.Graphics.OpenGL.Image unsafe { - fixed (byte* ptr = data.Memory.Span) + fixed (byte* ptr = data.Span) { ReadFrom2D( (IntPtr)ptr, @@ -522,13 +522,13 @@ namespace Ryujinx.Graphics.OpenGL.Image ReadFrom2D(data, layer, level, x, y, width, height, mipSize); } - private IMemoryOwner EnsureDataFormat(IMemoryOwner data) + private MemoryOwner EnsureDataFormat(MemoryOwner data) { if (Format == Format.S8UintD24Unorm) { using (data) { - return FormatConverter.ConvertS8D24ToD24S8(data.Memory.Span); + return FormatConverter.ConvertS8D24ToD24S8(data.Span); } } diff --git a/src/Ryujinx.Graphics.OpenGL/Pipeline.cs b/src/Ryujinx.Graphics.OpenGL/Pipeline.cs index de3269572..ac6bc3f17 100644 --- a/src/Ryujinx.Graphics.OpenGL/Pipeline.cs +++ b/src/Ryujinx.Graphics.OpenGL/Pipeline.cs @@ -45,7 +45,7 @@ namespace Ryujinx.Graphics.OpenGL private readonly Vector4[] _fpIsBgra = new Vector4[SupportBuffer.FragmentIsBgraCount]; - private readonly (TextureBase, Format)[] _images; + private readonly TextureBase[] _images; private TextureBase _unit0Texture; private Sampler _unit0Sampler; @@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.OpenGL _fragmentOutputMap = uint.MaxValue; _componentMasks = uint.MaxValue; - _images = new (TextureBase, Format)[SavedImages]; + _images = new TextureBase[SavedImages]; _tfbs = new BufferHandle[Constants.MaxTransformFeedbackBuffers]; _tfbTargets = new BufferRange[Constants.MaxTransformFeedbackBuffers]; @@ -935,11 +935,11 @@ namespace Ryujinx.Graphics.OpenGL SetFrontFace(_frontFace = frontFace.Convert()); } - public void SetImage(ShaderStage stage, int binding, ITexture texture, 
Format imageFormat) + public void SetImage(ShaderStage stage, int binding, ITexture texture) { if ((uint)binding < SavedImages) { - _images[binding] = (texture as TextureBase, imageFormat); + _images[binding] = texture as TextureBase; } if (texture == null) @@ -950,7 +950,7 @@ namespace Ryujinx.Graphics.OpenGL TextureBase texBase = (TextureBase)texture; - SizedInternalFormat format = FormatTable.GetImageFormat(imageFormat); + SizedInternalFormat format = FormatTable.GetImageFormat(texBase.Format); if (format != 0) { @@ -1622,11 +1622,11 @@ namespace Ryujinx.Graphics.OpenGL { for (int i = 0; i < SavedImages; i++) { - (TextureBase texBase, Format imageFormat) = _images[i]; + TextureBase texBase = _images[i]; if (texBase != null) { - SizedInternalFormat format = FormatTable.GetImageFormat(imageFormat); + SizedInternalFormat format = FormatTable.GetImageFormat(texBase.Format); if (format != 0) { diff --git a/src/Ryujinx.Graphics.OpenGL/Ryujinx.Graphics.OpenGL.csproj b/src/Ryujinx.Graphics.OpenGL/Ryujinx.Graphics.OpenGL.csproj index 3d64da99b..f3071f486 100644 --- a/src/Ryujinx.Graphics.OpenGL/Ryujinx.Graphics.OpenGL.csproj +++ b/src/Ryujinx.Graphics.OpenGL/Ryujinx.Graphics.OpenGL.csproj @@ -21,6 +21,7 @@ + diff --git a/src/Ryujinx.Graphics.OpenGL/Window.cs b/src/Ryujinx.Graphics.OpenGL/Window.cs index 6bcfefa4e..285ab725e 100644 --- a/src/Ryujinx.Graphics.OpenGL/Window.cs +++ b/src/Ryujinx.Graphics.OpenGL/Window.cs @@ -373,6 +373,16 @@ namespace Ryujinx.Graphics.OpenGL _isLinear = false; _scalingFilter.Level = _scalingFilterLevel; + RecreateUpscalingTexture(); + break; + case ScalingFilter.Area: + if (_scalingFilter is not AreaScalingFilter) + { + _scalingFilter?.Dispose(); + _scalingFilter = new AreaScalingFilter(_renderer); + } + _isLinear = false; + RecreateUpscalingTexture(); break; } diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 40129252a..3fcb821d3 100644 --- 
a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -222,30 +222,14 @@ namespace Ryujinx.Graphics.Shader.Instructions context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); } break; - case AtomOp.And: - if (type == AtomSize.S32 || type == AtomSize.U32) + case AtomOp.Min: + if (type == AtomSize.S32) { - res = context.AtomicAnd(storageKind, e0, e1, value); + res = context.AtomicMinS32(storageKind, e0, e1, value); } - else + else if (type == AtomSize.U32) { - context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); - } - break; - case AtomOp.Xor: - if (type == AtomSize.S32 || type == AtomSize.U32) - { - res = context.AtomicXor(storageKind, e0, e1, value); - } - else - { - context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); - } - break; - case AtomOp.Or: - if (type == AtomSize.S32 || type == AtomSize.U32) - { - res = context.AtomicOr(storageKind, e0, e1, value); + res = context.AtomicMinU32(storageKind, e0, e1, value); } else { @@ -266,20 +250,49 @@ namespace Ryujinx.Graphics.Shader.Instructions context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); } break; - case AtomOp.Min: - if (type == AtomSize.S32) + case AtomOp.And: + if (type == AtomSize.S32 || type == AtomSize.U32) { - res = context.AtomicMinS32(storageKind, e0, e1, value); - } - else if (type == AtomSize.U32) - { - res = context.AtomicMinU32(storageKind, e0, e1, value); + res = context.AtomicAnd(storageKind, e0, e1, value); } else { context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); } break; + case AtomOp.Or: + if (type == AtomSize.S32 || type == AtomSize.U32) + { + res = context.AtomicOr(storageKind, e0, e1, value); + } + else + { + context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); + } + break; + case AtomOp.Xor: + if (type == AtomSize.S32 || type == AtomSize.U32) + { + res = 
context.AtomicXor(storageKind, e0, e1, value); + } + else + { + context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); + } + break; + case AtomOp.Exch: + if (type == AtomSize.S32 || type == AtomSize.U32) + { + res = context.AtomicSwap(storageKind, e0, e1, value); + } + else + { + context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}."); + } + break; + default: + context.TranslatorContext.GpuAccessor.Log($"Invalid atomic operation: {op}."); + break; } return res; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs index 23180ff82..6ec90fa3c 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs @@ -138,6 +138,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations // Ensure that conditions met for that branch are also met for the current one. // Prefer the latest sources for the phi node. + int undefCount = 0; + for (int i = phiNode.SourcesCount - 1; i >= 0; i--) { BasicBlock phiBlock = phiNode.GetBlock(i); @@ -159,6 +161,26 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return match; } } + else if (phiSource.Type == OperandType.Undefined) + { + undefCount++; + } + } + + // If all sources but one are undefined, we can assume that the one + // that is not undefined is the right one. 
+ + if (undefCount == phiNode.SourcesCount - 1) + { + for (int i = phiNode.SourcesCount - 1; i >= 0; i--) + { + Operand phiSource = phiNode.GetSource(i); + + if (phiSource.Type != OperandType.Undefined) + { + return phiSource; + } + } } } diff --git a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs index 298526d51..3780dc174 100644 --- a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs +++ b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs @@ -82,7 +82,6 @@ namespace Ryujinx.Graphics.Vulkan private readonly ImageRef[] _imageRefs; private readonly TextureBuffer[] _bufferTextureRefs; private readonly TextureBuffer[] _bufferImageRefs; - private readonly Format[] _bufferImageFormats; private ArrayRef[] _textureArrayRefs; private ArrayRef[] _imageArrayRefs; @@ -141,7 +140,6 @@ namespace Ryujinx.Graphics.Vulkan _imageRefs = new ImageRef[Constants.MaxImageBindings * 2]; _bufferTextureRefs = new TextureBuffer[Constants.MaxTextureBindings * 2]; _bufferImageRefs = new TextureBuffer[Constants.MaxImageBindings * 2]; - _bufferImageFormats = new Format[Constants.MaxImageBindings * 2]; _textureArrayRefs = Array.Empty>(); _imageArrayRefs = Array.Empty>(); @@ -391,17 +389,11 @@ namespace Ryujinx.Graphics.Vulkan _dirty = DirtyFlags.All; } - public void SetImage( - CommandBufferScoped cbs, - ShaderStage stage, - int binding, - ITexture image, - Format imageFormat) + public void SetImage(CommandBufferScoped cbs, ShaderStage stage, int binding, ITexture image) { if (image is TextureBuffer imageBuffer) { _bufferImageRefs[binding] = imageBuffer; - _bufferImageFormats[binding] = imageFormat; } else if (image is TextureView view) { @@ -410,13 +402,12 @@ namespace Ryujinx.Graphics.Vulkan iRef.View?.ClearUsage(FeedbackLoopHazards); view?.PrepareForUsage(cbs, stage.ConvertToPipelineStageFlags(), FeedbackLoopHazards); - iRef = new(stage, view, view.GetView(imageFormat).GetIdentityImageView()); + iRef = new(stage, view, 
view.GetIdentityImageView()); } else { _imageRefs[binding] = default; _bufferImageRefs[binding] = null; - _bufferImageFormats[binding] = default; } SignalDirty(DirtyFlags.Image); @@ -923,7 +914,7 @@ namespace Ryujinx.Graphics.Vulkan for (int i = 0; i < count; i++) { - bufferImages[i] = _bufferImageRefs[binding + i]?.GetBufferView(cbs, _bufferImageFormats[binding + i], true) ?? default; + bufferImages[i] = _bufferImageRefs[binding + i]?.GetBufferView(cbs, true) ?? default; } tu.Push(bufferImages[..count]); diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/AreaScalingFilter.cs b/src/Ryujinx.Graphics.Vulkan/Effects/AreaScalingFilter.cs new file mode 100644 index 000000000..87b46df80 --- /dev/null +++ b/src/Ryujinx.Graphics.Vulkan/Effects/AreaScalingFilter.cs @@ -0,0 +1,101 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using Silk.NET.Vulkan; +using System; +using Extent2D = Ryujinx.Graphics.GAL.Extents2D; +using Format = Silk.NET.Vulkan.Format; +using SamplerCreateInfo = Ryujinx.Graphics.GAL.SamplerCreateInfo; + +namespace Ryujinx.Graphics.Vulkan.Effects +{ + internal class AreaScalingFilter : IScalingFilter + { + private readonly VulkanRenderer _renderer; + private PipelineHelperShader _pipeline; + private ISampler _sampler; + private ShaderCollection _scalingProgram; + private Device _device; + + public float Level { get; set; } + + public AreaScalingFilter(VulkanRenderer renderer, Device device) + { + _device = device; + _renderer = renderer; + + Initialize(); + } + + public void Dispose() + { + _pipeline.Dispose(); + _scalingProgram.Dispose(); + _sampler.Dispose(); + } + + public void Initialize() + { + _pipeline = new PipelineHelperShader(_renderer, _device); + + _pipeline.Initialize(); + + var scalingShader = EmbeddedResources.Read("Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.spv"); + + var scalingResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Compute, 
ResourceType.UniformBuffer, 2) + .Add(ResourceStages.Compute, ResourceType.TextureAndSampler, 1) + .Add(ResourceStages.Compute, ResourceType.Image, 0, true).Build(); + + _sampler = _renderer.CreateSampler(SamplerCreateInfo.Create(MinFilter.Linear, MagFilter.Linear)); + + _scalingProgram = _renderer.CreateProgramWithMinimalLayout(new[] + { + new ShaderSource(scalingShader, ShaderStage.Compute, TargetLanguage.Spirv), + }, scalingResourceLayout); + } + + public void Run( + TextureView view, + CommandBufferScoped cbs, + Auto destinationTexture, + Format format, + int width, + int height, + Extent2D source, + Extent2D destination) + { + _pipeline.SetCommandBuffer(cbs); + _pipeline.SetProgram(_scalingProgram); + _pipeline.SetTextureAndSampler(ShaderStage.Compute, 1, view, _sampler); + + ReadOnlySpan dimensionsBuffer = stackalloc float[] + { + source.X1, + source.X2, + source.Y1, + source.Y2, + destination.X1, + destination.X2, + destination.Y1, + destination.Y2, + }; + + int rangeSize = dimensionsBuffer.Length * sizeof(float); + using var buffer = _renderer.BufferManager.ReserveOrCreate(_renderer, cbs, rangeSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, dimensionsBuffer); + + int threadGroupWorkRegionDim = 16; + int dispatchX = (width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchY = (height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + _pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) }); + _pipeline.SetImage(0, destinationTexture); + _pipeline.DispatchCompute(dispatchX, dispatchY, 1); + _pipeline.ComputeBarrier(); + + _pipeline.Finish(); + } + } +} diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/FsrScalingFilter.cs b/src/Ryujinx.Graphics.Vulkan/Effects/FsrScalingFilter.cs index c4501ca17..080dde5e5 100644 --- a/src/Ryujinx.Graphics.Vulkan/Effects/FsrScalingFilter.cs +++ b/src/Ryujinx.Graphics.Vulkan/Effects/FsrScalingFilter.cs @@ -154,7 +154,7 @@ namespace 
Ryujinx.Graphics.Vulkan.Effects int dispatchY = (height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; _pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) }); - _pipeline.SetImage(ShaderStage.Compute, 0, _intermediaryTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format)); + _pipeline.SetImage(ShaderStage.Compute, 0, _intermediaryTexture.GetView(FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format))); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); _pipeline.ComputeBarrier(); diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/FxaaPostProcessingEffect.cs b/src/Ryujinx.Graphics.Vulkan/Effects/FxaaPostProcessingEffect.cs index 70b3b32a7..26314b7bf 100644 --- a/src/Ryujinx.Graphics.Vulkan/Effects/FxaaPostProcessingEffect.cs +++ b/src/Ryujinx.Graphics.Vulkan/Effects/FxaaPostProcessingEffect.cs @@ -75,7 +75,7 @@ namespace Ryujinx.Graphics.Vulkan.Effects var dispatchX = BitUtils.DivRoundUp(view.Width, IPostProcessingEffect.LocalGroupSize); var dispatchY = BitUtils.DivRoundUp(view.Height, IPostProcessingEffect.LocalGroupSize); - _pipeline.SetImage(ShaderStage.Compute, 0, _texture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format)); + _pipeline.SetImage(ShaderStage.Compute, 0, _texture.GetView(FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format))); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); _pipeline.ComputeBarrier(); diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.glsl b/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.glsl new file mode 100644 index 000000000..e34dd77dd --- /dev/null +++ b/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.glsl @@ -0,0 +1,122 @@ +// Scaling + +#version 430 core +layout (local_size_x = 16, local_size_y = 16) in; +layout( rgba8, binding = 0, set = 3) uniform image2D imgOutput; +layout( binding = 1, set = 2) uniform sampler2D Source; +layout( binding = 2 ) uniform dimensions{ + float srcX0; + float srcX1; + float srcY0; + float srcY1; + float 
dstX0; + float dstX1; + float dstY0; + float dstY1; +}; + +/***** Area Sampling *****/ + +// By Sam Belliveau and Filippo Tarpini. Public Domain license. +// Effectively a more accurate sharp bilinear filter when upscaling, +// that also works as a mathematically perfect downscale filter. +// https://entropymine.com/imageworsener/pixelmixing/ +// https://github.com/obsproject/obs-studio/pull/1715 +// https://legacy.imagemagick.org/Usage/filter/ +vec4 AreaSampling(vec2 xy) +{ + // Determine the sizes of the source and target images. + vec2 source_size = vec2(abs(srcX1 - srcX0), abs(srcY1 - srcY0)); + vec2 target_size = vec2(abs(dstX1 - dstX0), abs(dstY1 - dstY0)); + vec2 inverted_target_size = vec2(1.0) / target_size; + + // Compute the top-left and bottom-right corners of the target pixel box. + vec2 t_beg = floor(xy - vec2(dstX0 < dstX1 ? dstX0 : dstX1, dstY0 < dstY1 ? dstY0 : dstY1)); + vec2 t_end = t_beg + vec2(1.0, 1.0); + + // Convert the target pixel box to source pixel box. + vec2 beg = t_beg * inverted_target_size * source_size; + vec2 end = t_end * inverted_target_size * source_size; + + // Compute the top-left and bottom-right corners of the pixel box. + ivec2 f_beg = ivec2(beg); + ivec2 f_end = ivec2(end); + + // Compute how much of the start and end pixels are covered horizontally & vertically. + float area_w = 1.0 - fract(beg.x); + float area_n = 1.0 - fract(beg.y); + float area_e = fract(end.x); + float area_s = fract(end.y); + + // Compute the areas of the corner pixels in the pixel box. + float area_nw = area_n * area_w; + float area_ne = area_n * area_e; + float area_sw = area_s * area_w; + float area_se = area_s * area_e; + + // Initialize the color accumulator. + vec4 avg_color = vec4(0.0, 0.0, 0.0, 0.0); + + // Accumulate corner pixels. 
+ avg_color += area_nw * texelFetch(Source, ivec2(f_beg.x, f_beg.y), 0); + avg_color += area_ne * texelFetch(Source, ivec2(f_end.x, f_beg.y), 0); + avg_color += area_sw * texelFetch(Source, ivec2(f_beg.x, f_end.y), 0); + avg_color += area_se * texelFetch(Source, ivec2(f_end.x, f_end.y), 0); + + // Determine the size of the pixel box. + int x_range = int(f_end.x - f_beg.x - 0.5); + int y_range = int(f_end.y - f_beg.y - 0.5); + + // Accumulate top and bottom edge pixels. + for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) + { + avg_color += area_n * texelFetch(Source, ivec2(x, f_beg.y), 0); + avg_color += area_s * texelFetch(Source, ivec2(x, f_end.y), 0); + } + + // Accumulate left and right edge pixels and all the pixels in between. + for (int y = f_beg.y + 1; y <= f_beg.y + y_range; ++y) + { + avg_color += area_w * texelFetch(Source, ivec2(f_beg.x, y), 0); + avg_color += area_e * texelFetch(Source, ivec2(f_end.x, y), 0); + + for (int x = f_beg.x + 1; x <= f_beg.x + x_range; ++x) + { + avg_color += texelFetch(Source, ivec2(x, y), 0); + } + } + + // Compute the area of the pixel box that was sampled. + float area_corners = area_nw + area_ne + area_sw + area_se; + float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e); + float area_center = float(x_range) * float(y_range); + + // Return the normalized average color. + return avg_color / (area_corners + area_edges + area_center); +} + +float insideBox(vec2 v, vec2 bLeft, vec2 tRight) { + vec2 s = step(bLeft, v) - step(tRight, v); + return s.x * s.y; +} + +vec2 translateDest(vec2 pos) { + vec2 translatedPos = vec2(pos.x, pos.y); + translatedPos.x = dstX1 < dstX0 ? dstX1 - translatedPos.x : translatedPos.x; + translatedPos.y = dstY0 < dstY1 ? dstY1 + dstY0 - translatedPos.y - 1 : translatedPos.y; + return translatedPos; +} + +void main() +{ + vec2 bLeft = vec2(dstX0 < dstX1 ? dstX0 : dstX1, dstY0 < dstY1 ? dstY0 : dstY1); + vec2 tRight = vec2(dstX1 > dstX0 ? 
dstX1 : dstX0, dstY1 > dstY0 ? dstY1 : dstY0); + ivec2 loc = ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y); + if (insideBox(loc, bLeft, tRight) == 0) { + imageStore(imgOutput, loc, vec4(0, 0, 0, 1)); + return; + } + + vec4 outColor = AreaSampling(loc); + imageStore(imgOutput, ivec2(translateDest(loc)), vec4(outColor.rgb, 1)); +} diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.spv b/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.spv new file mode 100644 index 000000000..7d097280f Binary files /dev/null and b/src/Ryujinx.Graphics.Vulkan/Effects/Shaders/AreaScaling.spv differ diff --git a/src/Ryujinx.Graphics.Vulkan/Effects/SmaaPostProcessingEffect.cs b/src/Ryujinx.Graphics.Vulkan/Effects/SmaaPostProcessingEffect.cs index 6d80f4a49..a8e68f429 100644 --- a/src/Ryujinx.Graphics.Vulkan/Effects/SmaaPostProcessingEffect.cs +++ b/src/Ryujinx.Graphics.Vulkan/Effects/SmaaPostProcessingEffect.cs @@ -219,7 +219,7 @@ namespace Ryujinx.Graphics.Vulkan.Effects buffer.Holder.SetDataUnchecked(buffer.Offset, resolutionBuffer); _pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) }); - _pipeline.SetImage(ShaderStage.Compute, 0, _edgeOutputTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format)); + _pipeline.SetImage(ShaderStage.Compute, 0, _edgeOutputTexture.GetView(FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format))); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); _pipeline.ComputeBarrier(); @@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Vulkan.Effects _pipeline.SetTextureAndSampler(ShaderStage.Compute, 1, _edgeOutputTexture, _samplerLinear); _pipeline.SetTextureAndSampler(ShaderStage.Compute, 3, _areaTexture, _samplerLinear); _pipeline.SetTextureAndSampler(ShaderStage.Compute, 4, _searchTexture, _samplerLinear); - _pipeline.SetImage(ShaderStage.Compute, 0, _blendOutputTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format)); + _pipeline.SetImage(ShaderStage.Compute, 0, 
_blendOutputTexture.GetView(FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format))); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); _pipeline.ComputeBarrier(); @@ -238,7 +238,7 @@ namespace Ryujinx.Graphics.Vulkan.Effects _pipeline.Specialize(_specConstants); _pipeline.SetTextureAndSampler(ShaderStage.Compute, 3, _blendOutputTexture, _samplerLinear); _pipeline.SetTextureAndSampler(ShaderStage.Compute, 1, view, _samplerLinear); - _pipeline.SetImage(ShaderStage.Compute, 0, _outputTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format)); + _pipeline.SetImage(ShaderStage.Compute, 0, _outputTexture.GetView(FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format))); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); _pipeline.ComputeBarrier(); diff --git a/src/Ryujinx.Graphics.Vulkan/HelperShader.cs b/src/Ryujinx.Graphics.Vulkan/HelperShader.cs index 73aa95c74..b7c42aff0 100644 --- a/src/Ryujinx.Graphics.Vulkan/HelperShader.cs +++ b/src/Ryujinx.Graphics.Vulkan/HelperShader.cs @@ -1039,7 +1039,7 @@ namespace Ryujinx.Graphics.Vulkan var dstView = Create2DLayerView(dst, dstLayer + z, dstLevel + l); _pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Compute, 0, srcView, null); - _pipeline.SetImage(ShaderStage.Compute, 0, dstView, dstFormat); + _pipeline.SetImage(ShaderStage.Compute, 0, dstView.GetView(dstFormat)); int dispatchX = (Math.Min(srcView.Info.Width, dstView.Info.Width) + 31) / 32; int dispatchY = (Math.Min(srcView.Info.Height, dstView.Info.Height) + 31) / 32; @@ -1168,7 +1168,7 @@ namespace Ryujinx.Graphics.Vulkan var dstView = Create2DLayerView(dst, dstLayer + z, 0); _pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Compute, 0, srcView, null); - _pipeline.SetImage(ShaderStage.Compute, 0, dstView, format); + _pipeline.SetImage(ShaderStage.Compute, 0, dstView.GetView(format)); _pipeline.DispatchCompute(dispatchX, dispatchY, 1); diff --git a/src/Ryujinx.Graphics.Vulkan/ImageArray.cs b/src/Ryujinx.Graphics.Vulkan/ImageArray.cs index 
467b01111..019286d28 100644 --- a/src/Ryujinx.Graphics.Vulkan/ImageArray.cs +++ b/src/Ryujinx.Graphics.Vulkan/ImageArray.cs @@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Vulkan { public TextureStorage Storage; public TextureView View; - public GAL.Format ImageFormat; } private readonly TextureRef[] _textureRefs; @@ -52,16 +51,6 @@ namespace Ryujinx.Graphics.Vulkan _isBuffer = isBuffer; } - public void SetFormats(int index, GAL.Format[] imageFormats) - { - for (int i = 0; i < imageFormats.Length; i++) - { - _textureRefs[index + i].ImageFormat = imageFormats[i]; - } - - SetDirty(); - } - public void SetImages(int index, ITexture[] images) { for (int i = 0; i < images.Length; i++) @@ -142,7 +131,7 @@ namespace Ryujinx.Graphics.Vulkan ref var texture = ref textures[i]; ref var refs = ref _textureRefs[i]; - if (i > 0 && _textureRefs[i - 1].View == refs.View && _textureRefs[i - 1].ImageFormat == refs.ImageFormat) + if (i > 0 && _textureRefs[i - 1].View == refs.View) { texture = textures[i - 1]; @@ -150,7 +139,7 @@ namespace Ryujinx.Graphics.Vulkan } texture.ImageLayout = ImageLayout.General; - texture.ImageView = refs.View?.GetView(refs.ImageFormat).GetIdentityImageView().Get(cbs).Value ?? default; + texture.ImageView = refs.View?.GetIdentityImageView().Get(cbs).Value ?? default; if (texture.ImageView.Handle == 0) { @@ -167,7 +156,7 @@ namespace Ryujinx.Graphics.Vulkan for (int i = 0; i < bufferTextures.Length; i++) { - bufferTextures[i] = _bufferTextureRefs[i]?.GetBufferView(cbs, _textureRefs[i].ImageFormat, true) ?? default; + bufferTextures[i] = _bufferTextureRefs[i]?.GetBufferView(cbs, true) ?? 
default; } return bufferTextures; diff --git a/src/Ryujinx.Graphics.Vulkan/PipelineBase.cs b/src/Ryujinx.Graphics.Vulkan/PipelineBase.cs index 8b38bd7ac..5f3158a34 100644 --- a/src/Ryujinx.Graphics.Vulkan/PipelineBase.cs +++ b/src/Ryujinx.Graphics.Vulkan/PipelineBase.cs @@ -677,7 +677,7 @@ namespace Ryujinx.Graphics.Vulkan oldStencilTestEnable = _newState.StencilTestEnable; oldDepthTestEnable = _newState.DepthTestEnable; oldDepthWriteEnable = _newState.DepthWriteEnable; - oldTopology = _newState.Topology; + oldTopology = _topology; oldViewportsCount = _newState.ViewportsCount; } @@ -705,14 +705,13 @@ namespace Ryujinx.Graphics.Vulkan srcRegion, dstRegion); - _newState.Topology = oldTopology; + SetPrimitiveTopology(oldTopology); if (_supportExtDynamic) { DynamicState.SetCullMode(oldCullMode); DynamicState.SetStencilTest(oldStencilTestEnable); DynamicState.SetDepthTestBool(oldDepthTestEnable, oldDepthWriteEnable); - DynamicState.SetPrimitiveTopology(oldTopology); } else { @@ -949,9 +948,9 @@ namespace Ryujinx.Graphics.Vulkan } } - public void SetImage(ShaderStage stage, int binding, ITexture image, Format imageFormat) + public void SetImage(ShaderStage stage, int binding, ITexture image) { - _descriptorSetUpdater.SetImage(Cbs, stage, binding, image, imageFormat); + _descriptorSetUpdater.SetImage(Cbs, stage, binding, image); } public void SetImage(int binding, Auto image) diff --git a/src/Ryujinx.Graphics.Vulkan/Ryujinx.Graphics.Vulkan.csproj b/src/Ryujinx.Graphics.Vulkan/Ryujinx.Graphics.Vulkan.csproj index f6a7be91e..aae28733f 100644 --- a/src/Ryujinx.Graphics.Vulkan/Ryujinx.Graphics.Vulkan.csproj +++ b/src/Ryujinx.Graphics.Vulkan/Ryujinx.Graphics.Vulkan.csproj @@ -15,6 +15,7 @@ + diff --git a/src/Ryujinx.Graphics.Vulkan/TextureBuffer.cs b/src/Ryujinx.Graphics.Vulkan/TextureBuffer.cs index e0694b197..073eee2ca 100644 --- a/src/Ryujinx.Graphics.Vulkan/TextureBuffer.cs +++ b/src/Ryujinx.Graphics.Vulkan/TextureBuffer.cs @@ -1,7 +1,7 @@ +using Ryujinx.Common.Memory; 
using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; -using System.Buffers; using System.Collections.Generic; using Format = Ryujinx.Graphics.GAL.Format; using VkFormat = Silk.NET.Vulkan.Format; @@ -16,7 +16,6 @@ namespace Ryujinx.Graphics.Vulkan private int _offset; private int _size; private Auto _bufferView; - private Dictionary> _selfManagedViews; private int _bufferCount; @@ -80,35 +79,25 @@ namespace Ryujinx.Graphics.Vulkan private void ReleaseImpl() { - if (_selfManagedViews != null) - { - foreach (var bufferView in _selfManagedViews.Values) - { - bufferView.Dispose(); - } - - _selfManagedViews = null; - } - _bufferView?.Dispose(); _bufferView = null; } /// - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { - _gd.SetBufferData(_bufferHandle, _offset, data.Memory.Span); + _gd.SetBufferData(_bufferHandle, _offset, data.Span); data.Dispose(); } /// - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { throw new NotSupportedException(); } /// - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { throw new NotSupportedException(); } @@ -137,28 +126,5 @@ namespace Ryujinx.Graphics.Vulkan return _bufferView?.Get(cbs, _offset, _size, write).Value ?? 
default; } - - public BufferView GetBufferView(CommandBufferScoped cbs, Format format, bool write) - { - var vkFormat = FormatTable.GetFormat(format); - if (vkFormat == VkFormat) - { - return GetBufferView(cbs, write); - } - - if (_selfManagedViews != null && _selfManagedViews.TryGetValue(format, out var bufferView)) - { - return bufferView.Get(cbs, _offset, _size, write).Value; - } - - bufferView = _gd.BufferManager.CreateView(_bufferHandle, vkFormat, _offset, _size, ReleaseImpl); - - if (bufferView != null) - { - (_selfManagedViews ??= new Dictionary>()).Add(format, bufferView); - } - - return bufferView?.Get(cbs, _offset, _size, write).Value ?? default; - } } } diff --git a/src/Ryujinx.Graphics.Vulkan/TextureView.cs b/src/Ryujinx.Graphics.Vulkan/TextureView.cs index 9b3f46662..b7b936809 100644 --- a/src/Ryujinx.Graphics.Vulkan/TextureView.cs +++ b/src/Ryujinx.Graphics.Vulkan/TextureView.cs @@ -1,7 +1,7 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; -using System.Buffers; using System.Collections.Generic; using System.Linq; using System.Threading; @@ -746,23 +746,23 @@ namespace Ryujinx.Graphics.Vulkan } /// - public void SetData(IMemoryOwner data) + public void SetData(MemoryOwner data) { - SetData(data.Memory.Span, 0, 0, Info.GetLayers(), Info.Levels, singleSlice: false); + SetData(data.Span, 0, 0, Info.GetLayers(), Info.Levels, singleSlice: false); data.Dispose(); } /// - public void SetData(IMemoryOwner data, int layer, int level) + public void SetData(MemoryOwner data, int layer, int level) { - SetData(data.Memory.Span, layer, level, 1, 1, singleSlice: true); + SetData(data.Span, layer, level, 1, 1, singleSlice: true); data.Dispose(); } /// - public void SetData(IMemoryOwner data, int layer, int level, Rectangle region) + public void SetData(MemoryOwner data, int layer, int level, Rectangle region) { - SetData(data.Memory.Span, layer, level, 1, 1, singleSlice: true, region); + SetData(data.Span, layer, 
level, 1, 1, singleSlice: true, region); data.Dispose(); } diff --git a/src/Ryujinx.Graphics.Vulkan/Window.cs b/src/Ryujinx.Graphics.Vulkan/Window.cs index d67362be3..3dc6d4e19 100644 --- a/src/Ryujinx.Graphics.Vulkan/Window.cs +++ b/src/Ryujinx.Graphics.Vulkan/Window.cs @@ -568,6 +568,13 @@ namespace Ryujinx.Graphics.Vulkan _scalingFilter.Level = _scalingFilterLevel; break; + case ScalingFilter.Area: + if (_scalingFilter is not AreaScalingFilter) + { + _scalingFilter?.Dispose(); + _scalingFilter = new AreaScalingFilter(_gd, _device); + } + break; } } } diff --git a/src/Ryujinx.Gtk3/UI/MainWindow.cs b/src/Ryujinx.Gtk3/UI/MainWindow.cs index 66c0afae0..b10dfe3f9 100644 --- a/src/Ryujinx.Gtk3/UI/MainWindow.cs +++ b/src/Ryujinx.Gtk3/UI/MainWindow.cs @@ -647,7 +647,7 @@ namespace Ryujinx.UI } var memoryConfiguration = ConfigurationState.Instance.System.ExpandRam.Value - ? HLE.MemoryConfiguration.MemoryConfiguration6GiB + ? HLE.MemoryConfiguration.MemoryConfiguration8GiB : HLE.MemoryConfiguration.MemoryConfiguration4GiB; IntegrityCheckLevel fsIntegrityCheckLevel = ConfigurationState.Instance.System.EnableFsIntegrityChecks ? 
IntegrityCheckLevel.ErrorOnInvalid : IntegrityCheckLevel.None; diff --git a/src/Ryujinx.HLE/HOS/Kernel/Common/KSystemControl.cs b/src/Ryujinx.HLE/HOS/Kernel/Common/KSystemControl.cs index 10f0b6f78..3f194e0ed 100644 --- a/src/Ryujinx.HLE/HOS/Kernel/Common/KSystemControl.cs +++ b/src/Ryujinx.HLE/HOS/Kernel/Common/KSystemControl.cs @@ -28,8 +28,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Common MemoryArrange.MemoryArrange4GiBSystemDev or MemoryArrange.MemoryArrange6GiBAppletDev => 3285 * MiB, MemoryArrange.MemoryArrange4GiBAppletDev => 2048 * MiB, - MemoryArrange.MemoryArrange6GiB or - MemoryArrange.MemoryArrange8GiB => 4916 * MiB, + MemoryArrange.MemoryArrange6GiB => 4916 * MiB, + MemoryArrange.MemoryArrange8GiB => 6964 * MiB, _ => throw new ArgumentException($"Invalid memory arrange \"{arrange}\"."), }; } @@ -42,8 +42,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Common MemoryArrange.MemoryArrange4GiBAppletDev => 1554 * MiB, MemoryArrange.MemoryArrange4GiBSystemDev => 448 * MiB, MemoryArrange.MemoryArrange6GiB => 562 * MiB, - MemoryArrange.MemoryArrange6GiBAppletDev or - MemoryArrange.MemoryArrange8GiB => 2193 * MiB, + MemoryArrange.MemoryArrange6GiBAppletDev => 2193 * MiB, + MemoryArrange.MemoryArrange8GiB => 562 * MiB, _ => throw new ArgumentException($"Invalid memory arrange \"{arrange}\"."), }; } diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs index 2ca0e1aac..1df280dce 100644 --- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs +++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs @@ -14,7 +14,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger { private readonly MemoryOwner _rawDataOwner; - private Span Raw => _rawDataOwner.Memory.Span; + private Span Raw => _rawDataOwner.Span; private ref ParcelHeader Header => ref MemoryMarshal.Cast(Raw)[0]; diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs 
index fd517b1ae..4c17e7aed 100644 --- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs +++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs @@ -412,9 +412,9 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger Format format = ConvertColorFormat(item.GraphicBuffer.Object.Buffer.Surfaces[0].ColorFormat); - int bytesPerPixel = + byte bytesPerPixel = format == Format.B5G6R5Unorm || - format == Format.R4G4B4A4Unorm ? 2 : 4; + format == Format.R4G4B4A4Unorm ? (byte)2 : (byte)4; int gobBlocksInY = 1 << item.GraphicBuffer.Object.Buffer.Surfaces[0].BlockHeightLog2; diff --git a/src/Ryujinx.Headless.SDL2/Options.cs b/src/Ryujinx.Headless.SDL2/Options.cs index ea2063758..ef8849eea 100644 --- a/src/Ryujinx.Headless.SDL2/Options.cs +++ b/src/Ryujinx.Headless.SDL2/Options.cs @@ -219,7 +219,7 @@ namespace Ryujinx.Headless.SDL2 // Hacks - [Option("expand-ram", Required = false, Default = false, HelpText = "Expands the RAM amount on the emulated system from 4GiB to 6GiB.")] + [Option("expand-ram", Required = false, Default = false, HelpText = "Expands the RAM amount on the emulated system from 4GiB to 8GiB.")] public bool ExpandRAM { get; set; } [Option("ignore-missing-services", Required = false, Default = false, HelpText = "Enable ignoring missing services.")] diff --git a/src/Ryujinx.Headless.SDL2/Program.cs b/src/Ryujinx.Headless.SDL2/Program.cs index 07995dbdd..4ee271203 100644 --- a/src/Ryujinx.Headless.SDL2/Program.cs +++ b/src/Ryujinx.Headless.SDL2/Program.cs @@ -562,7 +562,7 @@ namespace Ryujinx.Headless.SDL2 _userChannelPersistence, renderer, new SDL2HardwareDeviceDriver(), - options.ExpandRAM ? MemoryConfiguration.MemoryConfiguration6GiB : MemoryConfiguration.MemoryConfiguration4GiB, + options.ExpandRAM ? 
MemoryConfiguration.MemoryConfiguration8GiB : MemoryConfiguration.MemoryConfiguration4GiB, window, options.SystemLanguage, options.SystemRegion, diff --git a/src/Ryujinx.Memory/WritableRegion.cs b/src/Ryujinx.Memory/WritableRegion.cs index 2c21ef4e8..54facb508 100644 --- a/src/Ryujinx.Memory/WritableRegion.cs +++ b/src/Ryujinx.Memory/WritableRegion.cs @@ -1,5 +1,5 @@ +using Ryujinx.Common.Memory; using System; -using System.Buffers; namespace Ryujinx.Memory { @@ -7,7 +7,7 @@ namespace Ryujinx.Memory { private readonly IWritableBlock _block; private readonly ulong _va; - private readonly IMemoryOwner _memoryOwner; + private readonly MemoryOwner _memoryOwner; private readonly bool _tracked; private bool NeedsWriteback => _block != null; @@ -22,7 +22,7 @@ namespace Ryujinx.Memory Memory = memory; } - public WritableRegion(IWritableBlock block, ulong va, IMemoryOwner memoryOwner, bool tracked = false) + public WritableRegion(IWritableBlock block, ulong va, MemoryOwner memoryOwner, bool tracked = false) : this(block, va, memoryOwner.Memory, tracked) { _memoryOwner = memoryOwner; diff --git a/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs index 132ddfd0e..1e66d8112 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestAlu32.cs @@ -29,6 +29,7 @@ namespace Ryujinx.Tests.Cpu { return new[] { + 0xe6200f10u, // QADD16 R0, R0, R0 0xe6600f10u, // UQADD16 R0, R0, R0 0xe6600f70u, // UQSUB16 R0, R0, R0 }; diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs index f843fd561..08202c9e1 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimd32.cs @@ -328,6 +328,29 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("VSHLL. 
{}, , #")] + public void Vshll([Values(0u, 2u)] uint rd, + [Values(1u, 0u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b) + { + uint opcode = 0xf3b20300u; // VSHLL.I8 Q0, D0, #8 + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= size << 18; + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("VSWP D0, D0")] public void Vswp([Values(0u, 1u)] uint rd, [Values(0u, 1u)] uint rm, diff --git a/src/Ryujinx.UI.Common/Configuration/ConfigurationFileFormat.cs b/src/Ryujinx.UI.Common/Configuration/ConfigurationFileFormat.cs index af3ad0a1d..8a0be4028 100644 --- a/src/Ryujinx.UI.Common/Configuration/ConfigurationFileFormat.cs +++ b/src/Ryujinx.UI.Common/Configuration/ConfigurationFileFormat.cs @@ -238,7 +238,7 @@ namespace Ryujinx.UI.Common.Configuration public MemoryManagerMode MemoryManagerMode { get; set; } /// - /// Expands the RAM amount on the emulated system from 4GiB to 6GiB + /// Expands the RAM amount on the emulated system from 4GiB to 8GiB /// public bool ExpandRam { get; set; } diff --git a/src/Ryujinx/AppHost.cs b/src/Ryujinx/AppHost.cs index 0db8ef414..f4bfd1169 100644 --- a/src/Ryujinx/AppHost.cs +++ b/src/Ryujinx/AppHost.cs @@ -845,7 +845,7 @@ namespace Ryujinx.Ava Logger.Info?.PrintMsg(LogClass.Gpu, $"Backend Threading ({threadingMode}): {isGALThreaded}"); // Initialize Configuration. - var memoryConfiguration = ConfigurationState.Instance.System.ExpandRam.Value ? MemoryConfiguration.MemoryConfiguration6GiB : MemoryConfiguration.MemoryConfiguration4GiB; + var memoryConfiguration = ConfigurationState.Instance.System.ExpandRam.Value ? 
MemoryConfiguration.MemoryConfiguration8GiB : MemoryConfiguration.MemoryConfiguration4GiB; HLEConfiguration configuration = new(VirtualFileSystem, _viewModel.LibHacHorizonManager, diff --git a/src/Ryujinx/Assets/Locales/en_US.json b/src/Ryujinx/Assets/Locales/en_US.json index 74e18056b..b3cab7f5f 100644 --- a/src/Ryujinx/Assets/Locales/en_US.json +++ b/src/Ryujinx/Assets/Locales/en_US.json @@ -145,7 +145,7 @@ "SettingsTabSystemAudioBackendSDL2": "SDL2", "SettingsTabSystemHacks": "Hacks", "SettingsTabSystemHacksNote": "May cause instability", - "SettingsTabSystemExpandDramSize": "Use alternative memory layout (Developers)", + "SettingsTabSystemExpandDramSize": "Expand DRAM to 8GiB", "SettingsTabSystemIgnoreMissingServices": "Ignore Missing Services", "SettingsTabGraphics": "Graphics", "SettingsTabGraphicsAPI": "Graphics API", @@ -575,7 +575,7 @@ "MemoryManagerHostTooltip": "Directly map memory in the host address space. Much faster JIT compilation and execution.", "MemoryManagerUnsafeTooltip": "Directly map memory, but do not mask the address within the guest address space before access. Faster, but at the cost of safety. The guest application can access memory from anywhere in Ryujinx, so only run programs you trust with this mode.", "UseHypervisorTooltip": "Use Hypervisor instead of JIT. Greatly improves performance when available, but can be unstable in its current state.", - "DRamTooltip": "Utilizes an alternative MemoryMode layout to mimic a Switch development model.\n\nThis is only useful for higher-resolution texture packs or 4k resolution mods. Does NOT improve performance.\n\nLeave OFF if unsure.", + "DRamTooltip": "Utilizes an alternative memory mode with 8GiB of DRAM to mimic a Switch development model.\n\nThis is only useful for higher-resolution texture packs or 4k resolution mods. Does NOT improve performance.\n\nLeave OFF if unsure.", "IgnoreMissingServicesTooltip": "Ignores unimplemented Horizon OS services. 
This may help in bypassing crashes when booting certain games.\n\nLeave OFF if unsure.", "GraphicsBackendThreadingTooltip": "Executes graphics backend commands on a second thread.\n\nSpeeds up shader compilation, reduces stuttering, and improves performance on GPU drivers without multithreading support of their own. Slightly better performance on drivers with multithreading.\n\nSet to AUTO if unsure.", "GalThreadingTooltip": "Executes graphics backend commands on a second thread.\n\nSpeeds up shader compilation, reduces stuttering, and improves performance on GPU drivers without multithreading support of their own. Slightly better performance on drivers with multithreading.\n\nSet to AUTO if unsure.", @@ -758,10 +758,11 @@ "GraphicsAATooltip": "Applies anti-aliasing to the game render.\n\nFXAA will blur most of the image, while SMAA will attempt to find jagged edges and smooth them out.\n\nNot recommended to use in conjunction with the FSR scaling filter.\n\nThis option can be changed while a game is running by clicking \"Apply\" below; you can simply move the settings window aside and experiment until you find your preferred look for a game.\n\nLeave on NONE if unsure.", "GraphicsAALabel": "Anti-Aliasing:", "GraphicsScalingFilterLabel": "Scaling Filter:", - "GraphicsScalingFilterTooltip": "Choose the scaling filter that will be applied when using resolution scale.\n\nBilinear works well for 3D games and is a safe default option.\n\nNearest is recommended for pixel art games.\n\nFSR 1.0 is merely a sharpening filter, not recommended for use with FXAA or SMAA.\n\nThis option can be changed while a game is running by clicking \"Apply\" below; you can simply move the settings window aside and experiment until you find your preferred look for a game.\n\nLeave on BILINEAR if unsure.", + "GraphicsScalingFilterTooltip": "Choose the scaling filter that will be applied when using resolution scale.\n\nBilinear works well for 3D games and is a safe default option.\n\nNearest 
is recommended for pixel art games.\n\nFSR 1.0 is merely a sharpening filter, not recommended for use with FXAA or SMAA.\n\nArea scaling is recommended when downscaling resolutions that are larger than the output window. It can be used to achieve a supersampled anti-aliasing effect when downscaling by more than 2x.\n\nThis option can be changed while a game is running by clicking \"Apply\" below; you can simply move the settings window aside and experiment until you find your preferred look for a game.\n\nLeave on BILINEAR if unsure.", "GraphicsScalingFilterBilinear": "Bilinear", "GraphicsScalingFilterNearest": "Nearest", "GraphicsScalingFilterFsr": "FSR", + "GraphicsScalingFilterArea": "Area", "GraphicsScalingFilterLevelLabel": "Level", "GraphicsScalingFilterLevelTooltip": "Set FSR 1.0 sharpening level. Higher is sharper.", "SmaaLow": "SMAA Low", diff --git a/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml b/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml index 5cffc6848..0a12575ad 100644 --- a/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml +++ b/src/Ryujinx/UI/Views/Settings/SettingsGraphicsView.axaml @@ -1,4 +1,4 @@ - + + +