Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save HansKristian-Work/81802214b726ae1c24c5bb0ec5997243 to your computer and use it in GitHub Desktop.

Select an option

Save HansKristian-Work/81802214b726ae1c24c5bb0ec5997243 to your computer and use it in GitHub Desktop.
// Interface between vs_main and ps_main: a position plus one float4 that
// carries two separate 2D texture coordinates (xy and zw).
struct VSOut
{
    float4 pos : SV_Position; // ps_main branches on pos.z
    float4 uv : UV;           // xy = incoming texcoord, zw = pos.xy * texcoord (see vs_main)
};
// Root signature used by both entry points: the first descriptor table holds
// SRVs t0, t1, t2 followed by CBV b0; the second table holds sampler s0.
#define MyRS "RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), DescriptorTable(SRV(t0), SRV(t1), SRV(t2), CBV(b0)), DescriptorTable(Sampler(s0))"
// Vertex shader: forwards the input position unchanged and packs two UV sets
// into a single float4 (xy = uv, zw = pos.xy * uv).
[RootSignature(MyRS)]
VSOut vs_main(float4 pos : POSITION, float2 uv : TEXCOORD)
{
    // Second coordinate set, derived from the position so it differs from uv.
    const float2 scaled_uv = pos.xy * uv;

    VSOut result;
    result.pos = pos;
    result.uv = float4(uv, scaled_uv);
    return result;
}
// Constant buffer bound to b0 (the CBV entry in MyRS).
cbuffer c : register(b0)
{
// Texture coordinate used by the B.Sample call in ps_main; the same value for every pixel.
float2 constant_uv;
};
// Three single-channel textures on t0..t2 and one sampler on s0, matching the
// slots declared in MyRS. ps_main samples exactly one of A/B/C per pixel.
Texture2D<float> A : register(t0);
Texture2D<float> B : register(t1);
Texture2D<float> C : register(t2);
SamplerState D : register(s0);
// Pixel shader: picks one of three textures from vin.pos.z and samples it.
//   pos.z > 0.5 -> A, sampled at vin.uv.xy
//   pos.z < 0.2 -> B, sampled at constant_uv from the constant buffer
//   otherwise   -> C, sampled at vin.uv.zw
// Which Sample call runs therefore depends on a per-pixel value, and the
// if is tagged [branch]. The statement structure is intentional for this
// repro; keep it as written when comparing against the disassembly.
[RootSignature(MyRS)]
float ps_main(VSOut vin) : SV_Target
{
[branch]
if (vin.pos.z > 0.5) { return A.Sample(D, vin.uv.xy); }
else if (vin.pos.z < 0.2) { return B.Sample(D, constant_uv); } // According to D3D11 functional spec, this is well defined. Statically accessed constant value should work.
return C.Sample(D, vin.uv.zw);
}
; D3D12 Shader Hash 0xb4a27ddc82ac3216979d9e0851569e68
; API PSO Hash 0x3bfb684a72adb5b4
; Driver Internal Pipeline Hash 0xbf9430b7bd00c5c3
; -------- Disassembly --------------------
// Driver disassembly of the pixel shader (type(PS)) for GFX10_3 at wave size 64.
// Overall shape: three image_sample paths selected by two compares on v2,
// each run under a different exec mask, followed by a shared export epilogue.
// NOTE(review): v2 is presumably the interpolated SV_Position.z on entry — confirm.
shader main
asic(GFX10_3)
type(PS)
sgpr_count(30)
vgpr_count(4)
wave_size(64)
// s_ps_state in s0
s_inst_prefetch 0x0003 // 000000000000: BFA00003
s_mov_b32 m0, s4 // 000000000004: BEFC0304
// Save the incoming exec mask in s[24:25], then widen exec to whole-quad mode.
s_mov_b64 s[24:25], exec // 000000000008: BE98047E
s_wqm_b64 exec, exec // 00000000000C: BEFE0A7E
// Build two 64-bit base addresses: s[0:1] = {s2, pc.hi} and s[4:5] = {s3, pc.hi}.
// NOTE(review): these look like the two descriptor-table pointers (resources, sampler) — confirm.
s_getpc_b64 s[0:1] // 000000000010: BE801F80
s_mov_b32 s4, s3 // 000000000014: BE840303
s_mov_b32 s5, s1 // 000000000018: BE850301
s_mov_b32 s0, s2 // 00000000001C: BE800302
// Outer split: vcc = !(0.5 < v2). exec is saved in s[2:3] and narrowed to the
// lanes that did NOT pass "v2 > 0.5"; the passing lanes are handled at label_00C4.
v_cmp_nlt_f32 vcc, 0.5, v2 // 000000000020: 7C1C04F0
s_and_saveexec_b64 s[2:3], vcc // 000000000024: BE82246A
s_cbranch_execz label_00C4 // 000000000028: BF880026
// Inner split: exec is saved in s[6:7] and narrowed to lanes with !(v2 < 0.2)
// (0x3e4ccccd is 0.2f). These lanes sample with interpolated attr0.zw.
s_mov_b64 s[6:7], exec // 00000000002C: BE86047E
v_cmpx_nlt_f32 exec, v2, lit(0x3e4ccccd) // 000000000030: D41E007E 0001FF02 3E4CCCCD
s_cbranch_execz label_0078 // 00000000003C: BF88000E
// Behavior quirk: helper lanes are lit up when computing v_interp.
// (exec is stashed in vcc, widened with s_wqm for the interpolation, then restored.)
s_mov_b64 vcc, exec // 000000000040: BEEA047E
s_wqm_b64 exec, vcc // 000000000044: BEFE0A6A
v_interp_p1_f32 v2, v0, attr0.z // 000000000048: C8080200
v_interp_p1_f32 v3, v0, attr0.w // 00000000004C: C80C0300
v_interp_p2_f32 v2, v1, attr0.z // 000000000050: C8090201
v_interp_p2_f32 v3, v1, attr0.w // 000000000054: C80D0301
s_mov_b64 exec, vcc // 000000000058: BEFE046A
// 8 dwords from s[0:1]+0x40 and 4 dwords from s[4:5] feed the image_sample below.
// NOTE(review): offset 0x40 presumably selects the t2 descriptor (texture C) — confirm.
s_load_dwordx8 s[8:15], s[0:1], 0x000040 // 00000000005C: F40C0200 FA000040
s_load_dwordx4 s[16:19], s[4:5], null // 000000000064: F4080402 FA000000
s_waitcnt lgkmcnt(0) // 00000000006C: BF8CC07F
image_sample v3, v[2:3], s[8:15], s[16:19] dim:SQ_RSRC_IMG_2D // 000000000070: F0800108 00820302
label_0078:
// exec = s[6:7] & ~exec: the lanes of the inner split that did not take the path above.
s_andn2_b64 exec, s[6:7], exec // 000000000078: 8AFE7E06
s_cbranch_execz label_00C0 // 00000000007C: BF880010
// 4 dwords from s[0:1]+0x60, then used as the base of a scalar buffer load.
// NOTE(review): presumably the b0 buffer descriptor — confirm.
s_load_dwordx4 s[8:11], s[0:1], 0x000060 // 000000000080: F4080200 FA000060
// Unlike the two interpolated paths, exec is copied to vcc and straight back
// here with no s_wqm in between, so the mask is not widened to whole quads.
s_mov_b64 vcc, exec // 000000000088: BEEA047E
s_mov_b64 exec, vcc // 00000000008C: BEFE046A
s_waitcnt lgkmcnt(0) // 000000000090: BF8CC07F
// Two dwords from the buffer land in s[8:9]; they become the sample coordinates below.
// NOTE(review): presumably constant_uv; offset 0x20 presumably selects t1 (texture B) — confirm.
s_buffer_load_dwordx2 s[8:9], s[8:11], null // 000000000094: F4240204 FA000000
s_load_dwordx8 s[12:19], s[0:1], 0x000020 // 00000000009C: F40C0300 FA000020
s_load_dwordx4 s[20:23], s[4:5], null // 0000000000A4: F4080502 FA000000
s_waitcnt lgkmcnt(0) // 0000000000AC: BF8CC07F
// It would appear that v2 and v3 are not written in helper lanes?
// (The v_mov writes only reach lanes enabled in the current, non-widened exec.)
v_mov_b32 v2, s8 // 0000000000B0: 7E040208
v_mov_b32 v3, s9 // 0000000000B4: 7E060209
// This will probably break.
image_sample v3, v[2:3], s[12:19], s[20:23] dim:SQ_RSRC_IMG_2D // 0000000000B8: F0800108 00A30302
label_00C0:
// Rejoin the inner split.
s_mov_b64 exec, s[6:7] // 0000000000C0: BEFE0406
label_00C4:
// exec = s[2:3] & ~exec: the lanes that passed the outer "v2 > 0.5" test.
s_andn2_b64 exec, s[2:3], exec // 0000000000C4: 8AFE7E02
s_cbranch_execz label_0108 // 0000000000C8: BF88000F
// Again, helper lanes are lit up when computing v_interp.
s_mov_b64 vcc, exec // 0000000000CC: BEEA047E
s_wqm_b64 exec, vcc // 0000000000D0: BEFE0A6A
v_interp_p1_f32 v2, v0, attr0.x // 0000000000D4: C8080000
v_interp_p1_f32 v0, v0, attr0.y // 0000000000D8: C8000100
v_interp_p2_f32 v2, v1, attr0.x // 0000000000DC: C8090001
v_interp_p2_f32 v0, v1, attr0.y // 0000000000E0: C8010101
s_mov_b64 exec, vcc // 0000000000E4: BEFE046A
// 8 dwords from s[0:1]+0 and 4 dwords from s[4:5] feed the image_sample below.
// NOTE(review): offset 0 presumably selects the t0 descriptor (texture A) — confirm.
s_load_dwordx8 s[8:15], s[0:1], null // 0000000000E8: F40C0200 FA000000
s_load_dwordx4 s[4:7], s[4:5], null // 0000000000F0: F4080102 FA000000
s_waitcnt lgkmcnt(0) // 0000000000F8: BF8CC07F
image_sample v3, [v2,v0], s[8:15], s[4:7] dim:SQ_RSRC_IMG_2D // 0000000000FC: F080010A 00220302 00000000
label_0108:
// Rejoin the outer split, mask exec with the mask saved at entry, wait for the
// sample result in v3, pack it to half precision and export it to mrt0.
s_mov_b64 exec, s[2:3] // 000000000108: BEFE0402
s_and_b64 exec, exec, s[24:25] // 00000000010C: 87FE187E
s_waitcnt vmcnt(0) // 000000000110: BF8C3F70
v_cvt_pkrtz_f16_f32 v1, v3, 0 // 000000000114: D52F0001 00010103
s_mov_b64 exec, s[24:25] // 00000000011C: BEFE0418
exp mrt0, v1, v1, off, off done compr vm // 000000000120: F8001C03 00000001
s_endpgm // 000000000128: BF810000
s_code_end // 00000000012C: BF9F0000
s_code_end // 000000000130: BF9F0000
s_code_end // 000000000134: BF9F0000
s_code_end // 000000000138: BF9F0000
s_code_end // 00000000013C: BF9F0000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment