Last active
January 15, 2020 01:37
-
-
Save reinsteam/d0b525b7bbb1a72e506e35d75f39b405 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
set isa_file=%~1.isa
set analysis_file=%~1.a
set isa_file
rga --define COMPILER_AMD_RGA=1 --source-kind hlsl --asic Pitcairn --profile cs_5_0 --function %2 --intrinsics --isa %isa_file% %1
*/
#if COMPILER_AMD_RGA | |
#include "ags_shader_intrinsics_dx11.hlsl" | |
// Forwards `pred` to the AGS ballot intrinsic and returns its uint2 result.
// NOTE(review): per the AGS documentation the result is presumably a 64-bit
// lane mask split into two 32-bit halves - confirm against the AGS header.
uint2 ballot(bool pred)
{
    const uint2 laneMask = AmdDxExtShaderIntrinsics_Ballot(pred);
    return laneMask;
}
// Unsigned overload: forwards `x` to the AGS readfirstlane intrinsic for uint
// values and returns the value it produces.
uint ReadFirstLane(uint x)
{
    const uint firstLaneValue = AmdDxExtShaderIntrinsics_ReadfirstlaneU(x);
    return firstLaneValue;
}
// Float overload: forwards `x` to the AGS readfirstlane intrinsic for float
// values and returns the value it produces.
float ReadFirstLane(float x)
{
    const float firstLaneValue = AmdDxExtShaderIntrinsics_ReadfirstlaneF(x);
    return firstLaneValue;
}
// Forwards `x` and `laneId` to the AGS readlane intrinsic for uint values and
// returns the value it produces.
// NOTE(review): any constraints on `laneId` (e.g. uniformity across the wave)
// come from the AGS intrinsic - confirm against its documentation.
uint ReadLane(uint x, uint laneId)
{
    const uint laneValue = AmdDxExtShaderIntrinsics_ReadlaneU(x, laneId);
    return laneValue;
}
// Returns true when ballot(true) yields a non-zero mask, i.e. when at least
// one component of the returned uint2 has a bit set.
bool AnyExecSet()
{
    const uint2 activeMask = ballot(true);
    // Equivalent to any(activeMask): non-zero if either half is non-zero.
    return (activeMask.x | activeMask.y) != 0;
}
#endif | |
// Read-only input texture of uint texels, bound to register t0.
// main() reads one texel per thread from it.
Texture2D<uint> InTexture : register(t0); | |
// Read-write output texture of uint texels, bound to register u0.
// main() stores each thread's loop iteration count into it.
RWTexture2D<uint> OutTexture : register(u0); | |
// Compute entry point (8x8x1 threads per group).
//
// Each thread loads one texel from InTexture and then spins in a loop that
// compares its own value with the value returned by ReadFirstLane(). A thread
// leaves the loop as soon as the two values match, or when AnyExecSet()
// reports false; every iteration that does neither bumps the thread's counter.
// The final counter is written to OutTexture at the same texel coordinate.
//
// TexelId : SV_DispatchThreadId, used as the 2D texel coordinate for both
//           the load and the store.
[numthreads(8, 8, 1)]
void main(uint2 TexelId : SV_DispatchThreadId)
{
    // Per-thread value fetched from the input texture.
    const uint LaneValue = InTexture[TexelId];

    // Number of loop iterations this thread completed without exiting.
    uint GroupIndex = 0;

#if 1
    while (true)
    {
        // Value broadcast by ReadFirstLane() for this iteration.
        const uint FirstLaneValue = ReadFirstLane(LaneValue);

        // This thread's value matches the broadcast one: done.
        if (FirstLaneValue == LaneValue)
        {
            break;
        }

        // Kept as a separate test (not merged with the one above) so the
        // wave query is evaluated only by threads that did not match.
        if (!AnyExecSet())
        {
            break;
        }

        ++GroupIndex;
    }
#endif

    OutTexture[TexelId] = GroupIndex;
}
/* | |
; -------- Disassembly -------------------- | |
shader main | |
asic(SI) | |
type(CS) | |
v_mad_u32_u24 v0, s12, 8, v0 // 00000000: D2860000 0401100C | |
v_mad_u32_u24 v1, s13, 8, v1 // 00000008: D2860001 0405100D | |
s_load_dwordx8 s[12:19], s[2:3], 0x00 // 00000010: C0C60300 | |
s_waitcnt lgkmcnt(0) // 00000014: BF8C007F | |
image_load v2, v[0:3], s[12:19] unorm // 00000018: F0001100 00030200 | |
s_mov_b64 s[0:1], exec // 00000020: BE80047E | |
s_mov_b64 s[2:3], exec // 00000024: BE82047E | |
v_mov_b32 v3, 0 // 00000028: 7E060280 | |
s_nop 0x0000 // 0000002C: BF800000 | |
s_nop 0x0000 // 00000030: BF800000 | |
s_nop 0x0000 // 00000034: BF800000 | |
s_nop 0x0000 // 00000038: BF800000 | |
s_nop 0x0000 // 0000003C: BF800000 | |
label_0010: | |
s_waitcnt vmcnt(0) // 00000040: BF8C0F70 | |
v_readfirstlane_b32 s12, v2 // 00000044: 7E180502 | |
v_cmp_eq_i32 vcc, s12, v2 // 00000048: 7D04040C | |
s_and_saveexec_b64 s[12:13], vcc // 0000004C: BE8C246A | |
s_andn2_b64 s[2:3], s[2:3], exec // 00000050: 8A827E02 | |
s_cbranch_scc0 label_001C // 00000054: BF840006 | |
s_and_b64 exec, s[12:13], s[2:3] // 00000058: 87FE020C | |
s_or_b32 s12, exec_lo, exec_hi // 0000005C: 880C7F7E | |
s_cmp_eq_i32 s12, 0 // 00000060: BF00800C | |
s_cbranch_scc1 label_001C // 00000064: BF850002 | |
v_add_i32 v3, vcc, 1, v3 // 00000068: 4A060681 | |
s_branch label_0010 // 0000006C: BF82FFF4 | |
label_001C: | |
s_mov_b64 exec, s[0:1] // 00000070: BEFE0400 | |
v_mov_b32 v2, v3 // 00000074: 7E040303 | |
v_mov_b32 v4, v3 // 00000078: 7E080303 | |
v_mov_b32 v5, v3 // 0000007C: 7E0A0303 | |
image_store v[2:5], v[0:3], s[4:11] dmask:0xf unorm glc // 00000080: F0203F00 00010200 | |
s_endpgm // 00000088: BF810000 | |
end | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment