; Target selection for this translation unit: AMD gfx942 under the amdhsa OS
; ABI, emitted as an HSA code object of version 6. These two directives must
; agree with the amdhsa.target / code-object data recorded in the metadata
; note of this file.
	.amdgcn_target "amdgcn-amd-amdhsa--gfx942"
	.amdhsa_code_object_version 6

; -----------------------------------------------------------------------------
; kernel -- chain of 80 lane-exchange rounds, result stored by lane 0
;
; Entry state (as declared by the .amdhsa_kernel descriptor in this file):
;   s[0:1]  kernarg segment pointer
;   s2      workgroup id X
;   v0      work-item id X
;
; Kernarg bytes read:
;   +0x0  qword  base address used for the final store
;   +0xc  dword  signed 32-bit multiplier applied to the output index
;
; Each round passes a value through three quad_perm:[1,0,3,2] DPP moves and
; then through one ds_bpermute_b32 whose per-lane byte address is
; 4 * (lane ^ 1). Round 1 starts from v0; all later rounds operate on v3.
; After round 80 the lane whose index is 0 stores one dword at
;   base + 4 * ((wg_id * 8 + (v0 >> 6)) * multiplier)      (64-bit arithmetic)
;
; NOTE(review): the exchange chain looks like a DPP/bpermute latency
; microbenchmark rather than a useful computation -- confirm with the
; generating source.
;
; Clobbers: v0-v4, s0, s1, s3-s6, vcc, exec.
;
; All s_nop / s_waitcnt instructions are kept exactly where the original
; listing had them (s_nop N supplies N+1 wait states); do not reorder or
; remove them without re-checking the gfx942 hazard rules.
; -----------------------------------------------------------------------------

; pair_swap dst, src
; One DPP move with the modifier set used throughout this kernel. Within every
; group of four lanes, lane i receives src from lane quad_perm[i], so lanes
; 0<->1 and 2<->3 exchange values.
	.macro	pair_swap dst, src
	v_mov_b32_dpp \dst, \src quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf bound_ctrl:1
	.endm

	.text
	.globl	kernel
	.p2align	8
	.type	kernel,@function
kernel:
kernel$local:
	.type	kernel$local,@function

	; ---- Round 1, DPP half: v0 -> v1 -> v1 -> v3 --------------------------
	pair_swap v1, v0
	s_nop 1
	pair_swap v1, v1
	s_nop 1
	pair_swap v3, v1

	; ---- v1 = lane index (number of lanes below this one, full -1 mask) ---
	v_mbcnt_lo_u32_b32 v1, -1, 0
	v_mbcnt_hi_u32_b32 v1, -1, v1

	; ---- v2 = bpermute byte address of the exchange partner ---------------
	; partner = lane ^ 1, accepted only while partner < (lane & 64) + 64;
	; otherwise the lane addresses itself. Address = 4 * selected lane.
	v_and_b32_e32 v4, 64, v1
	v_xor_b32_e32 v2, 1, v1
	v_add_u32_e32 v4, 64, v4
	v_cmp_lt_u32_e32 vcc, v2, v4
	s_nop 1
	v_cndmask_b32_e32 v2, v1, v2, vcc	; v2 = vcc ? v2 : v1
	v_lshlrev_b32_e32 v2, 2, v2

	; ---- Round 1, bpermute half -------------------------------------------
	ds_bpermute_b32 v3, v2, v3
	; vcc = (lane index == 0); consumed by s_and_saveexec_b64 after the chain.
	v_cmp_eq_u32_e32 vcc, 0, v1
	s_waitcnt lgkmcnt(0)

	; ---- Round 2, DPP half (no s_nop 0 here, unlike the repeated rounds) --
	pair_swap v3, v3
	s_nop 1
	pair_swap v3, v3
	s_nop 1
	pair_swap v3, v3

	; ---- bpermute of round k followed by DPP half of round k+1, k = 2..79 -
	; Expands to the same 78 x 8 instructions the unrolled listing contained.
	.rept	78
	ds_bpermute_b32 v3, v2, v3
	s_waitcnt lgkmcnt(0)
	s_nop 0
	pair_swap v3, v3
	s_nop 1
	pair_swap v3, v3
	s_nop 1
	pair_swap v3, v3
	.endr

	; ---- Round 80, bpermute half: final value lands in v2 -----------------
	; Completion is awaited by the s_waitcnt lgkmcnt(0) in the store path.
	ds_bpermute_b32 v2, v2, v3

	; ---- Store path: only the lane with index 0 continues ------------------
	; exec &= vcc; the old exec is written to s[4:5] but is overwritten by the
	; s_load_dwordx2 below and never restored -- the wave ends right after.
	s_and_saveexec_b64 s[4:5], vcc
	s_cbranch_execz .LBB0_2
	s_load_dword s6, s[0:1], 0xc		; s6 = multiplier (kernarg +0xc)
	s_load_dwordx2 s[4:5], s[0:1], 0x0	; s[4:5] = base address (kernarg +0x0)
	s_ashr_i32 s3, s2, 31			; s[2:3] = sign-extended workgroup id
	v_lshrrev_b32_e32 v0, 6, v0		; v0 = work-item id >> 6
	v_mov_b32_e32 v1, 0			; v[0:1] = that value as 64-bit
	s_waitcnt lgkmcnt(0)			; both scalar loads and the last bpermute
	s_ashr_i32 s0, s6, 31			; s0 = sign word of the multiplier
	v_lshl_add_u64 v[0:1], s[2:3], 3, v[0:1]	; idx = (wg_id << 3) + (tid >> 6)
	; 64-bit product idx * sext(multiplier), built from 32-bit pieces:
	v_mul_lo_u32 v3, v1, s6			; hi(idx) * lo(mult)
	v_mul_lo_u32 v4, v0, s0			; lo(idx) * hi(mult)
	v_mad_u64_u32 v[0:1], s[0:1], v0, s6, 0	; lo(idx) * lo(mult); carry-out clobbers s[0:1]
	v_add3_u32 v1, v1, v4, v3		; fold the cross terms into the high word
	v_lshl_add_u64 v[0:1], v[0:1], 2, s[4:5]	; address = base + product * 4
	global_store_dword v[0:1], v2, off
.LBB0_2:
	s_endpgm

; -----------------------------------------------------------------------------
; Kernel descriptor for `kernel`, placed in .rodata on a 64-byte boundary.
; The values below describe the launch state the kernel code relies on; keep
; them in sync with the register usage of the kernel body.
; -----------------------------------------------------------------------------
	.section	.rodata,"a",@progbits
	.p2align	6, 0x0
	.amdhsa_kernel kernel
		; Memory segments: no LDS allocation, no scratch, 24 bytes of kernargs.
		.amdhsa_group_segment_fixed_size 0
		.amdhsa_private_segment_fixed_size 0
		.amdhsa_kernarg_size 24
		; User SGPRs: two registers in total, holding only the kernarg
		; segment pointer (s[0:1]); kernarg preloading is not used.
		.amdhsa_user_sgpr_count 2
		.amdhsa_user_sgpr_dispatch_ptr 0
		.amdhsa_user_sgpr_queue_ptr 0
		.amdhsa_user_sgpr_kernarg_segment_ptr 1
		.amdhsa_user_sgpr_dispatch_id 0
		.amdhsa_user_sgpr_kernarg_preload_length 0
		.amdhsa_user_sgpr_kernarg_preload_offset 0
		.amdhsa_user_sgpr_private_segment_size 0
		.amdhsa_uses_dynamic_stack 0
		.amdhsa_enable_private_segment 0
		; System registers: workgroup id X only (arrives after the user
		; SGPRs, i.e. in s2); work-item id X only (v0).
		.amdhsa_system_sgpr_workgroup_id_x 1
		.amdhsa_system_sgpr_workgroup_id_y 0
		.amdhsa_system_sgpr_workgroup_id_z 0
		.amdhsa_system_sgpr_workgroup_info 0
		.amdhsa_system_vgpr_workitem_id 0
		; Register budget: v0-v4 and s0-s6 are used, plus VCC; no AGPRs are
		; used by the kernel body.
		.amdhsa_next_free_vgpr 5
		.amdhsa_next_free_sgpr 7
		.amdhsa_accum_offset 8
		.amdhsa_reserve_vcc 1
		; Floating-point mode register defaults (the kernel body contains no
		; floating-point arithmetic).
		.amdhsa_float_round_mode_32 0
		.amdhsa_float_round_mode_16_64 0
		.amdhsa_float_denorm_mode_32 3
		.amdhsa_float_denorm_mode_16_64 3
		.amdhsa_dx10_clamp 1
		.amdhsa_ieee_mode 1
		.amdhsa_fp16_overflow 0
		.amdhsa_tg_split 0
		; All exception traps disabled.
		.amdhsa_exception_fp_ieee_invalid_op 0
		.amdhsa_exception_fp_denorm_src 0
		.amdhsa_exception_fp_ieee_div_zero 0
		.amdhsa_exception_fp_ieee_overflow 0
		.amdhsa_exception_fp_ieee_underflow 0
		.amdhsa_exception_fp_ieee_inexact 0
		.amdhsa_exception_int_div_zero 0
	.end_amdhsa_kernel

; -----------------------------------------------------------------------------
; End of `kernel`: symbol sizes, per-kernel resource-usage symbols, code
; padding, and the trailing bookkeeping sections.
; -----------------------------------------------------------------------------
	.text
.Lfunc_end0:
	; Both names of the function share one size: start label to end label.
	.size	kernel, .Lfunc_end0-kernel
	.size	kernel$local, .Lfunc_end0-kernel

	; Resource usage of `kernel` as assembler symbols; the values match the
	; register budget in the kernel descriptor (5 VGPRs, 7 SGPRs, VCC used).
	.set kernel.num_vgpr, 5
	.set kernel.num_agpr, 0
	.set kernel.numbered_sgpr, 7
	.set kernel.private_seg_size, 0
	.set kernel.uses_vcc, 1
	.set kernel.uses_flat_scratch, 0
	.set kernel.has_dyn_sized_stack, 0
	.set kernel.has_recursion, 0
	.set kernel.has_indirect_call, 0
	; Pad the code to a 64-byte boundary and append 256 further dwords, all
	; filled with 3212836864 (0xBF800000, the encoding of s_nop 0).
	.p2alignl 6, 3212836864
	.fill 256, 4, 3212836864
	; Module-wide register maximums; all zero in this file.
	.section	.AMDGPU.gpr_maximums,"",@progbits
	.set amdgpu.max_num_vgpr, 0
	.set amdgpu.max_num_agpr, 0
	.set amdgpu.max_num_sgpr, 0
	.text
	; Empty .note.GNU-stack section.
	.section	".note.GNU-stack","",@progbits

; -----------------------------------------------------------------------------
; HSA code-object metadata (YAML between the two directives below).
; Nesting matters: every per-kernel key belongs to the single list item under
; amdhsa.kernels, and .offset/.size/.value_kind belong to the single list item
; under .args. The values mirror the kernel descriptor: one 24-byte by-value
; argument block, 5 VGPRs, wave size 64, descriptor symbol kernel.kd.
; No assembler comments may be placed inside the YAML region.
; -----------------------------------------------------------------------------
	.amdgpu_metadata
---
amdhsa.kernels:
  - .agpr_count:     0
    .args:
      - .offset:         0
        .size:           24
        .value_kind:     by_value
    .group_segment_fixed_size: 0
    .kernarg_segment_align: 8
    .kernarg_segment_size: 24
    .max_flat_workgroup_size: 1024
    .name:           kernel
    .private_segment_fixed_size: 0
    .sgpr_count:     13
    .sgpr_spill_count: 0
    .symbol:         kernel.kd
    .uses_dynamic_stack: false
    .vgpr_count:     5
    .vgpr_spill_count: 0
    .wavefront_size: 64
amdhsa.target:   amdgcn-amd-amdhsa--gfx942
amdhsa.version:
  - 1
  - 2
...

	.end_amdgpu_metadata