-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Closed
Labels
Description
Reproducer:
; Reduced reproducer: a kernel that doubles its i64 argument and passes the
; result to @__ockl_dm_alloc. The returned pointer is never used.
define amdgpu_kernel void @ker_TestDynamicAllocInAllThreads_CodeObj(i64 %perThreadSize) {
entry:
; %mul4 = %perThreadSize << 1
%mul4 = shl i64 %perThreadSize, 1
; The call site passes %mul4 without repeating the callee's `inreg` attribute.
%call.i = tail call fastcc ptr addrspace(1) @__ockl_dm_alloc(i64 %mul4)
ret void
}
; External callee; its single i64 parameter is declared `inreg`.
declare fastcc ptr addrspace(1) @__ockl_dm_alloc(i64 inreg)
Machine function:
# Machine code for function ker_TestDynamicAllocInAllThreads_CodeObj: NoPHIs, TracksLiveness, NoVRegs, TiedOpsRewritten, TracksDebugUserValues
Function Live Ins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr12, $sgpr13, $sgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
bb.0.entry:
liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
$sgpr32 = S_MOV_B32 0
$sgpr10 = S_ADD_U32 $sgpr10, $sgpr15, implicit-def $scc
$sgpr11 = S_ADDC_U32 $sgpr11, 0, implicit-def dead $scc, implicit $scc
S_SETREG_B32 $sgpr10, -2028, implicit-def $mode, implicit $mode
S_SETREG_B32 $sgpr11, -2027, implicit-def $mode, implicit $mode
$sgpr0 = S_ADD_U32 $sgpr0, $sgpr15, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
$sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
renamable $sgpr10_sgpr11 = COPY $sgpr8_sgpr9
renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM renamable $sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s64) from %ir.perThreadSize.kernarg.offset1, align 16, addrspace 4)
$sgpr0_sgpr1 = S_LSHL_B64 renamable $sgpr16_sgpr17, 1, implicit-def dead $scc
renamable $sgpr8 = S_ADD_U32 renamable $sgpr6, 8, implicit-def $scc
renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr7, 0, implicit-def dead $scc, implicit $scc
renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @__ockl_dm_alloc, target-flags(amdgpu-gotprel32-hi) @__ockl_dm_alloc, implicit-def dead $scc
renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM killed renamable $sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
renamable $vgpr2 = V_LSHLREV_B32_e64 20, killed $vgpr2, implicit $exec
renamable $vgpr1 = V_LSHLREV_B32_e64 10, killed $vgpr1, implicit $exec
renamable $vgpr3 = V_ALIGNBIT_B32_e64 killed $sgpr17, $sgpr16, 31, implicit $exec
renamable $vgpr31 = V_OR3_B32_e64 killed $vgpr0, killed $vgpr1, killed $vgpr2, implicit $exec
$sgpr1 = COPY killed renamable $vgpr3
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @__ockl_dm_alloc, <regmask $sgpr_null $sgpr_null_hi $src_private_base $src_private_base_hi $src_private_base_lo $src_private_limit $src_private_limit_hi $src_private_limit_lo $src_shared_base $src_shared_base_hi $src_shared_base_lo $src_shared_limit $src_shared_limit_hi $src_shared_limit_lo $sgpr30 $sgpr31 $sgpr32 $sgpr33 $sgpr34 $sgpr35 $sgpr36 $sgpr37 $sgpr38 $sgpr39 $sgpr40 $sgpr41 $sgpr42 $sgpr43 $sgpr44 $sgpr45 $sgpr46 $sgpr47 $sgpr48 and 1194 more...>, implicit $sgpr4_sgpr5, implicit undef $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0, implicit $sgpr1, implicit-def dead $vgpr0, implicit-def dead $vgpr1
S_ENDPGM 0
# End machine code for function ker_TestDynamicAllocInAllThreads_CodeObj.
Target/faulty machine instruction (it copies a VGPR into an SGPR):
$sgpr1 = COPY killed renamable $vgpr3
Command:
llc -mtriple amdgcn-amd-amdhsa -mcpu gfx1030 reduced.ll -o -
This was exposed by #101609 when compiling hip-tests.