@@ -46,9 +46,8 @@ body: |
4646 ; GFX90A: liveins: $vgpr0_vgpr1
4747 ; GFX90A-NEXT: {{ $}}
4848 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
49- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
50- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[DEF]]
51- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec
49+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
50+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, implicit $exec
5251 %0:vreg_64_align2 = COPY $vgpr0_vgpr1
5352 %1:vreg_64 = IMPLICIT_DEF
5453 %2:vreg_64_align2 = COPY killed %1
@@ -148,9 +147,8 @@ body: |
148147 ; GFX90A: liveins: $vgpr0_vgpr1
149148 ; GFX90A-NEXT: {{ $}}
150149 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
151- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96 = IMPLICIT_DEF
152- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[DEF]]
153- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec
150+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
151+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[DEF]], 0, 0, implicit $exec
154152 %0:vreg_64_align2 = COPY $vgpr0_vgpr1
155153 %1:vreg_96 = IMPLICIT_DEF
156154 %2:vreg_96_align2 = COPY killed %1
@@ -326,11 +324,59 @@ body: |
326324 ; GFX90A: liveins: $vgpr0_vgpr1
327325 ; GFX90A-NEXT: {{ $}}
328326 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
329- ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
330- ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY killed [[DEF]]
331- ; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec
327+ ; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
328+ ; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[DEF]], 0, 0, implicit $exec
332329 %0:vreg_64_align2 = COPY $vgpr0_vgpr1
333330 %1:vreg_128 = IMPLICIT_DEF
334331 %2:vreg_128_align2 = COPY killed %1
335332 GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
336333 ...
334+
335+ # Make sure the alignment requirement is respected for VS_64 operand uses:
336+ # the COPY of %2.sub1_sub2 must not be folded into V_PK_ADD_F32 on GFX90A.
337+ ---
338+ name : aligned_vgpr_vs_64_constraint
339+ tracksRegLiveness : true
340+ isSSA : true
341+ body : |
342+ bb.0.entry:
343+ liveins: $vgpr0, $sgpr8_sgpr9
344+
345+ ; GFX908-LABEL: name: aligned_vgpr_vs_64_constraint
346+ ; GFX908: liveins: $vgpr0, $sgpr8_sgpr9
347+ ; GFX908-NEXT: {{ $}}
348+ ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
349+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
350+ ; GFX908-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[COPY]], [[COPY1]], 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
351+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0
352+ ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
353+ ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
354+ ; GFX908-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed [[REG_SEQUENCE]], 0, [[GLOBAL_LOAD_DWORDX3_SADDR]].sub1_sub2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
355+ ; GFX908-NEXT: DS_WRITE_B64_gfx9 [[V_MOV_B32_e32_]], killed [[V_PK_ADD_F32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
356+ ; GFX908-NEXT: S_ENDPGM 0
357+ ;
358+ ; GFX90A-LABEL: name: aligned_vgpr_vs_64_constraint
359+ ; GFX90A: liveins: $vgpr0, $sgpr8_sgpr9
360+ ; GFX90A-NEXT: {{ $}}
361+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
362+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
363+ ; GFX90A-NEXT: [[GLOBAL_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR [[COPY]], [[COPY1]], 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
364+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX3_SADDR]].sub0
365+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:vreg_64_align2 = COPY killed [[GLOBAL_LOAD_DWORDX3_SADDR]].sub1_sub2
366+ ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
367+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
368+ ; GFX90A-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed [[REG_SEQUENCE]], 0, killed [[COPY3]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
369+ ; GFX90A-NEXT: DS_WRITE_B64_gfx9 [[V_MOV_B32_e32_]], killed [[V_PK_ADD_F32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
370+ ; GFX90A-NEXT: S_ENDPGM 0
371+ %0:sgpr_64 = COPY $sgpr8_sgpr9
372+ %1:vgpr_32 = COPY $vgpr0
373+ %2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3_SADDR %0, %1, 16, 0, implicit $exec :: (load (s96), align 4, addrspace 1)
374+ %3:vgpr_32 = COPY %2.sub0
375+ %4:vreg_64_align2 = COPY killed %2.sub1_sub2
376+ %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
377+ %6:vreg_64_align2 = REG_SEQUENCE %3, %subreg.sub0, %5, %subreg.sub1
378+ %7:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, killed %6, 0, killed %4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
379+ DS_WRITE_B64_gfx9 %5, killed %7, 0, 0, implicit $exec :: (store (s64), addrspace 3)
380+ S_ENDPGM 0
381+
382+ ...
0 commit comments