Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2935,6 +2935,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
widenScalarDst(MI, WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could it use setInsertPr(.., MI) or MI->getIterator()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. That would insert before the original instruction, this needs to be after the instruction (and after the other already inserted instruction)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

widenScalarDst would increment it again so that it inserts after. It would just put the truncs in a different order.

widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
Expand Down Expand Up @@ -2972,6 +2973,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
widenScalarDst(MI, WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
widenScalarDst(MI, WideTy, 1);
Observer.changedInstr(MI);
return Legalized;
Expand Down
125 changes: 125 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-divrem.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s

---
name: test_sdivrem_s16
body: |
bb.0:
liveins: $vgpr0, $vgpr1

; GFX8-LABEL: name: test_sdivrem_s16
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16
; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32)
; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]]
; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]]
; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]]
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]]
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]]
; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]]
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]]
; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]]
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[XOR1]]
; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT1]]
; GFX8-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]]
; GFX8-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]]
; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
; GFX8-NEXT: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[ASHR]]
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SUB4]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB5]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[SUB4]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[SUB5]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16), %5:_(s16) = G_SDIVREM %2, %3
S_NOP 0, implicit %4, implicit %5
%6:_(s32) = G_ANYEXT %4
%7:_(s32) = G_ANYEXT %5
$vgpr0 = COPY %6
$vgpr0 = COPY %7

...

---
name: test_udivrem_s16
body: |
bb.0:
liveins: $vgpr0, $vgpr1

; GFX8-LABEL: name: test_udivrem_s16
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; GFX8-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32)
; GFX8-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]]
; GFX8-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]]
; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]]
; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]]
; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]]
; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]]
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]]
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]]
; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[AND1]]
; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT1]]
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT3]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[SELECT2]](s32)
; GFX8-NEXT: $vgpr0 = COPY [[SELECT3]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16), %5:_(s16) = G_UDIVREM %2, %3
S_NOP 0, implicit %4, implicit %5
%6:_(s32) = G_ANYEXT %4
%7:_(s32) = G_ANYEXT %5
$vgpr0 = COPY %6
$vgpr0 = COPY %7

...