This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 4c690f3

[X86] Teach X86FixupBWInsts to promote MOV8rr/MOV16rr to MOV32rr.
This re-applies r268760, reverted in r268794. Fixes http://llvm.org/PR27670

The original imp-defs assertion was way overzealous: forward all implicit operands, except imp-defs of the new super-reg def (r268787 for GR64, but also possible for GR16->GR32), or imp-uses of the new super-reg use.

While there, mark the source use as Undef, and add an imp-use of the old source reg: that should cover any case of dead super-regs.

At the stage the pass runs, flags are unlikely to matter anyway; still, let's be as correct as possible. Also add MIR tests for the various interesting cases.

Original commit message:

Codesize is less (16) or equal (8), and we avoid partial dependencies.

Differential Revision: http://reviews.llvm.org/D19999

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268831 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9ea4c64 commit 4c690f3

18 files changed (+503, -227 lines)
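The effect of the change, sketched from the tests added below (an illustration, not an exhaustive description of the pass): an 8- or 16-bit register-to-register copy whose wider destination register is otherwise dead is rewritten as a full 32-bit copy, so the destination is completely redefined instead of partially written:

    movb %dil, %al      # before: writes only the low byte of %eax
    movl %edi, %eax     # after: same value in %al, but a full 32-bit definition

At the MachineInstr level the rewritten copy reads the 32-bit source as undef and keeps an implicit use of the original narrow source, e.g. %eax = MOV32rr undef %edi, implicit %dil, so liveness stays correct even if %edi as a whole was never defined.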

lib/Target/X86/X86FixupBWInsts.cpp

Lines changed: 50 additions & 0 deletions
@@ -90,6 +90,11 @@ class FixupBWInstPass : public MachineFunctionPass {
   /// OK, otherwise return nullptr.
   MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const;
 
+  /// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is
+  /// safe to do so. Return the replacement instruction if OK, otherwise return
+  /// nullptr.
+  MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
+
 public:
   static char ID;

@@ -226,6 +231,42 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
   return MIB;
 }
 
+MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
+  assert(MI->getNumExplicitOperands() == 2);
+  auto &OldDest = MI->getOperand(0);
+  auto &OldSrc = MI->getOperand(1);
+
+  unsigned NewDestReg;
+  if (!getSuperRegDestIfDead(MI, NewDestReg))
+    return nullptr;
+
+  unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
+
+  // This is only correct if we access the same subregister index: otherwise,
+  // we could try to replace "movb %ah, %al" with "movl %eax, %eax".
+  auto *TRI = &TII->getRegisterInfo();
+  if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
+      TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
+    return nullptr;
+
+  // Safe to change the instruction.
+  // Don't set src flags, as we don't know if we're also killing the superreg.
+  // However, the superregister might not be defined; make it explicit that
+  // we don't care about the higher bits by reading it as Undef, and adding
+  // an imp-use on the original subregister.
+  MachineInstrBuilder MIB =
+      BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
+          .addReg(NewSrcReg, RegState::Undef)
+          .addReg(OldSrc.getReg(), RegState::Implicit);
+
+  // Drop imp-defs/uses that would be redundant with the new def/use.
+  for (auto &Op : MI->implicit_operands())
+    if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
+      MIB.addOperand(Op);
+
+  return MIB;
+}
+
 void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
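To illustrate the subregister-index check above with the example from the comment (an illustration only, not an extra test): both operands must occupy the same slice of their 32-bit super-registers for the widened copy to be equivalent.

    movb %dil, %al    # low 8 bits -> low 8 bits: can become movl %edi, %eax
    movb %ah, %al     # high 8 bits -> low 8 bits: movl %eax, %eax would be a
                      # no-op and would not copy %ah into %al, so tryReplaceCopy
                      # returns nullptr and the copy is left untouched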

@@ -272,6 +313,15 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
     NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
     break;
 
+  case X86::MOV8rr:
+  case X86::MOV16rr:
+    // Always try to replace 8/16 bit copies with a 32 bit copy.
+    // Code size is either less (16) or equal (8), and there is sometimes a
+    // perf advantage from eliminating a false dependence on the upper portion
+    // of the register.
+    NewMI = tryReplaceCopy(MI);
+    break;
+
   default:
     // nothing to do here.
     break;
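To make the size claim in the comment concrete, here are typical encodings (byte counts recalled from the x86 encoding rules rather than stated in the commit, so treat them as an assumption):

    movw %di, %ax       # 66 89 f8  (3 bytes, operand-size prefix)
    movl %edi, %eax     # 89 f8     (2 bytes)  -> the 16-bit case always shrinks
    movb %dil, %al      # 40 88 f8  (3 bytes, REX prefix needed to address %dil)
    movb %cl, %al       # 88 c8     (2 bytes)
    movl %ecx, %eax     # 89 c8     (2 bytes)  -> the 8-bit case is equal or smaller

On top of code size, the 32-bit copy fully redefines the destination, so later uses of %eax/%ax do not inherit a false dependence on the register's previous contents.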

test/CodeGen/X86/2011-06-14-PreschedRegalias.ll

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 define i8 @f(i8 %v1, i8 %v2) nounwind {
 entry:
 ; CHECK: callq
-; CHECK: movb %{{.*}}, %al
+; CHECK: movl %{{.*}}, %eax
 ; CHECK: mulb
 ; CHECK: mulb
   %rval = tail call i8 @bar() nounwind

test/CodeGen/X86/anyext.ll

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
 ; X64-LABEL: bar:
 ; X64: # BB#0:
 ; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: movw %di, %ax
+; X64-NEXT: movl %edi, %eax
 ; X64-NEXT: divw %si
 ; X64-NEXT: andl $1, %eax
 ; X64-NEXT: retq

test/CodeGen/X86/avx512-calling-conv.ll

Lines changed: 1 addition & 1 deletion
@@ -461,7 +461,7 @@ define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT: movl %edi, (%esp)
 ; KNL_X32-NEXT: calll _test11
-; KNL_X32-NEXT: movb %al, %bl
+; KNL_X32-NEXT: movl %eax, %ebx
 ; KNL_X32-NEXT: movzbl %bl, %eax
 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)

test/CodeGen/X86/avx512-mask-op.ll

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ define i16 @mand16(i16 %x, i16 %y) {
 ; CHECK-NEXT: xorl %esi, %eax
 ; CHECK-NEXT: andl %esi, %edi
 ; CHECK-NEXT: orl %eax, %edi
-; CHECK-NEXT: movw %di, %ax
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %ma = bitcast i16 %x to <16 x i1>
   %mb = bitcast i16 %y to <16 x i1>

test/CodeGen/X86/avx512-select.ll

Lines changed: 2 additions & 2 deletions
@@ -72,7 +72,7 @@ define i8 @select05(i8 %a.0, i8 %m) {
 ; CHECK-LABEL: select05:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: orl %esi, %edi
-; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>
@@ -102,7 +102,7 @@ define i8 @select06(i8 %a.0, i8 %m) {
 ; CHECK-LABEL: select06:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %mask = bitcast i8 %m to <8 x i1>
   %a = bitcast i8 %a.0 to <8 x i1>

test/CodeGen/X86/avx512dq-mask-op.ll

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ define i8 @mand8(i8 %x, i8 %y) {
 ; CHECK-NEXT: xorl %esi, %eax
 ; CHECK-NEXT: andl %esi, %edi
 ; CHECK-NEXT: orl %eax, %edi
-; CHECK-NEXT: movb %dil, %al
+; CHECK-NEXT: movl %edi, %eax
 ; CHECK-NEXT: retq
   %ma = bitcast i8 %x to <8 x i1>
   %mb = bitcast i8 %y to <8 x i1>

test/CodeGen/X86/cmovcmov.ll

Lines changed: 3 additions & 3 deletions
@@ -250,14 +250,14 @@ attributes #0 = { nounwind }
 ; CMOV-DAG: movb $20, %al
 ; CMOV-DAG: movb $20, %dl
 ; CMOV: jl [[BB0:.LBB[0-9_]+]]
-; CMOV: movb %cl, %dl
+; CMOV: movl %ecx, %edx
 ; CMOV: [[BB0]]:
 ; CMOV: jg [[BB1:.LBB[0-9_]+]]
-; CMOV: movb %dl, %al
+; CMOV: movl %edx, %eax
 ; CMOV: [[BB1]]:
 ; CMOV: testl %edi, %edi
 ; CMOV: je [[BB2:.LBB[0-9_]+]]
-; CMOV: movb %dl, %al
+; CMOV: movl %edx, %eax
 ; CMOV: [[BB2]]:
 ; CMOV: movb %al, g8(%rip)
 ; CMOV: retq

test/CodeGen/X86/fixup-bw-copy.ll

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWOFF32 %s

target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"

define i8 @test_movb(i8 %a0) {
; BWON64-LABEL: test_movb:
; BWON64: # BB#0:
; BWON64-NEXT: movl %edi, %eax
; BWON64-NEXT: retq
;
; BWOFF64-LABEL: test_movb:
; BWOFF64: # BB#0:
; BWOFF64-NEXT: movb %dil, %al
; BWOFF64-NEXT: retq
;
; X32-LABEL: test_movb:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: retl
  ret i8 %a0
}

define i16 @test_movw(i16 %a0) {
; BWON64-LABEL: test_movw:
; BWON64: # BB#0:
; BWON64-NEXT: movl %edi, %eax
; BWON64-NEXT: retq
;
; BWOFF64-LABEL: test_movw:
; BWOFF64: # BB#0:
; BWOFF64-NEXT: movw %di, %ax
; BWOFF64-NEXT: retq
;
; BWON32-LABEL: test_movw:
; BWON32: # BB#0:
; BWON32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; BWON32-NEXT: retl
;
; BWOFF32-LABEL: test_movw:
; BWOFF32: # BB#0:
; BWOFF32-NEXT: movw {{[0-9]+}}(%esp), %ax
; BWOFF32-NEXT: retl
  ret i16 %a0
}

; Verify we don't mess with H-reg copies (only generated in 32-bit mode).
define i8 @test_movb_hreg(i16 %a0) {
; X64-LABEL: test_movb_hreg:
; X64: # BB#0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: addb %dil, %al
; X64-NEXT: retq
;
; X32-LABEL: test_movb_hreg:
; X32: # BB#0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addb %al, %ah
; X32-NEXT: movb %ah, %al
; X32-NEXT: retl
  %tmp0 = trunc i16 %a0 to i8
  %tmp1 = lshr i16 %a0, 8
  %tmp2 = trunc i16 %tmp1 to i8
  %tmp3 = add i8 %tmp0, %tmp2
  ret i8 %tmp3
}

test/CodeGen/X86/fixup-bw-copy.mir

Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
# RUN: llc -run-pass x86-fixup-bw-insts -mtriple=x86_64-- -o /dev/null %s 2>&1 | FileCheck %s

# Verify that we correctly deal with the flag edge cases when replacing
# copies by bigger copies, which is a pretty unusual transform.

--- |
  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

  define i8 @test_movb_killed(i8 %a0) {
    ret i8 %a0
  }

  define i8 @test_movb_impuse(i8 %a0) {
    ret i8 %a0
  }

  define i8 @test_movb_impdef_gr64(i8 %a0) {
    ret i8 %a0
  }

  define i8 @test_movb_impdef_gr32(i8 %a0) {
    ret i8 %a0
  }

  define i8 @test_movb_impdef_gr16(i8 %a0) {
    ret i8 %a0
  }

  define i16 @test_movw_impdef_gr32(i16 %a0) {
    ret i16 %a0
  }

  define i16 @test_movw_impdef_gr64(i16 %a0) {
    ret i16 %a0
  }

...

---
name: test_movb_killed
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
    %al = MOV8rr killed %dil
    RETQ killed %al

...

---
name: test_movb_impuse
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
    %al = MOV8rr %dil, implicit %edi
    RETQ killed %al

...

---
name: test_movb_impdef_gr64
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil, implicit-def %rax
    %al = MOV8rr %dil, implicit-def %rax
    RETQ killed %al

...

---
name: test_movb_impdef_gr32
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
    %al = MOV8rr %dil, implicit-def %eax
    RETQ killed %al

...

---
name: test_movb_impdef_gr16
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
    %al = MOV8rr %dil, implicit-def %ax
    RETQ killed %al

...

---
name: test_movw_impdef_gr32
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %di
    %ax = MOV16rr %di, implicit-def %eax
    RETQ killed %ax

...

---
name: test_movw_impdef_gr64
allVRegsAllocated: true
isSSA: false
tracksRegLiveness: true
liveins:
  - { reg: '%edi' }
body: |
  bb.0 (%ir-block.0):
    liveins: %edi

    ; CHECK: %eax = MOV32rr undef %edi, implicit %di, implicit-def %rax
    %ax = MOV16rr %di, implicit-def %rax
    RETQ killed %ax

...
