Skip to content

Commit 154ed2a

Browse files
author
Guy Blank
committed
[X86][FastISel] Use a COPY from K register to a GPR instead of a K operation
The KORTEST was introduced due to a bug where a TEST instruction used a K register. However, it turns out that the opposite case, a KORTEST using a GPR, is now happening. This change removes the KORTEST flow and adds a COPY instruction from the K register to a GPR. Differential Revision: https://reviews.llvm.org/D24953 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282580 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 926244f commit 154ed2a

File tree

4 files changed

+54
-31
lines changed

4 files changed

+54
-31
lines changed

lib/Target/X86/X86FastISel.cpp

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,15 +1731,17 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
17311731
unsigned OpReg = getRegForValue(BI->getCondition());
17321732
if (OpReg == 0) return false;
17331733

1734-
// In case OpReg is a K register, kortest against itself.
1735-
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass)
1736-
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::KORTESTWrr))
1737-
.addReg(OpReg)
1738-
.addReg(OpReg);
1739-
else
1740-
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1741-
.addReg(OpReg)
1742-
.addImm(1);
1734+
// In case OpReg is a K register, COPY to a GPR
1735+
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1736+
unsigned KOpReg = OpReg;
1737+
OpReg = createResultReg(&X86::GR8RegClass);
1738+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1739+
TII.get(TargetOpcode::COPY), OpReg)
1740+
.addReg(KOpReg);
1741+
}
1742+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1743+
.addReg(OpReg)
1744+
.addImm(1);
17431745
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
17441746
.addMBB(TrueMBB);
17451747
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -2073,16 +2075,17 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
20732075
return false;
20742076
bool CondIsKill = hasTrivialKill(Cond);
20752077

2076-
// In case OpReg is a K register, kortest against itself.
2077-
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
2078+
// In case CondReg is a K register, COPY it to a GPR.
2079+
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2080+
unsigned KCondReg = CondReg;
2081+
CondReg = createResultReg(&X86::GR8RegClass);
20782082
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2079-
TII.get(X86::KORTESTWrr))
2080-
.addReg(CondReg, getKillRegState(CondIsKill))
2081-
.addReg(CondReg);
2082-
else
2083-
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2084-
.addReg(CondReg, getKillRegState(CondIsKill))
2085-
.addImm(1);
2083+
TII.get(TargetOpcode::COPY), CondReg)
2084+
.addReg(KCondReg, getKillRegState(CondIsKill));
2085+
}
2086+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2087+
.addReg(CondReg, getKillRegState(CondIsKill))
2088+
.addImm(1);
20862089
}
20872090

20882091
const Value *LHS = I->getOperand(1);
@@ -2254,16 +2257,17 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
22542257
return false;
22552258
bool CondIsKill = hasTrivialKill(Cond);
22562259

2257-
// In case OpReg is a K register, kortest against itself.
2258-
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass)
2260+
// In case CondReg is a K register, COPY it to a GPR.
2261+
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2262+
unsigned KCondReg = CondReg;
2263+
CondReg = createResultReg(&X86::GR8RegClass);
22592264
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2260-
TII.get(X86::KORTESTWrr))
2261-
.addReg(CondReg, getKillRegState(CondIsKill))
2262-
.addReg(CondReg);
2263-
else
2264-
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2265-
.addReg(CondReg, getKillRegState(CondIsKill))
2266-
.addImm(1);
2265+
TII.get(TargetOpcode::COPY), CondReg)
2266+
.addReg(KCondReg, getKillRegState(CondIsKill));
2267+
}
2268+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2269+
.addReg(CondReg, getKillRegState(CondIsKill))
2270+
.addImm(1);
22672271
}
22682272

22692273
const Value *LHS = I->getOperand(1);

test/CodeGen/X86/avx512-fsel.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ define i32 @test(float %a, float %b) {
2626
; CHECK-NEXT: movb %dil, %r8b
2727
; CHECK-NEXT: andl $1, %r8d
2828
; CHECK-NEXT: kmovw %r8d, %k1
29-
; CHECK-NEXT: kortestw %k1, %k1
29+
; CHECK-NEXT: kmovw %k1, %ecx
30+
; CHECK-NEXT: testb $1, %cl
3031
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) ## 1-byte Spill
3132
; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
3233
; CHECK-NEXT: jne LBB0_1
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s
2+
3+
define i1 @test_i1(i1* %b) {
4+
; CHECK-LABEL: test_i1:
5+
; CHECK: # BB#0: # %entry
6+
; CHECK-NEXT: testb $1, (%rdi)
7+
entry:
8+
%0 = load i1, i1* %b, align 1
9+
br i1 %0, label %in, label %out
10+
in:
11+
ret i1 0
12+
out:
13+
ret i1 1
14+
}
15+

test/CodeGen/X86/fast-isel-select-cmov.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ define zeroext i16 @select_cmov_i16(i1 zeroext %cond, i16 zeroext %a, i16 zeroex
1616
; AVX512-LABEL: select_cmov_i16:
1717
; AVX512: ## BB#0:
1818
; AVX512-NEXT: kmovw %edi, %k0
19-
; AVX512-NEXT: kortestw %k0, %k0
19+
; AVX512-NEXT: kmovw %k0, %eax
20+
; AVX512-NEXT: testb $1, %al
2021
; AVX512-NEXT: cmovew %dx, %si
2122
; AVX512-NEXT: movzwl %si, %eax
2223
; AVX512-NEXT: retq
@@ -47,7 +48,8 @@ define i32 @select_cmov_i32(i1 zeroext %cond, i32 %a, i32 %b) {
4748
; AVX512-LABEL: select_cmov_i32:
4849
; AVX512: ## BB#0:
4950
; AVX512-NEXT: kmovw %edi, %k0
50-
; AVX512-NEXT: kortestw %k0, %k0
51+
; AVX512-NEXT: kmovw %k0, %eax
52+
; AVX512-NEXT: testb $1, %al
5153
; AVX512-NEXT: cmovel %edx, %esi
5254
; AVX512-NEXT: movl %esi, %eax
5355
; AVX512-NEXT: retq
@@ -78,7 +80,8 @@ define i64 @select_cmov_i64(i1 zeroext %cond, i64 %a, i64 %b) {
7880
; AVX512-LABEL: select_cmov_i64:
7981
; AVX512: ## BB#0:
8082
; AVX512-NEXT: kmovw %edi, %k0
81-
; AVX512-NEXT: kortestw %k0, %k0
83+
; AVX512-NEXT: kmovw %k0, %eax
84+
; AVX512-NEXT: testb $1, %al
8285
; AVX512-NEXT: cmoveq %rdx, %rsi
8386
; AVX512-NEXT: movq %rsi, %rax
8487
; AVX512-NEXT: retq

0 commit comments

Comments
 (0)