Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2798,6 +2798,12 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;

/// Returns true if the target's addressing modes can directly reference
/// thread-local storage (TLS) for the given global value.
///
/// The default implementation returns false; targets that are able to fold
/// a TLS access into an addressing mode override this hook.
virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
return false;
}

/// Return the preferred common base offset.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
int64_t MaxOffset) const {
Expand Down
29 changes: 25 additions & 4 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5082,6 +5082,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
}
return true;
}
case Instruction::Call:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
if (TLI.addressingModeSupportsTLS(GV))
return matchAddr(AddrInst->getOperand(0), Depth);
}
}
break;
}
return false;
}
Expand Down Expand Up @@ -5620,11 +5629,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return Modified;
}

if (AddrMode.BaseGV) {
GlobalValue *BaseGV = AddrMode.BaseGV;
if (BaseGV != nullptr) {
if (ResultPtr)
return Modified;

ResultPtr = AddrMode.BaseGV;
if (BaseGV->isThreadLocal()) {
ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
} else {
ResultPtr = BaseGV;
}
}

// If the real base value actually came from an inttoptr, then the matcher
Expand Down Expand Up @@ -5789,8 +5803,15 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}

// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
GlobalValue *BaseGV = AddrMode.BaseGV;
if (BaseGV != nullptr) {
Value *BaseGVPtr;
if (BaseGV->isThreadLocal()) {
BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
} else {
BaseGVPtr = BaseGV;
}
Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
if (Result)
Result = Builder.CreateAdd(Result, V, "sunkaddr");
else
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18928,6 +18928,30 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for this target.");
}

bool X86TargetLowering::addressingModeSupportsTLS(const GlobalValue &GV) const {
if (Subtarget.is64Bit() && Subtarget.isTargetELF()) {
const TargetMachine &TM = getTargetMachine();
TLSModel::Model Model = TM.getTLSModel(&GV);
switch (Model) {
case TLSModel::LocalExec:
case TLSModel::InitialExec:
// We can include the %fs segment register in addressing modes.
return true;
case TLSModel::LocalDynamic:
case TLSModel::GeneralDynamic:
// These models do not result in %fs relative addresses unless
// TLS descriptior are used.
//
// Even in the case of TLS descriptors we currently have no way to model
// the difference between %fs access and the computations needed for the
// offset and returning `true` for TLS-desc currently duplicates both
// which is detrimental :-/
return false;
}
}
return false;
}

/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
/// TODO: Can this be moved to general expansion code?
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,8 @@ namespace llvm {
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;

bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
Expand Down
193 changes: 193 additions & 0 deletions llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -o - %s | FileCheck %s --check-prefix=NOPIC
; RUN: llc -o - %s -relocation-model=pic | FileCheck %s --check-prefix=PIC
; RUN: llc -o - %s -relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=TLSDESC

target triple = "x86_64--linux-gnu"

declare void @effect()
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)

@foo_local = dso_local thread_local(localexec) global i32 0, align 4

; Loads the local-exec TLS variable @foo_local in the entry block and again,
; at a variable offset, after a call in %if.then.
; The NOPIC checks show the second access folded into a single %fs-relative
; operand (foo_local@TPOFF+168 plus a scaled index). The PIC and TLSDESC checks
; show the base address formed from %fs:0 with leaq before the call instead.
define i32 @func_local_tls(i32 %arg0, i64 %arg1) nounwind {
; NOPIC-LABEL: func_local_tls:
; NOPIC: # %bb.0: # %entry
; NOPIC-NEXT: pushq %rbp
; NOPIC-NEXT: pushq %rbx
; NOPIC-NEXT: pushq %rax
; NOPIC-NEXT: movl %fs:foo_local@TPOFF, %ebp
; NOPIC-NEXT: testl %edi, %edi
; NOPIC-NEXT: movl %ebp, %eax
; NOPIC-NEXT: jne .LBB0_2
; NOPIC-NEXT: # %bb.1: # %if.then
; NOPIC-NEXT: movq %rsi, %rbx
; NOPIC-NEXT: callq effect@PLT
; NOPIC-NEXT: movl %fs:foo_local@TPOFF+168(,%rbx,4), %eax
; NOPIC-NEXT: .LBB0_2: # %if.end
; NOPIC-NEXT: addl %ebp, %eax
; NOPIC-NEXT: addq $8, %rsp
; NOPIC-NEXT: popq %rbx
; NOPIC-NEXT: popq %rbp
; NOPIC-NEXT: retq
;
; PIC-LABEL: func_local_tls:
; PIC: # %bb.0: # %entry
; PIC-NEXT: pushq %rbp
; PIC-NEXT: pushq %r14
; PIC-NEXT: pushq %rbx
; PIC-NEXT: movl %fs:.Lfoo_local$local@TPOFF, %ebp
; PIC-NEXT: testl %edi, %edi
; PIC-NEXT: movl %ebp, %eax
; PIC-NEXT: jne .LBB0_2
; PIC-NEXT: # %bb.1: # %if.then
; PIC-NEXT: movq %rsi, %rbx
; PIC-NEXT: movq %fs:0, %rax
; PIC-NEXT: leaq .Lfoo_local$local@TPOFF(%rax), %r14
; PIC-NEXT: callq effect@PLT
; PIC-NEXT: movl 168(%r14,%rbx,4), %eax
; PIC-NEXT: .LBB0_2: # %if.end
; PIC-NEXT: addl %ebp, %eax
; PIC-NEXT: popq %rbx
; PIC-NEXT: popq %r14
; PIC-NEXT: popq %rbp
; PIC-NEXT: retq
;
; TLSDESC-LABEL: func_local_tls:
; TLSDESC: # %bb.0: # %entry
; TLSDESC-NEXT: pushq %rbp
; TLSDESC-NEXT: pushq %r14
; TLSDESC-NEXT: pushq %rbx
; TLSDESC-NEXT: movl %fs:.Lfoo_local$local@TPOFF, %ebp
; TLSDESC-NEXT: testl %edi, %edi
; TLSDESC-NEXT: movl %ebp, %eax
; TLSDESC-NEXT: jne .LBB0_2
; TLSDESC-NEXT: # %bb.1: # %if.then
; TLSDESC-NEXT: movq %rsi, %rbx
; TLSDESC-NEXT: movq %fs:0, %rax
; TLSDESC-NEXT: leaq .Lfoo_local$local@TPOFF(%rax), %r14
; TLSDESC-NEXT: callq effect@PLT
; TLSDESC-NEXT: movl 168(%r14,%rbx,4), %eax
; TLSDESC-NEXT: .LBB0_2: # %if.end
; TLSDESC-NEXT: addl %ebp, %eax
; TLSDESC-NEXT: popq %rbx
; TLSDESC-NEXT: popq %r14
; TLSDESC-NEXT: popq %rbp
; TLSDESC-NEXT: retq
entry:
%addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo_local)
%load0 = load i32, ptr %addr, align 4
%cond = icmp eq i32 %arg0, 0
br i1 %cond, label %if.then, label %if.end

if.then:
tail call void @effect()
; Element index is %arg1 + 42 on i32 elements, i.e. a byte offset of
; 4 * %arg1 + 168, which is the 168 displacement seen in the checks above.
%x = add i64 %arg1, 42
%addr1 = getelementptr inbounds i32, ptr %addr, i64 %x
%load1 = load i32, ptr %addr1, align 4
br label %if.end

if.end:
%phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
%ret = add i32 %phi, %load0
ret i32 %ret
}

@foo_nonlocal = thread_local global i32 0, align 4

; Same access pattern as the local-exec case, but on @foo_nonlocal, which is
; declared thread_local without dso_local or an explicit TLS model.
; The NOPIC checks load the offset through foo_nonlocal@GOTTPOFF and keep the
; second access %fs-relative. The PIC checks call __tls_get_addr and reuse the
; returned pointer after the call. The TLSDESC checks add %fs:0 to the
; descriptor result and use a plain base register for the second access.
define i32 @func_nonlocal_tls(i32 %arg0, i64 %arg1) nounwind {
; NOPIC-LABEL: func_nonlocal_tls:
; NOPIC: # %bb.0: # %entry
; NOPIC-NEXT: pushq %rbp
; NOPIC-NEXT: pushq %r14
; NOPIC-NEXT: pushq %rbx
; NOPIC-NEXT: movq foo_nonlocal@GOTTPOFF(%rip), %r14
; NOPIC-NEXT: movl %fs:(%r14), %ebp
; NOPIC-NEXT: testl %edi, %edi
; NOPIC-NEXT: movl %ebp, %eax
; NOPIC-NEXT: jne .LBB1_2
; NOPIC-NEXT: # %bb.1: # %if.then
; NOPIC-NEXT: movq %rsi, %rbx
; NOPIC-NEXT: callq effect@PLT
; NOPIC-NEXT: movl %fs:168(%r14,%rbx,4), %eax
; NOPIC-NEXT: .LBB1_2: # %if.end
; NOPIC-NEXT: addl %ebp, %eax
; NOPIC-NEXT: popq %rbx
; NOPIC-NEXT: popq %r14
; NOPIC-NEXT: popq %rbp
; NOPIC-NEXT: retq
;
; PIC-LABEL: func_nonlocal_tls:
; PIC: # %bb.0: # %entry
; PIC-NEXT: pushq %rbp
; PIC-NEXT: pushq %r15
; PIC-NEXT: pushq %r14
; PIC-NEXT: pushq %rbx
; PIC-NEXT: pushq %rax
; PIC-NEXT: movq %rsi, %rbx
; PIC-NEXT: movl %edi, %ebp
; PIC-NEXT: data16
; PIC-NEXT: leaq foo_nonlocal@TLSGD(%rip), %rdi
; PIC-NEXT: data16
; PIC-NEXT: data16
; PIC-NEXT: rex64
; PIC-NEXT: callq __tls_get_addr@PLT
; PIC-NEXT: movq %rax, %r14
; PIC-NEXT: movl (%rax), %r15d
; PIC-NEXT: testl %ebp, %ebp
; PIC-NEXT: movl %r15d, %eax
; PIC-NEXT: jne .LBB1_2
; PIC-NEXT: # %bb.1: # %if.then
; PIC-NEXT: callq effect@PLT
; PIC-NEXT: movl 168(%r14,%rbx,4), %eax
; PIC-NEXT: .LBB1_2: # %if.end
; PIC-NEXT: addl %r15d, %eax
; PIC-NEXT: addq $8, %rsp
; PIC-NEXT: popq %rbx
; PIC-NEXT: popq %r14
; PIC-NEXT: popq %r15
; PIC-NEXT: popq %rbp
; PIC-NEXT: retq
;
; TLSDESC-LABEL: func_nonlocal_tls:
; TLSDESC: # %bb.0: # %entry
; TLSDESC-NEXT: pushq %rbp
; TLSDESC-NEXT: pushq %r14
; TLSDESC-NEXT: pushq %rbx
; TLSDESC-NEXT: leaq foo_nonlocal@tlsdesc(%rip), %rax
; TLSDESC-NEXT: callq *foo_nonlocal@tlscall(%rax)
; TLSDESC-NEXT: movl %fs:(%rax), %ebp
; TLSDESC-NEXT: testl %edi, %edi
; TLSDESC-NEXT: movl %ebp, %ecx
; TLSDESC-NEXT: jne .LBB1_2
; TLSDESC-NEXT: # %bb.1: # %if.then
; TLSDESC-NEXT: movq %rsi, %rbx
; TLSDESC-NEXT: addq %fs:0, %rax
; TLSDESC-NEXT: movq %rax, %r14
; TLSDESC-NEXT: callq effect@PLT
; TLSDESC-NEXT: movl 168(%r14,%rbx,4), %ecx
; TLSDESC-NEXT: .LBB1_2: # %if.end
; TLSDESC-NEXT: addl %ebp, %ecx
; TLSDESC-NEXT: movl %ecx, %eax
; TLSDESC-NEXT: popq %rbx
; TLSDESC-NEXT: popq %r14
; TLSDESC-NEXT: popq %rbp
; TLSDESC-NEXT: retq
entry:
%addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo_nonlocal)
%load0 = load i32, ptr %addr, align 4
%cond = icmp eq i32 %arg0, 0
br i1 %cond, label %if.then, label %if.end

if.then:
tail call void @effect()
; Element index is %arg1 + 42 on i32 elements, i.e. a byte offset of
; 4 * %arg1 + 168, which is the 168 displacement seen in the checks above.
%x = add i64 %arg1, 42
%addr1 = getelementptr inbounds i32, ptr %addr, i64 %x
%load1 = load i32, ptr %addr1, align 4
br label %if.end

if.end:
%phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
%ret = add i32 %phi, %load0
ret i32 %ret
}
86 changes: 86 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-tls.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s

target triple = "x86_64--linux-gnu"

@foo = dso_local thread_local(localexec) global i32 0, align 4

declare void @effect()
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)

; The TLS address of @foo is computed once in the entry block and reused by a
; plain load in %if.then, after a call. The expected output below has a fresh
; @llvm.threadlocal.address call emitted in %if.then feeding that load.
define i32 @func0(i32 %arg) {
; CHECK-LABEL: define i32 @func0(
; CHECK-SAME: i32 [[ARG:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADDR:%.*]] = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr [[ADDR]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[ARG]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: tail call void @effect()
; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @foo)
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LOAD1]], [[IF_THEN]] ], [ [[LOAD0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PHI]], [[LOAD0]]
; CHECK-NEXT: ret i32 [[RET]]
;
entry:
%addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
%load0 = load i32, ptr %addr, align 4
%cond = icmp eq i32 %arg, 0
br i1 %cond, label %if.then, label %if.end

if.then:
tail call void @effect()
; Reuses the address computed in the entry block, in a different block.
%load1 = load i32, ptr %addr, align 4
br label %if.end

if.end:
%phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
%ret = add i32 %phi, %load0
ret i32 %ret
}

; Like @func0, but the load in %if.then goes through a getelementptr with a
; variable, sign-extended index. The expected output below has a fresh
; @llvm.threadlocal.address call in %if.then, with the index scaled by 4 and
; applied through an i8 getelementptr.
define i32 @func1(i32 %arg0, i32 %arg1) {
; CHECK-LABEL: define i32 @func1(
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADDR:%.*]] = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr [[ADDR]], align 4
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[ARG0]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: tail call void @effect()
; CHECK-NEXT: [[X:%.*]] = add i32 [[ARG1]], 42
; CHECK-NEXT: [[X64:%.*]] = sext i32 [[X]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @foo)
; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[X64]], 4
; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[SUNKADDR]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ADDR1]], align 4
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[LOAD1]], [[IF_THEN]] ], [ [[LOAD0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PHI]], [[LOAD0]]
; CHECK-NEXT: ret i32 [[RET]]
;
entry:
%addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
%load0 = load i32, ptr %addr, align 4
%cond = icmp eq i32 %arg0, 0
br i1 %cond, label %if.then, label %if.end

if.then:
tail call void @effect()
; The index is computed in i32 and sign-extended before indexing i32 elements.
%x = add i32 %arg1, 42
%x64 = sext i32 %x to i64
%addr1 = getelementptr inbounds i32, ptr %addr, i64 %x64
%load1 = load i32, ptr %addr1, align 4
br label %if.end

if.end:
%phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
%ret = add i32 %phi, %load0
ret i32 %ret
}