Skip to content

Commit 37f4aee

Browse files
committed
Fixups
1 parent c7decfc commit 37f4aee

File tree

2 files changed

+131
-17
lines changed

2 files changed

+131
-17
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,9 +2351,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
23512351
SDNode *Node, SmallVectorImpl<SDValue> &Results) {
23522352
EVT RetVT = Node->getValueType(0);
23532353
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2354-
2355-
TargetLowering::ArgListTy Args;
2356-
TargetLowering::ArgListEntry Entry{};
2354+
RTLIB::Libcall LC = RTLIB::getFSINCOS(RetVT);
23572355

23582356
// Find users of the node that store the results. The destination pointers
23592357
// can be used instead of creating stack allocations.
@@ -2366,17 +2364,15 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
23662364
if (!ISD::isNormalStore(User))
23672365
continue;
23682366
auto *ST = cast<StoreSDNode>(User);
2367+
if (!ST->isSimple() || ST->getPointerInfo().getAddrSpace() != 0 ||
2368+
ST->getAlign() < DAG.getDataLayout().getABITypeAlign(RetTy))
2369+
continue;
23692370
if (Use.getResNo() == 0)
23702371
SinST = ST;
23712372
if (Use.getResNo() == 1)
23722373
CosST = ST;
23732374
}
23742375

2375-
// Pass the argument.
2376-
Entry.Node = Node->getOperand(0);
2377-
Entry.Ty = RetTy;
2378-
Args.push_back(Entry);
2379-
23802376
auto GetOrCreateOutPointer = [&](StoreSDNode *MaybeStore) {
23812377
if (MaybeStore)
23822378
return std::make_pair(MaybeStore->getBasePtr(),
@@ -2388,6 +2384,14 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
23882384
return std::make_pair(StackSlot, PtrInfo);
23892385
};
23902386

2387+
TargetLowering::ArgListTy Args;
2388+
TargetLowering::ArgListEntry Entry{};
2389+
2390+
// Pass the argument.
2391+
Entry.Node = Node->getOperand(0);
2392+
Entry.Ty = RetTy;
2393+
Args.push_back(Entry);
2394+
23912395
// Pass the output pointer for the sin result.
23922396
auto SinPtr = GetOrCreateOutPointer(SinST);
23932397
Entry.Node = SinPtr.first;
@@ -2400,18 +2404,35 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
24002404
Entry.Ty = PointerType::getUnqual(RetTy->getContext());
24012405
Args.push_back(Entry);
24022406

2403-
RTLIB::Libcall LC = RTLIB::getFSINCOS(RetVT);
2404-
auto [Call, Chain] = ExpandLibCall(LC, Node, std::move(Args), false);
2405-
2406-
// Replace explicit stores with the library call.
2407+
// Combine any input chains from the stores.
2408+
SmallVector<SDValue, 2> InChains{};
24072409
for (StoreSDNode *ST : {SinST, CosST}) {
24082410
if (ST)
2409-
DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), Chain);
2411+
InChains.push_back(ST->getChain());
24102412
}
2413+
if (InChains.empty())
2414+
InChains.push_back(DAG.getEntryNode());
24112415

24122416
SDLoc DL(Node);
2417+
SDValue InChain = DAG.getTokenFactor(DL, InChains);
2418+
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
2419+
TLI.getPointerTy(DAG.getDataLayout()));
2420+
TargetLowering::CallLoweringInfo CLI(DAG);
2421+
CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
2422+
TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
2423+
std::move(Args));
2424+
2425+
auto [Call, OutChain] = TLI.LowerCallTo(CLI);
2426+
2427+
// Replace the stores with the library call.
2428+
for (StoreSDNode *ST : {SinST, CosST}) {
2429+
if (!ST)
2430+
continue;
2431+
DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
2432+
}
2433+
24132434
for (auto [Ptr, PtrInfo] : {SinPtr, CosPtr}) {
2414-
SDValue LoadExp = DAG.getLoad(RetVT, DL, Chain, Ptr, PtrInfo);
2435+
SDValue LoadExp = DAG.getLoad(RetVT, DL, OutChain, Ptr, PtrInfo);
24152436
Results.push_back(LoadExp);
24162437
}
24172438
}

llvm/test/CodeGen/AArch64/sincos-stack-slots.ll

Lines changed: 96 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
33

4+
; This file tests eliding stack slots when lowering the FSINCOS ISD node.
5+
46
define { float, float } @sincos_f32_value_return(float %x) {
57
; CHECK-LABEL: sincos_f32_value_return:
68
; CHECK: // %bb.0: // %entry
@@ -91,8 +93,8 @@ define void @sincos_f64_ptr_return(double %x, ptr %out_sin, ptr %out_cos) {
9193
entry:
9294
%sin = tail call double @llvm.sin.f64(double %x)
9395
%cos = tail call double @llvm.cos.f64(double %x)
94-
store double %sin, ptr %out_sin, align 4
95-
store double %cos, ptr %out_cos, align 4
96+
store double %sin, ptr %out_sin, align 8
97+
store double %cos, ptr %out_cos, align 8
9698
ret void
9799
}
98100

@@ -110,6 +112,97 @@ define double @sincos_f64_mixed_return(double %x, ptr %out_sin) {
110112
entry:
111113
%sin = tail call double @llvm.sin.f64(double %x)
112114
%cos = tail call double @llvm.cos.f64(double %x)
113-
store double %sin, ptr %out_sin, align 4
115+
store double %sin, ptr %out_sin, align 8
114116
ret double %cos
115117
}
118+
119+
; Negative test. We can't fold volatile stores into the library call.
120+
define void @sincos_volatile_result_stores(float %x, ptr %out_sin, ptr %out_cos) {
121+
; CHECK-LABEL: sincos_volatile_result_stores:
122+
; CHECK: // %bb.0: // %entry
123+
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
124+
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
125+
; CHECK-NEXT: .cfi_def_cfa_offset 32
126+
; CHECK-NEXT: .cfi_offset w19, -8
127+
; CHECK-NEXT: .cfi_offset w20, -16
128+
; CHECK-NEXT: .cfi_offset w30, -32
129+
; CHECK-NEXT: mov x19, x1
130+
; CHECK-NEXT: mov x20, x0
131+
; CHECK-NEXT: add x0, sp, #12
132+
; CHECK-NEXT: add x1, sp, #8
133+
; CHECK-NEXT: bl sincosf
134+
; CHECK-NEXT: ldp s1, s0, [sp, #8]
135+
; CHECK-NEXT: str s0, [x20]
136+
; CHECK-NEXT: str s1, [x19]
137+
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
138+
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
139+
; CHECK-NEXT: ret
140+
entry:
141+
%sin = tail call float @llvm.sin.f32(float %x)
142+
%cos = tail call float @llvm.cos.f32(float %x)
143+
store volatile float %sin, ptr %out_sin, align 4
144+
store volatile float %cos, ptr %out_cos, align 4
145+
ret void
146+
}
147+
148+
; Negative test. We can't fold atomic stores into the library call.
149+
define void @sincos_atomic_result_stores(float %x, ptr %out_sin, ptr %out_cos) {
150+
; CHECK-LABEL: sincos_atomic_result_stores:
151+
; CHECK: // %bb.0: // %entry
152+
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
153+
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
154+
; CHECK-NEXT: .cfi_def_cfa_offset 32
155+
; CHECK-NEXT: .cfi_offset w19, -8
156+
; CHECK-NEXT: .cfi_offset w20, -16
157+
; CHECK-NEXT: .cfi_offset w30, -32
158+
; CHECK-NEXT: mov x19, x1
159+
; CHECK-NEXT: mov x20, x0
160+
; CHECK-NEXT: add x0, sp, #12
161+
; CHECK-NEXT: add x1, sp, #8
162+
; CHECK-NEXT: bl sincosf
163+
; CHECK-NEXT: ldr w8, [sp, #12]
164+
; CHECK-NEXT: str w8, [x20]
165+
; CHECK-NEXT: ldr w8, [sp, #8]
166+
; CHECK-NEXT: str w8, [x19]
167+
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
168+
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
169+
; CHECK-NEXT: ret
170+
entry:
171+
%sin = tail call float @llvm.sin.f32(float %x)
172+
%cos = tail call float @llvm.cos.f32(float %x)
173+
store atomic float %sin, ptr %out_sin unordered, align 4
174+
store atomic float %cos, ptr %out_cos unordered, align 4
175+
ret void
176+
}
177+
178+
; Negative test. We can't fold misaligned stores into the library call.
179+
define void @sincos_misaligned_result_stores(double %x, ptr %out_sin, ptr %out_cos) {
180+
; CHECK-LABEL: sincos_misaligned_result_stores:
181+
; CHECK: // %bb.0: // %entry
182+
; CHECK-NEXT: sub sp, sp, #48
183+
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
184+
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
185+
; CHECK-NEXT: .cfi_def_cfa_offset 48
186+
; CHECK-NEXT: .cfi_offset w19, -8
187+
; CHECK-NEXT: .cfi_offset w20, -16
188+
; CHECK-NEXT: .cfi_offset w30, -32
189+
; CHECK-NEXT: mov x19, x1
190+
; CHECK-NEXT: mov x20, x0
191+
; CHECK-NEXT: add x0, sp, #24
192+
; CHECK-NEXT: add x1, sp, #8
193+
; CHECK-NEXT: bl sincos
194+
; CHECK-NEXT: ldr d0, [sp, #24]
195+
; CHECK-NEXT: ldr d1, [sp, #8]
196+
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
197+
; CHECK-NEXT: str d0, [x20]
198+
; CHECK-NEXT: str d1, [x19]
199+
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
200+
; CHECK-NEXT: add sp, sp, #48
201+
; CHECK-NEXT: ret
202+
entry:
203+
%sin = tail call double @llvm.sin.f64(double %x)
204+
%cos = tail call double @llvm.cos.f64(double %x)
205+
store double %sin, ptr %out_sin, align 4
206+
store double %cos, ptr %out_cos, align 4
207+
ret void
208+
}

0 commit comments

Comments
 (0)