
Commit addfc38

Andrew Murray authored and willdeacon committed
arm64: atomics: avoid out-of-line ll/sc atomics
When building for LSE atomics (CONFIG_ARM64_LSE_ATOMICS), if the hardware or toolchain doesn't support it, the existing code will fall back to LL/SC atomics. It achieves this by branching from inline assembly to a function that is built with special compile flags. Further, this results in the clobbering of registers even when the fallback isn't used, increasing register pressure.

Improve this by providing inline implementations of both LSE and LL/SC and using a static key to select between them, which allows the compiler to generate better atomics code. Put the LL/SC fallback atomics in their own subsection to improve icache performance.

Signed-off-by: Andrew Murray <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
1 parent 580fa1b commit addfc38
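The commit message describes the key trick: both the LSE and the LL/SC bodies stay inline, and a static key picks between them at runtime. A simplified, hypothetical sketch of that pattern (the key and function names below are illustrative, not from the commit; the real selector is system_uses_lse_atomics() in the new atomic_arch.h shown further down):

/*
 * Hypothetical illustration only: a static key chooses between two
 * inline implementations, so selection costs a runtime-patched branch
 * rather than an out-of-line call with extra clobbered registers.
 */
#include <linux/atomic.h>
#include <linux/jump_label.h>

/* Illustrative key; the kernel uses cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]. */
DEFINE_STATIC_KEY_FALSE(have_lse_atomics);

static inline void lse_add(int i, atomic_t *v)   { /* stub for the LSE body */ }
static inline void ll_sc_add(int i, atomic_t *v) { /* stub for the LL/SC body */ }

static inline void example_atomic_add(int i, atomic_t *v)
{
	if (static_branch_likely(&have_lse_atomics))
		lse_add(i, v);		/* fast path once the key is enabled at boot */
	else
		ll_sc_add(i, v);	/* fallback, still inlined */
}

Once the CPU capability is detected and the key enabled, the branch is patched in place, so on LSE-capable hardware the selection reduces to a no-op rather than a call into specially compiled out-of-line code.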

6 files changed: +329 -328 lines

arch/arm64/include/asm/atomic.h

Lines changed: 1 addition & 10 deletions
@@ -17,16 +17,7 @@
 
 #ifdef __KERNEL__
 
-#define __ARM64_IN_ATOMIC_IMPL
-
-#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
-#include <asm/atomic_lse.h>
-#else
-#include <asm/atomic_ll_sc.h>
-#endif
-
-#undef __ARM64_IN_ATOMIC_IMPL
-
+#include <asm/atomic_arch.h>
 #include <asm/cmpxchg.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
arch/arm64/include/asm/atomic_arch.h

Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Selection between LSE and LL/SC atomics.
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ * Author: Andrew Murray <[email protected]>
+ */
+
+#ifndef __ASM_ATOMIC_ARCH_H
+#define __ASM_ATOMIC_ARCH_H
+
+
+#include <linux/jump_label.h>
+
+#include <asm/cpucaps.h>
+#include <asm/atomic_ll_sc.h>
+#include <asm/atomic_lse.h>
+
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
+
+static inline bool system_uses_lse_atomics(void)
+{
+	return (IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) &&
+		IS_ENABLED(CONFIG_AS_LSE) &&
+		static_branch_likely(&arm64_const_caps_ready)) &&
+		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+}
+
+#define __lse_ll_sc_body(op, ...)				\
+({								\
+	system_uses_lse_atomics() ?				\
+		__lse_##op(__VA_ARGS__) :			\
+		__ll_sc_##op(__VA_ARGS__);			\
+})
+
+#define ATOMIC_OP(op)						\
+static inline void arch_##op(int i, atomic_t *v)		\
+{								\
+	__lse_ll_sc_body(op, i, v);				\
+}
+
+ATOMIC_OP(atomic_andnot)
+ATOMIC_OP(atomic_or)
+ATOMIC_OP(atomic_xor)
+ATOMIC_OP(atomic_add)
+ATOMIC_OP(atomic_and)
+ATOMIC_OP(atomic_sub)
+
+
+#define ATOMIC_FETCH_OP(name, op)				\
+static inline int arch_##op##name(int i, atomic_t *v)		\
+{								\
+	return __lse_ll_sc_body(op##name, i, v);		\
+}
+
+#define ATOMIC_FETCH_OPS(op)					\
+	ATOMIC_FETCH_OP(_relaxed, op)				\
+	ATOMIC_FETCH_OP(_acquire, op)				\
+	ATOMIC_FETCH_OP(_release, op)				\
+	ATOMIC_FETCH_OP(        , op)
+
+ATOMIC_FETCH_OPS(atomic_fetch_andnot)
+ATOMIC_FETCH_OPS(atomic_fetch_or)
+ATOMIC_FETCH_OPS(atomic_fetch_xor)
+ATOMIC_FETCH_OPS(atomic_fetch_add)
+ATOMIC_FETCH_OPS(atomic_fetch_and)
+ATOMIC_FETCH_OPS(atomic_fetch_sub)
+ATOMIC_FETCH_OPS(atomic_add_return)
+ATOMIC_FETCH_OPS(atomic_sub_return)
+
+
+#define ATOMIC64_OP(op)						\
+static inline void arch_##op(long i, atomic64_t *v)		\
+{								\
+	__lse_ll_sc_body(op, i, v);				\
+}
+
+ATOMIC64_OP(atomic64_andnot)
+ATOMIC64_OP(atomic64_or)
+ATOMIC64_OP(atomic64_xor)
+ATOMIC64_OP(atomic64_add)
+ATOMIC64_OP(atomic64_and)
+ATOMIC64_OP(atomic64_sub)
+
+
+#define ATOMIC64_FETCH_OP(name, op)				\
+static inline long arch_##op##name(long i, atomic64_t *v)	\
+{								\
+	return __lse_ll_sc_body(op##name, i, v);		\
+}
+
+#define ATOMIC64_FETCH_OPS(op)					\
+	ATOMIC64_FETCH_OP(_relaxed, op)				\
+	ATOMIC64_FETCH_OP(_acquire, op)				\
+	ATOMIC64_FETCH_OP(_release, op)				\
+	ATOMIC64_FETCH_OP(        , op)
+
+ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
+ATOMIC64_FETCH_OPS(atomic64_fetch_or)
+ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
+ATOMIC64_FETCH_OPS(atomic64_fetch_add)
+ATOMIC64_FETCH_OPS(atomic64_fetch_and)
+ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
+ATOMIC64_FETCH_OPS(atomic64_add_return)
+ATOMIC64_FETCH_OPS(atomic64_sub_return)
+
+
+static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
+}
+
+#define __CMPXCHG_CASE(name, sz)				\
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
+					      u##sz old,	\
+					      u##sz new)	\
+{								\
+	return __lse_ll_sc_body(_cmpxchg_case_##name##sz,	\
+				ptr, old, new);			\
+}
+
+__CMPXCHG_CASE(    ,  8)
+__CMPXCHG_CASE(    , 16)
+__CMPXCHG_CASE(    , 32)
+__CMPXCHG_CASE(    , 64)
+__CMPXCHG_CASE(acq_,  8)
+__CMPXCHG_CASE(acq_, 16)
+__CMPXCHG_CASE(acq_, 32)
+__CMPXCHG_CASE(acq_, 64)
+__CMPXCHG_CASE(rel_,  8)
+__CMPXCHG_CASE(rel_, 16)
+__CMPXCHG_CASE(rel_, 32)
+__CMPXCHG_CASE(rel_, 64)
+__CMPXCHG_CASE(mb_,  8)
+__CMPXCHG_CASE(mb_, 16)
+__CMPXCHG_CASE(mb_, 32)
+__CMPXCHG_CASE(mb_, 64)
+
+
+#define __CMPXCHG_DBL(name)					\
+static inline long __cmpxchg_double##name(unsigned long old1,	\
+					  unsigned long old2,	\
+					  unsigned long new1,	\
+					  unsigned long new2,	\
+					  volatile void *ptr)	\
+{								\
+	return __lse_ll_sc_body(_cmpxchg_double##name,		\
+				old1, old2, new1, new2, ptr);	\
+}
+
+__CMPXCHG_DBL(   )
+__CMPXCHG_DBL(_mb)
+
+#endif	/* __ASM_ATOMIC_LSE_H */
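
The commit message also mentions putting the LL/SC fallback atomics in their own subsection; that part of the change presumably lives in atomic_ll_sc.h, which is not among the hunks shown here. A hedged sketch of how an inline-asm body can be pushed into a separate subsection so the cold fallback stays out of the hot instruction stream (the macro name and label numbers below are illustrative assumptions, not taken from the hunks above):

/*
 * Illustrative sketch only: emit the wrapped asm into .subsection 1 of the
 * current section, then jump back, so the rarely executed fallback code is
 * kept away from the frequently executed path while remaining inline.
 */
#define __LL_SC_FALLBACK(asm_ops)			\
"	b	3f\n"					\
"	.subsection	1\n"				\
"3:\n"							\
asm_ops							\
"	b	4f\n"					\
"	.subsection	0\n"				\
"4:\n"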
