
Commit 5486f5b

chleroy authored and davem330 committed
net: Force inlining of checksum functions in net/checksum.h
All functions defined as static inline in net/checksum.h are meant to be
inlined for performance reasons.

But since commit ac7c3e4 ("compiler: enable CONFIG_OPTIMIZE_INLINING
forcibly") the compiler is allowed to uninline functions when it wants.
Fair enough in the general case, but for tiny performance-critical
checksum helpers that's counter-productive.

The problem mainly arises when selecting CONFIG_CC_OPTIMISE_FOR_SIZE:
those helpers being 'static inline' in header files, you suddenly find
them duplicated many times in the resulting vmlinux.

Here is a typical example when building powerpc pmac32_defconfig with
CONFIG_CC_OPTIMISE_FOR_SIZE. csum_sub() appears 4 times:

	c04a23cc <csum_sub>:
	c04a23cc:	7c 84 20 f8	not     r4,r4
	c04a23d0:	7c 63 20 14	addc    r3,r3,r4
	c04a23d4:	7c 63 01 94	addze   r3,r3
	c04a23d8:	4e 80 00 20	blr
		...
	c04a2ce8:	4b ff f6 e5	bl      c04a23cc <csum_sub>
		...
	c04a2d2c:	4b ff f6 a1	bl      c04a23cc <csum_sub>
		...
	c04a2d54:	4b ff f6 79	bl      c04a23cc <csum_sub>
		...
	c04a754c <csum_sub>:
	c04a754c:	7c 84 20 f8	not     r4,r4
	c04a7550:	7c 63 20 14	addc    r3,r3,r4
	c04a7554:	7c 63 01 94	addze   r3,r3
	c04a7558:	4e 80 00 20	blr
		...
	c04ac930:	4b ff ac 1d	bl      c04a754c <csum_sub>
		...
	c04ad264:	4b ff a2 e9	bl      c04a754c <csum_sub>
		...
	c04e3b08 <csum_sub>:
	c04e3b08:	7c 84 20 f8	not     r4,r4
	c04e3b0c:	7c 63 20 14	addc    r3,r3,r4
	c04e3b10:	7c 63 01 94	addze   r3,r3
	c04e3b14:	4e 80 00 20	blr
		...
	c04e5788:	4b ff e3 81	bl      c04e3b08 <csum_sub>
		...
	c04e65c8:	4b ff d5 41	bl      c04e3b08 <csum_sub>
		...
	c0512d34 <csum_sub>:
	c0512d34:	7c 84 20 f8	not     r4,r4
	c0512d38:	7c 63 20 14	addc    r3,r3,r4
	c0512d3c:	7c 63 01 94	addze   r3,r3
	c0512d40:	4e 80 00 20	blr
		...
	c0512dfc:	4b ff ff 39	bl      c0512d34 <csum_sub>
		...
	c05138bc:	4b ff f4 79	bl      c0512d34 <csum_sub>
		...

Restore the expected behaviour by using __always_inline for all
functions defined in net/checksum.h.

vmlinux size is even reduced by 256 bytes with this patch:

	   text	   data	    bss	    dec	    hex	filename
	6980022	2515362	 194384	9689768	 93daa8	vmlinux.before
	6979862	2515266	 194384	9689512	 93d9a8	vmlinux.now

Fixes: ac7c3e4 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly")
Cc: Masahiro Yamada <[email protected]>
Cc: Nick Desaulniers <[email protected]>
Cc: Andrew Morton <[email protected]>
Signed-off-by: Christophe Leroy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
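For readers not steeped in the kernel's inlining knobs: __always_inline is, as far as I know, defined in include/linux/compiler_types.h as plain inline plus the always_inline function attribute, which forces the compiler to expand the body at every call site; once CONFIG_OPTIMIZE_INLINING is in effect, a bare static inline is only a hint that -Os is free to ignore. The fragment below is a hypothetical user-space sketch, not part of the patch and with invented names, contrasting the two spellings.

/*
 * Hypothetical user-space illustration (not kernel code).
 * Built with "gcc -Os -c", the first helper may be emitted as a real
 * out-of-line function in every object file that uses it; the second
 * is always expanded at the call site.
 */
#include <stdint.h>

/* Hint only: the compiler may decide not to inline this under -Os. */
static inline uint32_t csum_add_hint(uint32_t csum, uint32_t addend)
{
	uint32_t res = csum + addend;

	return res + (res < addend);	/* fold the end-around carry back in */
}

/* Forced: the spelling that __always_inline expands to in the kernel. */
static inline __attribute__((__always_inline__))
uint32_t csum_add_forced(uint32_t csum, uint32_t addend)
{
	uint32_t res = csum + addend;

	return res + (res < addend);
}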
1 parent 0033fce commit 5486f5b

File tree: 1 file changed (+24, −23 lines)


include/net/checksum.h

Lines changed: 24 additions & 23 deletions
@@ -22,7 +22,7 @@
 #include <asm/checksum.h>
 
 #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
-static inline
+static __always_inline
 __wsum csum_and_copy_from_user (const void __user *src, void *dst,
 				      int len)
 {
@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst,
 #endif
 
 #ifndef HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user
+static __always_inline __wsum csum_and_copy_to_user
 (const void *src, void __user *dst, int len)
 {
 	__wsum sum = csum_partial(src, len, ~0U);
@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
 #endif
 
 #ifndef _HAVE_ARCH_CSUM_AND_COPY
-static inline __wsum
+static __always_inline __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
 	memcpy(dst, src, len);
@@ -54,76 +54,77 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
 #endif
 
 #ifndef HAVE_ARCH_CSUM_ADD
-static inline __wsum csum_add(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
 {
 	u32 res = (__force u32)csum;
 	res += (__force u32)addend;
 	return (__force __wsum)(res + (res < (__force u32)addend));
 }
 #endif
 
-static inline __wsum csum_sub(__wsum csum, __wsum addend)
+static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
 {
 	return csum_add(csum, ~addend);
 }
 
-static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
 {
 	u16 res = (__force u16)csum;
 
 	res += (__force u16)addend;
 	return (__force __sum16)(res + (res < (__force u16)addend));
 }
 
-static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
+static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
 {
 	return csum16_add(csum, ~addend);
 }
 
-static inline __wsum csum_shift(__wsum sum, int offset)
+static __always_inline __wsum csum_shift(__wsum sum, int offset)
 {
 	/* rotate sum to align it with a 16b boundary */
 	if (offset & 1)
 		return (__force __wsum)ror32((__force u32)sum, 8);
 	return sum;
 }
 
-static inline __wsum
+static __always_inline __wsum
 csum_block_add(__wsum csum, __wsum csum2, int offset)
 {
 	return csum_add(csum, csum_shift(csum2, offset));
 }
 
-static inline __wsum
+static __always_inline __wsum
 csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
 {
 	return csum_block_add(csum, csum2, offset);
 }
 
-static inline __wsum
+static __always_inline __wsum
 csum_block_sub(__wsum csum, __wsum csum2, int offset)
 {
 	return csum_block_add(csum, ~csum2, offset);
 }
 
-static inline __wsum csum_unfold(__sum16 n)
+static __always_inline __wsum csum_unfold(__sum16 n)
 {
 	return (__force __wsum)n;
 }
 
-static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
+static __always_inline
+__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
 {
 	return csum_partial(buff, len, sum);
 }
 
 #define CSUM_MANGLED_0 ((__force __sum16)0xffff)
 
-static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
 {
 	*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
 }
 
-static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
+static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
 {
 	__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
 
@@ -136,7 +137,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
  * m : old value of a 16bit field
  * m' : new value of a 16bit field
  */
-static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
+static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
 {
 	*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
 }
@@ -150,16 +151,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
 				     __wsum diff, bool pseudohdr);
 
-static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
-					    __be16 from, __be16 to,
-					    bool pseudohdr)
+static __always_inline
+void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+			      __be16 from, __be16 to, bool pseudohdr)
 {
 	inet_proto_csum_replace4(sum, skb, (__force __be32)from,
 				 (__force __be32)to, pseudohdr);
 }
 
-static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
-				    int start, int offset)
+static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
+					     int start, int offset)
 {
 	__sum16 *psum = (__sum16 *)(ptr + offset);
 	__wsum delta;
@@ -175,12 +176,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
 	return delta;
 }
 
-static inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
+static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
 {
 	*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
 }
 
-static inline __wsum wsum_negate(__wsum val)
+static __always_inline __wsum wsum_negate(__wsum val)
 {
 	return (__force __wsum)-((__force u32)val);
 }
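A side note on what these tiny helpers compute, which also explains why forcing them inline is cheap: the Internet checksum is one's-complement arithmetic, so csum_add() folds the carry back into the low bits and csum_sub() is simply addition of the bitwise complement. The standalone sketch below, using user-space types and arbitrary test values rather than kernel code, walks through that arithmetic.

/*
 * Standalone sketch of the one's-complement arithmetic behind
 * csum_add()/csum_sub(); user-space types and arbitrary values only.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t csum_add(uint32_t csum, uint32_t addend)
{
	uint32_t res = csum + addend;

	/* End-around carry: if the 32-bit addition wrapped, add 1 back. */
	return res + (res < addend);
}

static uint32_t csum_sub(uint32_t csum, uint32_t addend)
{
	/* In one's complement, subtraction is addition of the complement. */
	return csum_add(csum, ~addend);
}

int main(void)
{
	uint32_t base = 0x11112222;	/* arbitrary running checksum */
	uint32_t part = 0x0000ffff;	/* arbitrary contribution */

	/* For these values, adding then subtracting part restores base. */
	assert(csum_sub(csum_add(base, part), part) == base);
	return 0;
}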
