Skip to content

Commit 50b1219

Browse files
puranjaymohan authored and Kernel Patches Daemon committed
arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs
Support an instruction for resolving absolute addresses of per-CPU data from their per-CPU offsets. This instruction is internal-only and users are not allowed to use it directly. It will only be used for internal inlining optimizations for now between the BPF verifier and BPF JITs.

Since commit 7158627 ("arm64: percpu: implement optimised pcpu access using tpidr_el1"), the per-CPU offset for a CPU is stored in the tpidr_el1/2 register of that CPU.

To support this BPF instruction in the ARM64 JIT, the following ARM64 instructions are emitted:

    mov dst, src          // Move src to dst, if src != dst
    mrs tmp, tpidr_el1/2  // Move the per-CPU offset of the current CPU into tmp.
    add dst, dst, tmp     // Add the per-CPU offset to dst.

To measure the performance improvement provided by this change, the benchmark in [1] was used:

Before:
    glob-arr-inc : 23.597 ± 0.012M/s
    arr-inc      : 23.173 ± 0.019M/s
    hash-inc     : 12.186 ± 0.028M/s

After:
    glob-arr-inc : 23.819 ± 0.034M/s
    arr-inc      : 23.285 ± 0.017M/s
    hash-inc     : 12.419 ± 0.011M/s

[1] anakryiko/linux@8dec900975ef

Signed-off-by: Puranjay Mohan <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
1 parent 1d6b48f commit 50b1219

File tree

4 files changed

+38
-0
lines changed

4 files changed

+38
-0
lines changed

arch/arm64/include/asm/insn.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ enum aarch64_insn_special_register {
135135
AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210
136136
};
137137

138+
/*
 * System registers that can be named when generating an MRS instruction
 * with aarch64_insn_gen_mrs(). Each enumerator holds the register's
 * encoding as it is OR-ed into the instruction word after a left shift
 * by 5 (see aarch64_insn_gen_mrs()).
 */
enum aarch64_insn_system_register {
139+
AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684,
140+
AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682,
141+
};
142+
138143
enum aarch64_insn_variant {
139144
AARCH64_INSN_VARIANT_32BIT,
140145
AARCH64_INSN_VARIANT_64BIT
@@ -686,6 +691,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
686691
}
687692
#endif
688693
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
694+
/* Generate an MRS instruction word that reads @sysreg into register @result. */
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
695+
enum aarch64_insn_system_register sysreg);
689696

690697
s32 aarch64_get_branch_offset(u32 insn);
691698
u32 aarch64_set_branch_offset(u32 insn, s32 offset);

arch/arm64/lib/insn.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
15151515

15161516
return insn;
15171517
}
1518+
1519+
/*
 * Build an MRS instruction word that reads system register @sysreg into
 * register @result.
 *
 * Returns whatever aarch64_insn_encode_register() returns for the
 * assembled word.
 */
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
1520+
enum aarch64_insn_system_register sysreg)
1521+
{
1522+
u32 insn = aarch64_insn_get_mrs_value();
1523+
1524+
/* Clear bits 19:0 of the base opcode, then place the sysreg encoding starting at bit 5. */
insn &= ~GENMASK(19, 0);
1525+
insn |= sysreg << 5;
1526+
/* Finally encode the destination register into the Rt field. */
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT,
1527+
insn, result);
1528+
}

arch/arm64/net/bpf_jit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,4 +297,10 @@
297297
#define A64_ADR(Rd, offset) \
298298
aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)
299299

300+
/*
 * MRS: read TPIDR_EL1 or TPIDR_EL2 into register Rt.
 * Both macros expand to aarch64_insn_gen_mrs() with the matching
 * AARCH64_INSN_SYSREG_* constant.
 */
#define A64_MRS_TPIDR_EL1(Rt) \
302+
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1)
303+
#define A64_MRS_TPIDR_EL2(Rt) \
304+
aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2)
305+
300306
#endif /* _BPF_JIT_H */

arch/arm64/net/bpf_jit_comp.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
877877
emit(A64_ORR(1, tmp, dst, tmp), ctx);
878878
emit(A64_MOV(1, dst, tmp), ctx);
879879
break;
880+
} else if (insn_is_mov_percpu_addr(insn)) {
881+
if (dst != src)
882+
emit(A64_MOV(1, dst, src), ctx);
883+
if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
884+
emit(A64_MRS_TPIDR_EL2(tmp), ctx);
885+
else
886+
emit(A64_MRS_TPIDR_EL1(tmp), ctx);
887+
emit(A64_ADD(1, dst, dst, tmp), ctx);
888+
break;
880889
}
881890
switch (insn->off) {
882891
case 0:
@@ -2527,6 +2536,11 @@ bool bpf_jit_supports_arena(void)
25272536
return true;
25282537
}
25292538

2539+
/*
 * Report that this JIT can translate the internal-only per-CPU address MOV
 * instruction. Always returns true.
 * NOTE(review): presumably queried by the BPF verifier before it emits that
 * instruction - confirm against the core BPF code.
 */
bool bpf_jit_supports_percpu_insn(void)
2540+
{
2541+
return true;
2542+
}
2543+
25302544
void bpf_jit_free(struct bpf_prog *prog)
25312545
{
25322546
if (prog->jited) {

0 commit comments

Comments
 (0)