Skip to content

Commit 68c49b4

Browse files
Xu KuohaiNobody
authored andcommitted
bpf, arm64: Optimize BPF store/load using str/ldr with immediate offset
The current BPF store/load instruction is translated by the JIT into two instructions. The first instruction moves the immediate offset into a temporary register. The second instruction uses this temporary register to do the real store/load. In fact, arm64 supports addressing with immediate offsets, so this patch introduces an optimization that uses the arm64 str/ldr instruction with an immediate offset when the offset fits. Example of the generated instruction for r2 = *(u64 *)(r1 + 0): without optimization: mov x10, 0; ldr x1, [x0, x10]. With optimization: ldr x1, [x0, 0]. If the offset is negative, is not aligned correctly, or exceeds the maximum value, fall back to using a temporary register. Signed-off-by: Xu Kuohai <[email protected]>
1 parent da06181 commit 68c49b4

File tree

4 files changed

+113
-53
lines changed

4 files changed

+113
-53
lines changed

arch/arm64/include/asm/insn.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@ enum aarch64_insn_size_type {
200200
enum aarch64_insn_ldst_type {
201201
AARCH64_INSN_LDST_LOAD_REG_OFFSET,
202202
AARCH64_INSN_LDST_STORE_REG_OFFSET,
203+
AARCH64_INSN_LDST_LOAD_IMM_OFFSET,
204+
AARCH64_INSN_LDST_STORE_IMM_OFFSET,
203205
AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX,
204206
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
205207
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
@@ -334,13 +336,15 @@ __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
334336
__AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400)
335337
__AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400)
336338
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
339+
__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000)
337340
__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000)
338341
__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000)
339342
__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000)
340343
__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000)
341344
__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000)
342345
__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00)
343346
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
347+
__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
344348
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
345349
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
346350
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
@@ -500,6 +504,11 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
500504
enum aarch64_insn_register offset,
501505
enum aarch64_insn_size_type size,
502506
enum aarch64_insn_ldst_type type);
507+
u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
508+
enum aarch64_insn_register base,
509+
unsigned int imm,
510+
enum aarch64_insn_size_type size,
511+
enum aarch64_insn_ldst_type type);
503512
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
504513
enum aarch64_insn_register reg2,
505514
enum aarch64_insn_register base,

arch/arm64/lib/insn.c

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -299,29 +299,24 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
299299
return insn;
300300
}
301301

302+
static const u32 aarch64_insn_ldst_size[] = {
303+
[AARCH64_INSN_SIZE_8] = 0,
304+
[AARCH64_INSN_SIZE_16] = 1,
305+
[AARCH64_INSN_SIZE_32] = 2,
306+
[AARCH64_INSN_SIZE_64] = 3,
307+
};
308+
302309
static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
303310
u32 insn)
304311
{
305312
u32 size;
306313

307-
switch (type) {
308-
case AARCH64_INSN_SIZE_8:
309-
size = 0;
310-
break;
311-
case AARCH64_INSN_SIZE_16:
312-
size = 1;
313-
break;
314-
case AARCH64_INSN_SIZE_32:
315-
size = 2;
316-
break;
317-
case AARCH64_INSN_SIZE_64:
318-
size = 3;
319-
break;
320-
default:
314+
if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) {
321315
pr_err("%s: unknown size encoding %d\n", __func__, type);
322316
return AARCH64_BREAK_FAULT;
323317
}
324318

319+
size = aarch64_insn_ldst_size[type];
325320
insn &= ~GENMASK(31, 30);
326321
insn |= size << 30;
327322

@@ -504,6 +499,50 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
504499
offset);
505500
}
506501

502+
u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg,
503+
enum aarch64_insn_register base,
504+
unsigned int imm,
505+
enum aarch64_insn_size_type size,
506+
enum aarch64_insn_ldst_type type)
507+
{
508+
u32 insn;
509+
u32 shift;
510+
511+
if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) {
512+
pr_err("%s: unknown size encoding %d\n", __func__, type);
513+
return AARCH64_BREAK_FAULT;
514+
}
515+
516+
shift = aarch64_insn_ldst_size[size];
517+
if (imm & ~(BIT(12 + shift) - BIT(shift))) {
518+
pr_err("%s: invalid imm: %d\n", __func__, imm);
519+
return AARCH64_BREAK_FAULT;
520+
}
521+
522+
imm >>= shift;
523+
524+
switch (type) {
525+
case AARCH64_INSN_LDST_LOAD_IMM_OFFSET:
526+
insn = aarch64_insn_get_ldr_imm_value();
527+
break;
528+
case AARCH64_INSN_LDST_STORE_IMM_OFFSET:
529+
insn = aarch64_insn_get_str_imm_value();
530+
break;
531+
default:
532+
pr_err("%s: unknown load/store encoding %d\n", __func__, type);
533+
return AARCH64_BREAK_FAULT;
534+
}
535+
536+
insn = aarch64_insn_encode_ldst_size(size, insn);
537+
538+
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
539+
540+
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
541+
base);
542+
543+
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
544+
}
545+
507546
u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
508547
enum aarch64_insn_register reg2,
509548
enum aarch64_insn_register base,

arch/arm64/net/bpf_jit.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,20 @@
6666
#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
6767
#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
6868

69+
/* Load/store register (immediate offset) */
70+
#define A64_LS_IMM(Rt, Rn, imm, size, type) \
71+
aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \
72+
AARCH64_INSN_SIZE_##size, \
73+
AARCH64_INSN_LDST_##type##_IMM_OFFSET)
74+
#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE)
75+
#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD)
76+
#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE)
77+
#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD)
78+
#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE)
79+
#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD)
80+
#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE)
81+
#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)
82+
6983
/* Load/store register pair */
7084
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
7185
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \

arch/arm64/net/bpf_jit_comp.c

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -971,20 +971,22 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
971971
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
972972
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
973973
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
974-
emit_a64_mov_i(1, tmp, off, ctx);
974+
975+
#define BUILD_LDX_INSN(a, b, c) \
976+
case BPF_##a: \
977+
if ((off & ((1 << c) - 1)) == 0 && off >= 0 && off <= (0xFFF << c)) { \
978+
emit(A64_LDR##b##I(dst, src, off), ctx); \
979+
} else { \
980+
emit_a64_mov_i(1, tmp, off, ctx); \
981+
emit(A64_LDR##b(dst, src, tmp), ctx); \
982+
} \
983+
break;
984+
975985
switch (BPF_SIZE(code)) {
976-
case BPF_W:
977-
emit(A64_LDR32(dst, src, tmp), ctx);
978-
break;
979-
case BPF_H:
980-
emit(A64_LDRH(dst, src, tmp), ctx);
981-
break;
982-
case BPF_B:
983-
emit(A64_LDRB(dst, src, tmp), ctx);
984-
break;
985-
case BPF_DW:
986-
emit(A64_LDR64(dst, src, tmp), ctx);
987-
break;
986+
BUILD_LDX_INSN(W, 32, 2)
987+
BUILD_LDX_INSN(H, H, 1)
988+
BUILD_LDX_INSN(B, B, 0)
989+
BUILD_LDX_INSN(DW, 64, 3)
988990
}
989991

990992
ret = add_exception_handler(insn, ctx, dst);
@@ -1010,22 +1012,25 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10101012
case BPF_ST | BPF_MEM | BPF_H:
10111013
case BPF_ST | BPF_MEM | BPF_B:
10121014
case BPF_ST | BPF_MEM | BPF_DW:
1015+
1016+
#define __BUILD_STX_INSN(a, b, c, d, e) \
1017+
case BPF_##a: \
1018+
if ((off & ((1 << c) - 1)) == 0 && off >= 0 && off <= (0xFFF << c)) { \
1019+
emit(A64_STR##b##I(d, dst, off), ctx); \
1020+
} else { \
1021+
emit_a64_mov_i(1, e, off, ctx); \
1022+
emit(A64_STR##b(d, dst, e), ctx); \
1023+
} \
1024+
break;
1025+
1026+
#define BUILD_ST_INSN(a, b, c) __BUILD_STX_INSN(a, b, c, tmp, tmp2)
10131027
/* Load imm to a register then store it */
1014-
emit_a64_mov_i(1, tmp2, off, ctx);
10151028
emit_a64_mov_i(1, tmp, imm, ctx);
10161029
switch (BPF_SIZE(code)) {
1017-
case BPF_W:
1018-
emit(A64_STR32(tmp, dst, tmp2), ctx);
1019-
break;
1020-
case BPF_H:
1021-
emit(A64_STRH(tmp, dst, tmp2), ctx);
1022-
break;
1023-
case BPF_B:
1024-
emit(A64_STRB(tmp, dst, tmp2), ctx);
1025-
break;
1026-
case BPF_DW:
1027-
emit(A64_STR64(tmp, dst, tmp2), ctx);
1028-
break;
1030+
BUILD_ST_INSN(W, 32, 2)
1031+
BUILD_ST_INSN(H, H, 1)
1032+
BUILD_ST_INSN(B, B, 0)
1033+
BUILD_ST_INSN(DW, 64, 3)
10291034
}
10301035
break;
10311036

@@ -1034,20 +1039,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10341039
case BPF_STX | BPF_MEM | BPF_H:
10351040
case BPF_STX | BPF_MEM | BPF_B:
10361041
case BPF_STX | BPF_MEM | BPF_DW:
1037-
emit_a64_mov_i(1, tmp, off, ctx);
1042+
1043+
#define BUILD_STX_INSN(a, b, c) __BUILD_STX_INSN(a, b, c, src, tmp)
10381044
switch (BPF_SIZE(code)) {
1039-
case BPF_W:
1040-
emit(A64_STR32(src, dst, tmp), ctx);
1041-
break;
1042-
case BPF_H:
1043-
emit(A64_STRH(src, dst, tmp), ctx);
1044-
break;
1045-
case BPF_B:
1046-
emit(A64_STRB(src, dst, tmp), ctx);
1047-
break;
1048-
case BPF_DW:
1049-
emit(A64_STR64(src, dst, tmp), ctx);
1050-
break;
1045+
BUILD_STX_INSN(W, 32, 2)
1046+
BUILD_STX_INSN(H, H, 1)
1047+
BUILD_STX_INSN(B, B, 0)
1048+
BUILD_STX_INSN(DW, 64, 3)
10511049
}
10521050
break;
10531051

0 commit comments

Comments
 (0)