@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG	(MAX_BPF_JIT_REG + 1)
 #define X86_REG_R9	(MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12	(MAX_BPF_JIT_REG + 3)
 
 /*
  * The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
 	[BPF_REG_AX] = 2, /* R10 temp register */
 	[AUX_REG] = 3,    /* R11 temp register */
 	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
+	[X86_REG_R12] = 4, /* R12 callee saved */
 };
 
 static const int reg2pt_regs[] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
 			     BIT(BPF_REG_8) |
 			     BIT(BPF_REG_9) |
 			     BIT(X86_REG_R9) |
+			     BIT(X86_REG_R12) |
 			     BIT(BPF_REG_AX));
 }
 
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
 	return byte;
 }
 
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+	if (is_ereg(r1))
+		byte |= 1;
+	if (is_ereg(index))
+		byte |= 2;
+	if (is_ereg(r2))
+		byte |= 4;
+	return byte;
+}
+
 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 static u8 add_1reg(u8 byte, u32 dst_reg)
 {
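The three is_ereg() checks map to the REX.B (ModRM/SIB base), REX.X (SIB index) and REX.R (ModRM reg) bits, respectively. A worked example, with registers chosen purely for illustration:

	/* Encoding mov rax, qword ptr [rax + r12 + 0x10]:
	 *   base rax  -> REX.B = 0; index r12 -> REX.X = 1; dst rax -> REX.R = 0
	 *   add_3mod(0x48, base, dst, index) == 0x48 | 2 == 0x4a
	 *   full instruction: 4a 8b 44 20 10
	 */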
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 		pop_r12(&prog);
 	} else {
 		pop_callee_regs(&prog, callee_regs_used);
+		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+			pop_r12(&prog);
 	}
 
 	EMIT1(0x58);			  /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 		pop_r12(&prog);
 	} else {
 		pop_callee_regs(&prog, callee_regs_used);
+		if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+			pop_r12(&prog);
 	}
 
 	EMIT1(0x58);			  /* pop rax */
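Both tail-call emitters must mirror the prologue: when the program uses an arena, R12 is pushed before the callee-saved set, so it is popped after them, ahead of the jump into the target program. A sketch of the resulting teardown order (illustrative, not literal JIT output):

	/* pop <callee-saved regs that were pushed>
	 * pop r12        -- undo the arena push from the prologue
	 * pop rax        -- tail_call_cnt
	 * jmp target
	 */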
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
 	*pprog = prog;
 }
 
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	if (is_imm8(off)) {
+		EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+	} else {
+		EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
+	}
+	*pprog = prog;
+}
+
 /*
  * Emit a REX byte if it will be necessary to address these registers
  */
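The two mode bytes are ModRM templates: 0x44 is mod=01 (a disp8 follows) with r/m=100, which selects a SIB byte; 0x84 is the same with mod=10 (disp32). Since reg2hex[BPF_REG_0] == 0, add_2reg(0x44, BPF_REG_0, val_reg) fills in only the reg field:

	/* ModRM: mod reg r/m   = 01 <val_reg> 100    (100 => SIB byte follows)
	 * SIB:   scale idx base = 00 <index_reg> <ptr_reg>   (scale 1)
	 */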
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 	*pprog = prog;
 }
 
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* movzx rax, byte ptr [rax + r12 + off] */
+		EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
+		break;
+	case BPF_H:
+		/* movzx rax, word ptr [rax + r12 + off] */
+		EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
+		break;
+	case BPF_W:
+		/* mov eax, dword ptr [rax + r12 + off] */
+		EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
+		break;
+	case BPF_DW:
+		/* mov rax, qword ptr [rax + r12 + off] */
+		EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+	*pprog = prog;
+}
+
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
 /* STX: *(u8*)(dst_reg + off) = src_reg */
 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
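A worked byte-level example of the BPF_W case (hypothetical operands, not from the patch): loading dst = BPF_REG_0 (rax) from src = BPF_REG_1 (rdi) at off = 4 gives:

	/* add_3mod(0x40, rdi, rax, r12) == 0x42  (only REX.X set, for r12)
	 * ModRM 0x44, SIB 0x27 (base rdi, index r12), disp8 0x04:
	 *   42 8b 44 27 04  ==  mov eax, dword ptr [rdi + r12 + 4]
	 */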
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 	*pprog = prog;
 }
 
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* mov byte ptr [rax + r12 + off], al */
+		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
+		break;
+	case BPF_H:
+		/* mov word ptr [rax + r12 + off], ax */
+		EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	case BPF_W:
+		/* mov dword ptr [rax + r12 + off], eax */
+		EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	case BPF_DW:
+		/* mov qword ptr [rax + r12 + off], rax */
+		EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+	*pprog = prog;
+}
+
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
+{
+	u8 *prog = *pprog;
+
+	switch (size) {
+	case BPF_B:
+		/* mov byte ptr [rax + r12 + off], imm8 */
+		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
+		break;
+	case BPF_H:
+		/* mov word ptr [rax + r12 + off], imm16 */
+		EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+		break;
+	case BPF_W:
+		/* mov dword ptr [rax + r12 + off], imm32 */
+		EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
+		break;
+	case BPF_DW:
+		/* mov qword ptr [rax + r12 + off], imm32 */
+		EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
+		break;
+	}
+	emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+	EMIT(imm, bpf_size_to_x86_bytes(size));
+	*pprog = prog;
+}
+
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+	emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
+}
+
 static int emit_atomic(u8 **pprog, u8 atomic_op,
 		       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
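One subtlety in emit_st_index(): the x86 0xC7 opcode has no 64-bit immediate form; with REX.W it takes an imm32 that the CPU sign-extends, which is why even the BPF_DW comment says imm32. That matches the BPF ISA, where BPF_ST carries a 32-bit immediate that is sign-extended for doubleword stores, and it assumes bpf_size_to_x86_bytes() returns 4 rather than 8 for BPF_DW, in line with the imm32 convention above:

	/* BPF: *(u64 *)(dst + off) = -1
	 * x86: REX.W C7 /0 ... ff ff ff ff  -> stores 0xffffffffffffffff
	 */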
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 	return 0;
 }
 
+#define DONT_CLEAR 1
+
 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 {
 	u32 reg = x->fixup >> 8;
 
 	/* jump over faulting load and clear dest register */
-	*(unsigned long *)((void *)regs + reg) = 0;
+	if (reg != DONT_CLEAR)
+		*(unsigned long *)((void *)regs + reg) = 0;
 	regs->ip += x->fixup & 0xff;
 	return true;
 }
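The fixup word packs two fields, and DONT_CLEAR == 1 works as a sentinel because real pt_regs offsets are 8-byte aligned, so 1 can never name a register slot:

	/* x->fixup layout:
	 *   bits 0..7 : bytes to skip over the faulting insn
	 *   bits 8..  : offsetof(struct pt_regs, <dst reg>) to zero,
	 *               or DONT_CLEAR for stores (no dest to clobber)
	 */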
@@ -1147,11 +1276,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	bool tail_call_seen = false;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+	u64 arena_vm_start, user_vm_start;
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
 	int err;
 
+	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
+
 	detect_reg_usage(insn, insn_cnt, callee_regs_used,
 			 &tail_call_seen);
 
@@ -1172,8 +1305,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		push_r12(&prog);
 		push_callee_regs(&prog, all_callee_regs_used);
 	} else {
+		if (arena_vm_start)
+			push_r12(&prog);
 		push_callee_regs(&prog, callee_regs_used);
 	}
+	if (arena_vm_start)
+		emit_mov_imm64(&prog, X86_REG_R12,
+			       arena_vm_start >> 32, (u32) arena_vm_start);
 
 	ilen = prog - temp;
 	if (rw_image)
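Loading the arena base into R12 once in the prologue is what lets every PROBE_MEM32 access later be a single SIB-addressed instruction. Assuming a nonzero arena_vm_start, the emitted prologue tail looks roughly like:

	/* push r12                    -- preserve callee-saved r12
	 * push <used callee regs>
	 * movabs r12, arena_vm_start  -- 49 bc imm64
	 */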
@@ -1213,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 			break;
 
 		case BPF_ALU64 | BPF_MOV | BPF_X:
+			if (insn->off == BPF_ADDR_SPACE_CAST &&
+			    insn->imm == 1U << 16) {
+				if (dst_reg != src_reg)
+					/* 32-bit mov */
+					emit_mov_reg(&prog, false, dst_reg, src_reg);
+				/* shl dst_reg, 32 */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);
+
+				/* or dst_reg, user_vm_start */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				if (is_axreg(dst_reg))
+					EMIT1_off32(0x0D, user_vm_start >> 32);
+				else
+					EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32);
+
+				/* rol dst_reg, 32 */
+				maybe_emit_1mod(&prog, dst_reg, true);
+				EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);
+
+				/* xor r11, r11 */
+				EMIT3(0x4D, 0x31, 0xDB);
+
+				/* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
+				maybe_emit_mod(&prog, dst_reg, dst_reg, false);
+				EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+
+				/* cmove r11, dst_reg; if so, set dst_reg to zero */
+				/* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
+				maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
+				EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
+				break;
+			}
+			fallthrough;
 		case BPF_ALU | BPF_MOV | BPF_X:
 			if (insn->off == 0)
 				emit_mov_reg(&prog,
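Taken together, the sequence computes dst = lower32(dst) ? hi32(user_vm_start) | lower32(dst) : 0, i.e. it rebuilds a full user arena pointer from a 32-bit arena offset while keeping NULL as NULL. For a hypothetical dst_reg = rdi it emits, in Intel syntax:

	/* shl   rdi, 32
	 * or    rdi, user_vm_start >> 32  -- imm32 lands in the low half
	 * rol   rdi, 32                   -- swap the halves back into place
	 * xor   r11, r11
	 * test  edi, edi                  -- lower 32 bits zero?
	 * cmove rdi, r11                  -- then the result stays NULL
	 */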
@@ -1564,6 +1736,56 @@ st: if (is_imm8(insn->off))
 				emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 			break;
 
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+			start_of_ldx = prog;
+			emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+			goto populate_extable;
+
+			/* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+		case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+			start_of_ldx = prog;
+			if (BPF_CLASS(insn->code) == BPF_LDX)
+				emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+			else
+				emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+			{
+				struct exception_table_entry *ex;
+				u8 *_insn = image + proglen + (start_of_ldx - temp);
+				s64 delta;
+
+				if (!bpf_prog->aux->extable)
+					break;
+
+				if (excnt >= bpf_prog->aux->num_exentries) {
+					pr_err("mem32 extable bug\n");
+					return -EFAULT;
+				}
+				ex = &bpf_prog->aux->extable[excnt++];
+
+				delta = _insn - (u8 *)&ex->insn;
+				/* switch ex to rw buffer for writes */
+				ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+				ex->insn = delta;
+
+				ex->data = EX_TYPE_BPF;
+
+				ex->fixup = (prog - start_of_ldx) |
+					((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+			}
+			break;
+
 		/* LDX: dst_reg = *(u8*)(src_reg + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
 		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
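exception_table_entry stores self-relative 32-bit offsets, which is why the code computes the delta against the final read-only image but performs the writes through the rw_image alias:

	/* _insn    = address of the guarded load/store in the final image
	 * ex->insn = _insn - &ex->insn   (recovered later as &ex->insn + ex->insn)
	 * ex->fixup low byte = prog - start_of_ldx  (bytes to skip on fault)
	 * ex->fixup bits 8+  = pt_regs offset to clear, or DONT_CLEAR for stores
	 */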
@@ -2036,6 +2258,8 @@ st: if (is_imm8(insn->off))
 				pop_r12(&prog);
 			} else {
 				pop_callee_regs(&prog, callee_regs_used);
+				if (arena_vm_start)
+					pop_r12(&prog);
 			}
 			EMIT1(0xC9);	/* leave */
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
@@ -3243,6 +3467,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
 	}
 }
 
+bool bpf_jit_supports_arena(void)
+{
+	return true;
+}
+
 bool bpf_jit_supports_ptr_xchg(void)
 {
 	return true;