
Commit 7d08c2c

anakryiko authored and borkmann committed
bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into functions
Similar to BPF_PROG_RUN, turn BPF_PROG_RUN_ARRAY macros into proper functions with all the same readability and maintainability benefits. Making them into functions required shuffling around bpf_set_run_ctx/bpf_reset_run_ctx functions. Also, explicitly specifying the type of the BPF prog run callback required adjusting __bpf_prog_run_save_cb() to accept const void *, casted internally to const struct sk_buff.

Further, split out a cgroup-specific BPF_PROG_RUN_ARRAY_CG and BPF_PROG_RUN_ARRAY_CG_FLAGS from the more generic BPF_PROG_RUN_ARRAY due to the differences in bpf_run_ctx used for those two different use cases.

I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to accept struct cgroup and enum bpf_attach_type instead of bpf_prog_array, fetching cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that required including include/linux/cgroup-defs.h, which I wasn't sure is ok with everyone.

The remaining generic BPF_PROG_RUN_ARRAY function will be extended to pass-through user-provided context value in the next patch.

Signed-off-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
Acked-by: Yonghong Song <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent fb7dd8b commit 7d08c2c
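For orientation (not part of the commit itself), here is a minimal before/after sketch of a cgroup-BPF call site under this patch; the names are taken from the __cgroup_bpf_run_filter_sk() hunk further down:

	/* Before: text-substitution macro, no type checking of the runner. */
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, bpf_prog_run);
	return ret == 1 ? 0 : -EPERM;

	/* After: same shape, but BPF_PROG_RUN_ARRAY_CG() is a static
	 * __always_inline function and bpf_prog_run has to match the
	 * bpf_prog_run_fn callback type, so mismatched runners are caught
	 * at compile time.
	 */
	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sk, bpf_prog_run);
	return ret == 1 ? 0 : -EPERM;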

File tree: 4 files changed, +124 -94 lines changed


include/linux/bpf.h

Lines changed: 104 additions & 75 deletions
@@ -1146,67 +1146,116 @@ struct bpf_run_ctx {};
 
 struct bpf_cg_run_ctx {
 	struct bpf_run_ctx run_ctx;
-	struct bpf_prog_array_item *prog_item;
+	const struct bpf_prog_array_item *prog_item;
 };
 
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+	struct bpf_run_ctx *old_ctx = NULL;
+
+#ifdef CONFIG_BPF_SYSCALL
+	old_ctx = current->bpf_ctx;
+	current->bpf_ctx = new_ctx;
+#endif
+	return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+#ifdef CONFIG_BPF_SYSCALL
+	current->bpf_ctx = old_ctx;
+#endif
+}
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE	(1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN				(1 << 0)
 
-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
-	({								\
-		struct bpf_prog_array_item *_item;			\
-		struct bpf_prog *_prog;					\
-		struct bpf_prog_array *_array;				\
-		struct bpf_run_ctx *old_run_ctx;			\
-		struct bpf_cg_run_ctx run_ctx;				\
-		u32 _ret = 1;						\
-		u32 func_ret;						\
-		migrate_disable();					\
-		rcu_read_lock();					\
-		_array = rcu_dereference(array);			\
-		_item = &_array->items[0];				\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);	\
-		while ((_prog = READ_ONCE(_item->prog))) {		\
-			run_ctx.prog_item = _item;			\
-			func_ret = func(_prog, ctx);			\
-			_ret &= (func_ret & 1);				\
-			*(ret_flags) |= (func_ret >> 1);		\
-			_item++;					\
-		}							\
-		bpf_reset_run_ctx(old_run_ctx);				\
-		rcu_read_unlock();					\
-		migrate_enable();					\
-		_ret;							\
-	 })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage)	\
-	({						\
-		struct bpf_prog_array_item *_item;	\
-		struct bpf_prog *_prog;			\
-		struct bpf_prog_array *_array;		\
-		struct bpf_run_ctx *old_run_ctx;	\
-		struct bpf_cg_run_ctx run_ctx;		\
-		u32 _ret = 1;				\
-		migrate_disable();			\
-		rcu_read_lock();			\
-		_array = rcu_dereference(array);	\
-		if (unlikely(check_non_null && !_array))\
-			goto _out;			\
-		_item = &_array->items[0];		\
-		old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
-		while ((_prog = READ_ONCE(_item->prog))) {	\
-			run_ctx.prog_item = _item;	\
-			_ret &= func(_prog, ctx);	\
-			_item++;			\
-		}					\
-		bpf_reset_run_ctx(old_run_ctx);		\
-_out:							\
-		rcu_read_unlock();			\
-		migrate_enable();			\
-		_ret;					\
-	 })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+			    const void *ctx, bpf_prog_run_fn run_prog,
+			    u32 *ret_flags)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+	u32 func_ret;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		func_ret = run_prog(prog, ctx);
+		ret &= (func_ret & 1);
+		*(ret_flags) |= (func_ret >> 1);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+		      const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	struct bpf_run_ctx *old_run_ctx;
+	struct bpf_cg_run_ctx run_ctx;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	item = &array->items[0];
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+	while ((prog = READ_ONCE(item->prog))) {
+		run_ctx.prog_item = item;
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+	bpf_reset_run_ctx(old_run_ctx);
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+		   const void *ctx, bpf_prog_run_fn run_prog)
+{
+	const struct bpf_prog_array_item *item;
+	const struct bpf_prog *prog;
+	const struct bpf_prog_array *array;
+	u32 ret = 1;
+
+	migrate_disable();
+	rcu_read_lock();
+	array = rcu_dereference(array_rcu);
+	if (unlikely(!array))
+		goto out;
+	item = &array->items[0];
+	while ((prog = READ_ONCE(item->prog))) {
+		ret &= run_prog(prog, ctx);
+		item++;
+	}
+out:
+	rcu_read_unlock();
+	migrate_enable();
+	return ret;
+}
 
 /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
  * so BPF programs can request cwr for TCP packets.
@@ -1235,7 +1284,7 @@ _out: \
 		u32 _flags = 0;					\
 		bool _cn;					\
 		u32 _ret;					\
-		_ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
 		_cn = _flags & BPF_RET_SET_CN;			\
 		if (_ret)					\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
@@ -1244,12 +1293,6 @@ _out: \
 		_ret;						\
 	})
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)		\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)	\
-	__BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;
@@ -1284,20 +1327,6 @@ static inline void bpf_enable_instrumentation(void)
 	migrate_enable();
 }
 
-static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
-{
-	struct bpf_run_ctx *old_ctx;
-
-	old_ctx = current->bpf_ctx;
-	current->bpf_ctx = new_ctx;
-	return old_ctx;
-}
-
-static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
-{
-	current->bpf_ctx = old_ctx;
-}
-
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 extern const struct file_operations bpf_iter_fops;
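A hedged usage sketch for the new BPF_PROG_RUN_ARRAY_CG_FLAGS() helper, mirroring what the BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY hunk just above does with its result: bit 0 of each program's return value carries the allow/deny verdict, and the remaining bits are OR-ed into *ret_flags.

	u32 flags = 0;
	u32 ret;

	ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, bpf_prog_run, &flags);
	/* ret is 1 only if every attached program set bit 0 of its return
	 * value; any higher bits (e.g. BPF_RET_SET_CN) have accumulated in
	 * flags and can be acted on separately.
	 */
	if (ret)
		ret = (flags & BPF_RET_SET_CN) ? NET_XMIT_CN : NET_XMIT_SUCCESS;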

include/linux/filter.h

Lines changed: 3 additions & 2 deletions
@@ -723,7 +723,7 @@ static inline void bpf_restore_data_end(
 	cb->data_end = saved_data_end;
 }
 
-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
 	 * data between tail calls. Since this also needs to work with
@@ -744,8 +744,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 
 /* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
-					 struct sk_buff *skb)
+					 const void *ctx)
 {
+	const struct sk_buff *skb = ctx;
 	u8 *cb_data = bpf_skb_cb(skb);
 	u8 cb_saved[BPF_SKB_CB_LEN];
 	u32 res;
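The switch to const void * above is what lets __bpf_prog_run_save_cb() be passed as the typed callback introduced in include/linux/bpf.h; an illustration of the relationship (not part of the patch):

	/* typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog,
	 *				  const void *ctx);
	 */
	bpf_prog_run_fn run = __bpf_prog_run_save_cb;	/* now type-compatible */

	/* Inside the runner the concrete type is recovered with
	 * "const struct sk_buff *skb = ctx;", as shown in the hunk above.
	 */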

kernel/bpf/cgroup.c

Lines changed: 16 additions & 16 deletions
@@ -1012,8 +1012,8 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
 			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
 	} else {
-		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-					  __bpf_prog_run_save_cb);
+		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], skb,
+					    __bpf_prog_run_save_cb);
 		ret = (ret == 1 ? 0 : -EPERM);
 	}
 	bpf_restore_data_end(skb, saved_data_end);
@@ -1043,7 +1043,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sk, bpf_prog_run);
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1090,8 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
-				       bpf_prog_run, flags);
+	ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[type], &ctx,
+					  bpf_prog_run, flags);
 
 	return ret == 1 ? 0 : -EPERM;
 }
@@ -1120,8 +1120,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	int ret;
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
-				 bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], sock_ops,
+				    bpf_prog_run);
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
@@ -1139,8 +1139,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
-				   bpf_prog_run);
+	allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx,
+				      bpf_prog_run);
 	rcu_read_unlock();
 
 	return !allow;
@@ -1271,7 +1271,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[type], &ctx, bpf_prog_run);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1385,8 +1385,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
-				 &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
+				    &ctx, bpf_prog_run);
 	release_sock(sk);
 
 	if (!ret) {
@@ -1495,8 +1495,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-				 &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+				    &ctx, bpf_prog_run);
 	release_sock(sk);
 
 	if (!ret) {
@@ -1556,8 +1556,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 * be called if that data shouldn't be "exported".
 	 */
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-				 &ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
+				    &ctx, bpf_prog_run);
 	if (!ret)
 		return -EPERM;
 
kernel/trace/bpf_trace.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	 * out of events when it was updated in between this and the
 	 * rcu_dereference() which is accepted risk.
 	 */
-	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, bpf_prog_run);
+	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
 
 out:
 	__this_cpu_dec(bpf_prog_active);
