Commit 6e10b63

Author: Alexei Starovoitov (committed)

Merge branch 'introduce-bpf_wq'
Benjamin Tissoires says:

====================
Introduce bpf_wq

This is a followup of sleepable bpf_timer[0].

When discussing sleepable bpf_timer, it was thought that we should give a try to bpf_wq, as the 2 APIs are similar but distinct enough to justify a new one. So here it is.

I tried to keep as much common code as possible in kernel/bpf/helpers.c, but I couldn't get away without code duplication in kernel/bpf/verifier.c.

This series introduces basic bpf_wq support:
- creation is supported
- assignment is supported
- running a simple bpf_wq is also supported

We will probably need to extend the API further with:
- a full delayed_work API (can be piggybacked on top with a correct flag)
- bpf_wq_cancel() <- apparently not, this is shooting ourselves in the foot
- bpf_wq_cancel_sync() (for sleepable programs)
- documentation

---

For reference, the use cases I have in mind:

---

Basically, I need to be able to defer a HID-BPF program for the following reasons (from the aforementioned patch):

1. defer an event: Sometimes we receive an out-of-proximity event, but the device cannot be trusted enough, and we need to ensure that we won't receive another one in the following n milliseconds. So we need to wait those n milliseconds, and eventually re-inject that event into the stack.

2. inject new events in reaction to one given event: We might want to transform one given event into several. This is the case for macro keys, where a single key press is supposed to send a sequence of key presses. But this could also be used to patch a faulty behavior, if a device forgets to send a release event.

3. communicate with the device in reaction to one event: We might want to communicate back to the device after a given event. For example, a device might send us an event saying that it came back from sleeping state and needs to be re-initialized.

Currently we can achieve that by keeping a userspace program around, raising a bpf event, and letting that userspace program inject the events and commands. However, we are just keeping that program alive as a daemon for just scheduling commands. There is no logic in it, so it doesn't really justify an actual userspace wakeup. A kernel workqueue seems simpler to handle.

bpf_timers currently run in a soft IRQ context; this patch series implements a sleepable context for them.

Cheers,
Benjamin

[0] https://lore.kernel.org/all/[email protected]/

Changes in v2:
- took previous review into account - mainly dropped BPF_F_WQ_SLEEPABLE
- Link to v1: https://lore.kernel.org/r/[email protected]
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents a7de265 + 8290dba commit 6e10b63
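
For illustration, here is a minimal sketch of the BPF-side API this series lands, assuming the kfunc declarations used by its selftests (bpf_wq_init, bpf_wq_set_callback_impl, bpf_wq_start); the map, section, and callback names below are illustrative only:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

/* kfunc declarations; in-tree selftests get these from bpf_experimental.h */
int bpf_wq_init(struct bpf_wq *wq, void *map, unsigned int flags) __ksym;
int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __ksym;
int bpf_wq_set_callback_impl(struct bpf_wq *wq,
			     int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
			     unsigned int flags, void *aux__ign) __ksym;
#define bpf_wq_set_callback(wq, cb, flags) \
	bpf_wq_set_callback_impl(wq, cb, flags, NULL)

/* Like bpf_timer, a bpf_wq must be embedded in a map value */
struct elem {
	struct bpf_wq work;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} wq_map SEC(".maps");

/* Runs later from a kernel workqueue, i.e. in a sleepable context,
 * unlike bpf_timer callbacks, which run in soft IRQ */
static int wq_cb(void *map, int *key, struct bpf_wq *wq)
{
	/* deferred work goes here: re-inject an event, talk to the device, ... */
	return 0;
}

SEC("tc")
int schedule_work(void *ctx)
{
	struct elem *val;
	int key = 0;

	val = bpf_map_lookup_elem(&wq_map, &key);
	if (!val)
		return 0;
	if (bpf_wq_init(&val->work, &wq_map, 0))
		return 0;
	if (bpf_wq_set_callback(&val->work, wq_cb, 0))
		return 0;
	bpf_wq_start(&val->work, 0);
	return 0;
}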

16 files changed: +889 −103 lines

include/linux/bpf.h

Lines changed: 12 additions & 1 deletion
@@ -185,7 +185,7 @@ struct bpf_map_ops {
 
 enum {
 	/* Support at most 10 fields in a BTF type */
-	BTF_FIELDS_MAX	= 10,
+	BTF_FIELDS_MAX	= 11,
 };
 
 enum btf_field_type {
@@ -202,6 +202,7 @@ enum btf_field_type {
 	BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
 	BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
 	BPF_REFCOUNT = (1 << 9),
+	BPF_WORKQUEUE = (1 << 10),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -238,6 +239,7 @@ struct btf_record {
 	u32 field_mask;
 	int spin_lock_off;
 	int timer_off;
+	int wq_off;
 	int refcount_off;
 	struct btf_field fields[];
 };
@@ -312,6 +314,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_spin_lock";
 	case BPF_TIMER:
 		return "bpf_timer";
+	case BPF_WORKQUEUE:
+		return "bpf_wq";
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 		return "kptr";
@@ -340,6 +344,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_spin_lock);
 	case BPF_TIMER:
 		return sizeof(struct bpf_timer);
+	case BPF_WORKQUEUE:
+		return sizeof(struct bpf_wq);
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
@@ -367,6 +373,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_spin_lock);
 	case BPF_TIMER:
 		return __alignof__(struct bpf_timer);
+	case BPF_WORKQUEUE:
+		return __alignof__(struct bpf_wq);
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
@@ -406,6 +414,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
 	case BPF_SPIN_LOCK:
 	case BPF_TIMER:
+	case BPF_WORKQUEUE:
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
@@ -525,6 +534,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 			   bool lock_src);
 void bpf_timer_cancel_and_free(void *timer);
+void bpf_wq_cancel_and_free(void *timer);
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
 			struct bpf_spin_lock *spin_lock);
 void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -2195,6 +2205,7 @@ void bpf_map_free_record(struct bpf_map *map);
 struct btf_record *btf_record_dup(const struct btf_record *rec);
 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
 void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
+void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
 void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);

include/linux/bpf_verifier.h

Lines changed: 1 addition & 0 deletions
@@ -426,6 +426,7 @@ struct bpf_verifier_state {
 	 * while they are still in use.
 	 */
 	bool used_as_loop_entry;
+	bool in_sleepable;
 
 	/* first and last insn idx of this verifier state */
 	u32 first_insn_idx;

include/uapi/linux/bpf.h

Lines changed: 4 additions & 0 deletions
@@ -7306,6 +7306,10 @@ struct bpf_timer {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
 
+struct bpf_wq {
+	__u64 __opaque[2];
+} __attribute__((aligned(8)));
+
 struct bpf_dynptr {
 	__u64 __opaque[2];
 } __attribute__((aligned(8)));
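
Like bpf_timer and bpf_dynptr next to it, bpf_wq is opaque to programs: sixteen 8-byte-aligned bytes whose contents only the kernel interprets. A consumer can pin that contract down at build time; a sketch against the installed uapi header:

#include <linux/bpf.h>

/* Compile-time checks of the uapi layout shown in the hunk above */
_Static_assert(sizeof(struct bpf_wq) == 2 * sizeof(__u64),
	       "bpf_wq is two opaque u64s");
_Static_assert(_Alignof(struct bpf_wq) == 8,
	       "bpf_wq is 8-byte aligned");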

kernel/bpf/arraymap.c

Lines changed: 11 additions & 7 deletions
@@ -428,17 +428,21 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
 	return (void *)round_down((unsigned long)array, PAGE_SIZE);
 }
 
-static void array_map_free_timers(struct bpf_map *map)
+static void array_map_free_timers_wq(struct bpf_map *map)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	int i;
 
-	/* We don't reset or free fields other than timer on uref dropping to zero. */
-	if (!btf_record_has_field(map->record, BPF_TIMER))
-		return;
+	/* We don't reset or free fields other than timer and workqueue
+	 * on uref dropping to zero.
+	 */
+	if (btf_record_has_field(map->record, BPF_TIMER))
+		for (i = 0; i < array->map.max_entries; i++)
+			bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
 
-	for (i = 0; i < array->map.max_entries; i++)
-		bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
+	if (btf_record_has_field(map->record, BPF_WORKQUEUE))
+		for (i = 0; i < array->map.max_entries; i++)
+			bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
 }
 
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -782,7 +786,7 @@ const struct bpf_map_ops array_map_ops = {
 	.map_alloc = array_map_alloc,
 	.map_free = array_map_free,
 	.map_get_next_key = array_map_get_next_key,
-	.map_release_uref = array_map_free_timers,
+	.map_release_uref = array_map_free_timers_wq,
 	.map_lookup_elem = array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,

kernel/bpf/btf.c

Lines changed: 17 additions & 0 deletions
@@ -3464,6 +3464,15 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
 			goto end;
 		}
 	}
+	if (field_mask & BPF_WORKQUEUE) {
+		if (!strcmp(name, "bpf_wq")) {
+			if (*seen_mask & BPF_WORKQUEUE)
+				return -E2BIG;
+			*seen_mask |= BPF_WORKQUEUE;
+			type = BPF_WORKQUEUE;
+			goto end;
+		}
+	}
 	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
 	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
 	field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
@@ -3515,6 +3524,7 @@ static int btf_find_struct_field(const struct btf *btf,
 		switch (field_type) {
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_WORKQUEUE:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
 		case BPF_REFCOUNT:
@@ -3582,6 +3592,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		switch (field_type) {
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_WORKQUEUE:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
 		case BPF_REFCOUNT:
@@ -3816,6 +3827,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 
 	rec->spin_lock_off = -EINVAL;
 	rec->timer_off = -EINVAL;
+	rec->wq_off = -EINVAL;
 	rec->refcount_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
@@ -3846,6 +3858,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->timer_off = rec->fields[i].offset;
 			break;
+		case BPF_WORKQUEUE:
+			WARN_ON_ONCE(rec->wq_off >= 0);
+			/* Cache offset for faster lookup at runtime */
+			rec->wq_off = rec->fields[i].offset;
+			break;
 		case BPF_REFCOUNT:
 			WARN_ON_ONCE(rec->refcount_off >= 0);
 			/* Cache offset for faster lookup at runtime */
kernel/bpf/hashtab.c

Lines changed: 43 additions & 12 deletions
@@ -240,6 +240,26 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
 	}
 }
 
+static void htab_free_prealloced_wq(struct bpf_htab *htab)
+{
+	u32 num_entries = htab->map.max_entries;
+	int i;
+
+	if (!btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
+		return;
+	if (htab_has_extra_elems(htab))
+		num_entries += num_possible_cpus();
+
+	for (i = 0; i < num_entries; i++) {
+		struct htab_elem *elem;
+
+		elem = get_htab_elem(htab, i);
+		bpf_obj_free_workqueue(htab->map.record,
+				       elem->key + round_up(htab->map.key_size, 8));
+		cond_resched();
+	}
+}
+
 static void htab_free_prealloced_fields(struct bpf_htab *htab)
 {
 	u32 num_entries = htab->map.max_entries;
@@ -1495,7 +1515,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 	migrate_enable();
 }
 
-static void htab_free_malloced_timers(struct bpf_htab *htab)
+static void htab_free_malloced_timers_or_wq(struct bpf_htab *htab, bool is_timer)
 {
 	int i;
 
@@ -1507,24 +1527,35 @@
 
 		hlist_nulls_for_each_entry(l, n, head, hash_node) {
 			/* We only free timer on uref dropping to zero */
-			bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8));
+			if (is_timer)
+				bpf_obj_free_timer(htab->map.record,
+						   l->key + round_up(htab->map.key_size, 8));
+			else
+				bpf_obj_free_workqueue(htab->map.record,
+						       l->key + round_up(htab->map.key_size, 8));
 		}
 		cond_resched_rcu();
 	}
 	rcu_read_unlock();
 }
 
-static void htab_map_free_timers(struct bpf_map *map)
+static void htab_map_free_timers_and_wq(struct bpf_map *map)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 
-	/* We only free timer on uref dropping to zero */
-	if (!btf_record_has_field(htab->map.record, BPF_TIMER))
-		return;
-	if (!htab_is_prealloc(htab))
-		htab_free_malloced_timers(htab);
-	else
-		htab_free_prealloced_timers(htab);
+	/* We only free timer and workqueue on uref dropping to zero */
+	if (btf_record_has_field(htab->map.record, BPF_TIMER)) {
+		if (!htab_is_prealloc(htab))
+			htab_free_malloced_timers_or_wq(htab, true);
+		else
+			htab_free_prealloced_timers(htab);
+	}
+	if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) {
+		if (!htab_is_prealloc(htab))
+			htab_free_malloced_timers_or_wq(htab, false);
+		else
+			htab_free_prealloced_wq(htab);
+	}
 }
 
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -2260,7 +2291,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers,
+	.map_release_uref = htab_map_free_timers_and_wq,
 	.map_lookup_elem = htab_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
@@ -2281,7 +2312,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
-	.map_release_uref = htab_map_free_timers,
+	.map_release_uref = htab_map_free_timers_and_wq,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
 	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
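
For context, the map_release_uref callback rewired in arraymap.c and hashtab.c above fires when the last user reference to a map is dropped; roughly, paraphrasing kernel/bpf/syscall.c (not part of this diff):

/* Paraphrased from kernel/bpf/syscall.c for context; not part of this diff */
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

Timers and workqueues are thus cancelled and freed as soon as userspace drops its last reference, rather than waiting for map_free, so pending async callbacks cannot outlive the user context that armed them.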
