Skip to content

Commit b936ca6

Browse files
rgushchin authored and Alexei Starovoitov committed
bpf: rework memlock-based memory accounting for maps
In order to unify the existing memlock charging code with the memcg-based memory accounting, which will be added later, let's rework the current scheme. Currently the following design is used: 1) .alloc() callback optionally checks if the allocation will likely succeed using bpf_map_precharge_memlock() 2) .alloc() performs actual allocations 3) .alloc() callback calculates map cost and sets map.memory.pages 4) map_create() calls bpf_map_init_memlock() which sets map.memory.user and performs actual charging; in case of failure the map is destroyed <map is in use> 1) bpf_map_free_deferred() calls bpf_map_release_memlock(), which performs uncharge and releases the user 2) .map_free() callback releases the memory The scheme can be simplified and made more robust: 1) .alloc() calculates map cost and calls bpf_map_charge_init() 2) bpf_map_charge_init() sets map.memory.user and performs actual charge 3) .alloc() performs actual allocations <map is in use> 1) .map_free() callback releases the memory 2) bpf_map_charge_finish() performs uncharge and releases the user The new scheme also allows to reuse bpf_map_charge_init()/finish() functions for memcg-based accounting. Because charges are performed before actual allocations and uncharges after freeing the memory, no bogus memory pressure can be created. In cases when the map structure is not available (e.g. it's not created yet, or is already destroyed), on-stack bpf_map_memory structure is used. The charge can be transferred with the bpf_map_charge_move() function. Signed-off-by: Roman Gushchin <[email protected]> Acked-by: Song Liu <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 3539b96 commit b936ca6

File tree

14 files changed

+112
-88
lines changed

14 files changed

+112
-88
lines changed

include/linux/bpf.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
650650
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
651651
void bpf_map_put_with_uref(struct bpf_map *map);
652652
void bpf_map_put(struct bpf_map *map);
653-
int bpf_map_precharge_memlock(u32 pages);
654653
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
655654
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
655+
int bpf_map_charge_init(struct bpf_map_memory *mem, u32 pages);
656+
void bpf_map_charge_finish(struct bpf_map_memory *mem);
657+
void bpf_map_charge_move(struct bpf_map_memory *dst,
658+
struct bpf_map_memory *src);
656659
void *bpf_map_area_alloc(size_t size, int numa_node);
657660
void bpf_map_area_free(void *base);
658661
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);

kernel/bpf/arraymap.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
8383
u32 elem_size, index_mask, max_entries;
8484
bool unpriv = !capable(CAP_SYS_ADMIN);
8585
u64 cost, array_size, mask64;
86+
struct bpf_map_memory mem;
8687
struct bpf_array *array;
8788

8889
elem_size = round_up(attr->value_size, 8);
@@ -125,23 +126,26 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
125126
}
126127
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
127128

128-
ret = bpf_map_precharge_memlock(cost);
129+
ret = bpf_map_charge_init(&mem, cost);
129130
if (ret < 0)
130131
return ERR_PTR(ret);
131132

132133
/* allocate all map elements and zero-initialize them */
133134
array = bpf_map_area_alloc(array_size, numa_node);
134-
if (!array)
135+
if (!array) {
136+
bpf_map_charge_finish(&mem);
135137
return ERR_PTR(-ENOMEM);
138+
}
136139
array->index_mask = index_mask;
137140
array->map.unpriv_array = unpriv;
138141

139142
/* copy mandatory map attributes */
140143
bpf_map_init_from_attr(&array->map, attr);
141-
array->map.memory.pages = cost;
144+
bpf_map_charge_move(&array->map.memory, &mem);
142145
array->elem_size = elem_size;
143146

144147
if (percpu && bpf_array_alloc_percpu(array)) {
148+
bpf_map_charge_finish(&array->map.memory);
145149
bpf_map_area_free(array);
146150
return ERR_PTR(-ENOMEM);
147151
}

kernel/bpf/cpumap.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,10 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
108108
cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
109109
if (cost >= U32_MAX - PAGE_SIZE)
110110
goto free_cmap;
111-
cmap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
112111

113112
/* Notice returns -EPERM on if map size is larger than memlock limit */
114-
ret = bpf_map_precharge_memlock(cmap->map.memory.pages);
113+
ret = bpf_map_charge_init(&cmap->map.memory,
114+
round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
115115
if (ret) {
116116
err = ret;
117117
goto free_cmap;
@@ -121,7 +121,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
121121
cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
122122
__alignof__(unsigned long));
123123
if (!cmap->flush_needed)
124-
goto free_cmap;
124+
goto free_charge;
125125

126126
/* Alloc array for possible remote "destination" CPUs */
127127
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
@@ -133,6 +133,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
133133
return &cmap->map;
134134
free_percpu:
135135
free_percpu(cmap->flush_needed);
136+
free_charge:
137+
bpf_map_charge_finish(&cmap->map.memory);
136138
free_cmap:
137139
kfree(cmap);
138140
return ERR_PTR(err);

kernel/bpf/devmap.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
111111
if (cost >= U32_MAX - PAGE_SIZE)
112112
goto free_dtab;
113113

114-
dtab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
115-
116-
/* if map size is larger than memlock limit, reject it early */
117-
err = bpf_map_precharge_memlock(dtab->map.memory.pages);
114+
/* if map size is larger than memlock limit, reject it */
115+
err = bpf_map_charge_init(&dtab->map.memory,
116+
round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
118117
if (err)
119118
goto free_dtab;
120119

@@ -125,19 +124,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
125124
__alignof__(unsigned long),
126125
GFP_KERNEL | __GFP_NOWARN);
127126
if (!dtab->flush_needed)
128-
goto free_dtab;
127+
goto free_charge;
129128

130129
dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
131130
sizeof(struct bpf_dtab_netdev *),
132131
dtab->map.numa_node);
133132
if (!dtab->netdev_map)
134-
goto free_dtab;
133+
goto free_charge;
135134

136135
spin_lock(&dev_map_lock);
137136
list_add_tail_rcu(&dtab->list, &dev_map_list);
138137
spin_unlock(&dev_map_lock);
139138

140139
return &dtab->map;
140+
free_charge:
141+
bpf_map_charge_finish(&dtab->map.memory);
141142
free_dtab:
142143
free_percpu(dtab->flush_needed);
143144
kfree(dtab);

kernel/bpf/hashtab.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -364,10 +364,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
364364
/* make sure page count doesn't overflow */
365365
goto free_htab;
366366

367-
htab->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
368-
369-
/* if map size is larger than memlock limit, reject it early */
370-
err = bpf_map_precharge_memlock(htab->map.memory.pages);
367+
/* if map size is larger than memlock limit, reject it */
368+
err = bpf_map_charge_init(&htab->map.memory,
369+
round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
371370
if (err)
372371
goto free_htab;
373372

@@ -376,7 +375,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
376375
sizeof(struct bucket),
377376
htab->map.numa_node);
378377
if (!htab->buckets)
379-
goto free_htab;
378+
goto free_charge;
380379

381380
if (htab->map.map_flags & BPF_F_ZERO_SEED)
382381
htab->hashrnd = 0;
@@ -409,6 +408,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
409408
prealloc_destroy(htab);
410409
free_buckets:
411410
bpf_map_area_free(htab->buckets);
411+
free_charge:
412+
bpf_map_charge_finish(&htab->map.memory);
412413
free_htab:
413414
kfree(htab);
414415
return ERR_PTR(err);

kernel/bpf/local_storage.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
272272
{
273273
int numa_node = bpf_map_attr_numa_node(attr);
274274
struct bpf_cgroup_storage_map *map;
275+
struct bpf_map_memory mem;
275276
u32 pages;
276277
int ret;
277278

@@ -294,16 +295,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
294295

295296
pages = round_up(sizeof(struct bpf_cgroup_storage_map), PAGE_SIZE) >>
296297
PAGE_SHIFT;
297-
ret = bpf_map_precharge_memlock(pages);
298+
ret = bpf_map_charge_init(&mem, pages);
298299
if (ret < 0)
299300
return ERR_PTR(ret);
300301

301302
map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
302303
__GFP_ZERO | GFP_USER, numa_node);
303-
if (!map)
304+
if (!map) {
305+
bpf_map_charge_finish(&mem);
304306
return ERR_PTR(-ENOMEM);
307+
}
305308

306-
map->map.memory.pages = pages;
309+
bpf_map_charge_move(&map->map.memory, &mem);
307310

308311
/* copy mandatory map attributes */
309312
bpf_map_init_from_attr(&map->map, attr);

kernel/bpf/lpm_trie.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,9 +578,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
578578
goto out_err;
579579
}
580580

581-
trie->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
582-
583-
ret = bpf_map_precharge_memlock(trie->map.memory.pages);
581+
ret = bpf_map_charge_init(&trie->map.memory,
582+
round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
584583
if (ret)
585584
goto out_err;
586585

kernel/bpf/queue_stack_maps.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
6767
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
6868
{
6969
int ret, numa_node = bpf_map_attr_numa_node(attr);
70+
struct bpf_map_memory mem = {0};
7071
struct bpf_queue_stack *qs;
7172
u64 size, queue_size, cost;
7273

@@ -77,19 +78,21 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
7778

7879
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
7980

80-
ret = bpf_map_precharge_memlock(cost);
81+
ret = bpf_map_charge_init(&mem, cost);
8182
if (ret < 0)
8283
return ERR_PTR(ret);
8384

8485
qs = bpf_map_area_alloc(queue_size, numa_node);
85-
if (!qs)
86+
if (!qs) {
87+
bpf_map_charge_finish(&mem);
8688
return ERR_PTR(-ENOMEM);
89+
}
8790

8891
memset(qs, 0, sizeof(*qs));
8992

9093
bpf_map_init_from_attr(&qs->map, attr);
9194

92-
qs->map.memory.pages = cost;
95+
bpf_map_charge_move(&qs->map.memory, &mem);
9396
qs->size = size;
9497

9598
raw_spin_lock_init(&qs->lock);

kernel/bpf/reuseport_array.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
151151
{
152152
int err, numa_node = bpf_map_attr_numa_node(attr);
153153
struct reuseport_array *array;
154+
struct bpf_map_memory mem;
154155
u64 cost, array_size;
155156

156157
if (!capable(CAP_SYS_ADMIN))
@@ -165,18 +166,20 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
165166
return ERR_PTR(-ENOMEM);
166167
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
167168

168-
err = bpf_map_precharge_memlock(cost);
169+
err = bpf_map_charge_init(&mem, cost);
169170
if (err)
170171
return ERR_PTR(err);
171172

172173
/* allocate all map elements and zero-initialize them */
173174
array = bpf_map_area_alloc(array_size, numa_node);
174-
if (!array)
175+
if (!array) {
176+
bpf_map_charge_finish(&mem);
175177
return ERR_PTR(-ENOMEM);
178+
}
176179

177180
/* copy mandatory map attributes */
178181
bpf_map_init_from_attr(&array->map, attr);
179-
array->map.memory.pages = cost;
182+
bpf_map_charge_move(&array->map.memory, &mem);
180183

181184
return &array->map;
182185
}

kernel/bpf/stackmap.c

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
8989
{
9090
u32 value_size = attr->value_size;
9191
struct bpf_stack_map *smap;
92+
struct bpf_map_memory mem;
9293
u64 cost, n_buckets;
9394
int err;
9495

@@ -116,40 +117,43 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
116117
n_buckets = roundup_pow_of_two(attr->max_entries);
117118

118119
cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
120+
if (cost >= U32_MAX - PAGE_SIZE)
121+
return ERR_PTR(-E2BIG);
122+
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
119123
if (cost >= U32_MAX - PAGE_SIZE)
120124
return ERR_PTR(-E2BIG);
121125

126+
err = bpf_map_charge_init(&mem,
127+
round_up(cost, PAGE_SIZE) >> PAGE_SHIFT);
128+
if (err)
129+
return ERR_PTR(err);
130+
122131
smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
123-
if (!smap)
132+
if (!smap) {
133+
bpf_map_charge_finish(&mem);
124134
return ERR_PTR(-ENOMEM);
125-
126-
err = -E2BIG;
127-
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
128-
if (cost >= U32_MAX - PAGE_SIZE)
129-
goto free_smap;
135+
}
130136

131137
bpf_map_init_from_attr(&smap->map, attr);
132138
smap->map.value_size = value_size;
133139
smap->n_buckets = n_buckets;
134-
smap->map.memory.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
135-
136-
err = bpf_map_precharge_memlock(smap->map.memory.pages);
137-
if (err)
138-
goto free_smap;
139140

140141
err = get_callchain_buffers(sysctl_perf_event_max_stack);
141142
if (err)
142-
goto free_smap;
143+
goto free_charge;
143144

144145
err = prealloc_elems_and_freelist(smap);
145146
if (err)
146147
goto put_buffers;
147148

149+
bpf_map_charge_move(&smap->map.memory, &mem);
150+
148151
return &smap->map;
149152

150153
put_buffers:
151154
put_callchain_buffers();
152-
free_smap:
155+
free_charge:
156+
bpf_map_charge_finish(&mem);
153157
bpf_map_area_free(smap);
154158
return ERR_PTR(err);
155159
}

0 commit comments

Comments (0)