Skip to content

Commit 492c5d4

Browse files
keithbusch authored and axboe committed
block: bio-integrity: directly map user buffers
Passthrough commands that utilize metadata currently need to bounce the user space buffer through the kernel. Add support for mapping user space directly so that we can avoid this costly overhead. This is similar to how the normal bio data payload utilizes user addresses with bio_map_user_iov(). If the user address can't directly be used for any reason, like too many segments or address misalignment, fall back to a copy of the user vec while keeping the user address pinned for the IO duration so that it can safely be copied on completion in any process context. Signed-off-by: Keith Busch <[email protected]> Link: https://lore.kernel.org/r/[email protected] [axboe: fold in fix from Kanchan Joshi] Signed-off-by: Jens Axboe <[email protected]>
1 parent 668bfee commit 492c5d4

File tree

2 files changed

+225
-2
lines changed

2 files changed

+225
-2
lines changed

block/bio-integrity.c

Lines changed: 216 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
6969

7070
memset(bip, 0, sizeof(*bip));
7171

72+
/* always report as many vecs as asked explicitly, not inline vecs */
73+
bip->bip_max_vcnt = nr_vecs;
7274
if (nr_vecs > inline_vecs) {
73-
bip->bip_max_vcnt = nr_vecs;
7475
bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
7576
&bip->bip_max_vcnt, gfp_mask);
7677
if (!bip->bip_vec)
7778
goto err;
7879
} else {
7980
bip->bip_vec = bip->bip_inline_vecs;
80-
bip->bip_max_vcnt = inline_vecs;
8181
}
8282

8383
bip->bip_bio = bio;
@@ -91,6 +91,47 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
9191
}
9292
EXPORT_SYMBOL(bio_integrity_alloc);
9393

94+
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
95+
bool dirty)
96+
{
97+
int i;
98+
99+
for (i = 0; i < nr_vecs; i++) {
100+
if (dirty && !PageCompound(bv[i].bv_page))
101+
set_page_dirty_lock(bv[i].bv_page);
102+
unpin_user_page(bv[i].bv_page);
103+
}
104+
}
105+
106+
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
107+
{
108+
unsigned short nr_vecs = bip->bip_max_vcnt - 1;
109+
struct bio_vec *copy = &bip->bip_vec[1];
110+
size_t bytes = bip->bip_iter.bi_size;
111+
struct iov_iter iter;
112+
int ret;
113+
114+
iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
115+
ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
116+
WARN_ON_ONCE(ret != bytes);
117+
118+
bio_integrity_unpin_bvec(copy, nr_vecs, true);
119+
}
120+
121+
static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
122+
{
123+
bool dirty = bio_data_dir(bip->bip_bio) == READ;
124+
125+
if (bip->bip_flags & BIP_COPY_USER) {
126+
if (dirty)
127+
bio_integrity_uncopy_user(bip);
128+
kfree(bvec_virt(bip->bip_vec));
129+
return;
130+
}
131+
132+
bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
133+
}
134+
94135
/**
95136
* bio_integrity_free - Free bio integrity payload
96137
* @bio: bio containing bip to be freed
@@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)
105146

106147
if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
107148
kfree(bvec_virt(bip->bip_vec));
149+
else if (bip->bip_flags & BIP_INTEGRITY_USER)
150+
bio_integrity_unmap_user(bip);
108151

109152
__bio_integrity_free(bs, bip);
110153
bio->bi_integrity = NULL;
@@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
160203
}
161204
EXPORT_SYMBOL(bio_integrity_add_page);
162205

206+
/*
 * Attach a kernel bounce buffer of @len bytes as the integrity payload of
 * @bio instead of the pinned user pages described by @bvec.
 *
 * For writes (@direction == ITER_SOURCE) the user data is copied into the
 * bounce buffer up front, and the user pages are unpinned once the payload
 * is fully set up. For reads the bounce buffer is zeroed and the user bvecs
 * are saved in bip_vec[1..] so the data can be copied back on completion.
 *
 * Returns 0 on success or a negative errno. On failure the pages in @bvec
 * are still pinned and must be released by the caller.
 */
static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len,
				   unsigned int direction, u32 seed)
{
	bool write = direction == ITER_SOURCE;
	struct bio_integrity_payload *bip;
	struct iov_iter iter;
	void *buf;
	int ret;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (write) {
		iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
		if (!copy_from_iter_full(buf, len, &iter)) {
			ret = -EFAULT;
			goto free_buf;
		}

		bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
	} else {
		memset(buf, 0, len);

		/*
		 * We need to preserve the original bvec and the number of vecs
		 * in it for completion handling
		 */
		bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
	}

	if (IS_ERR(bip)) {
		ret = PTR_ERR(bip);
		goto free_buf;
	}

	if (!write)
		memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
				     offset_in_page(buf));
	if (ret != len) {
		ret = -ENOMEM;
		goto free_bip;
	}

	/*
	 * Only drop the pins on the write path once nothing can fail any
	 * more: on error the caller unpins @bvec itself, so unpinning before
	 * the last failure point would release the pages twice.
	 */
	if (write)
		bio_integrity_unpin_bvec(bvec, nr_vecs, false);

	bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
	bip->bip_iter.bi_sector = seed;
	return 0;
free_bip:
	bio_integrity_free(bio);
free_buf:
	kfree(buf);
	return ret;
}
264+
265+
/*
 * Attach the pinned user pages described by @bvec directly as the integrity
 * payload of @bio (no bounce buffer).
 *
 * Returns 0 on success or the error from bio_integrity_alloc().
 */
static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len, u32 seed)
{
	struct bio_integrity_payload *bip;

	bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
	if (IS_ERR(bip))
		return PTR_ERR(bip);

	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
	bip->bip_flags |= BIP_INTEGRITY_USER;
	bip->bip_iter.bi_sector = seed;
	bip->bip_iter.bi_size = len;
	/*
	 * The vectors were copied in directly rather than through
	 * bio_integrity_add_page(), so record how many are populated.
	 */
	bip->bip_vcnt = nr_vecs;
	return 0;
}
280+
281+
/*
 * Build bio_vecs in @bvec from the @nr_vecs pinned pages in @pages, which
 * cover @bytes bytes starting @offset bytes into the first page.
 *
 * Consecutive pages that are adjacent and belong to the same folio are
 * merged into one bio_vec. The extra pin on each merged page is dropped, so
 * only the first page of every resulting bio_vec remains pinned.
 *
 * Returns the number of bio_vecs written.
 */
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int count = 0;
	int first = 0;

	while (first < nr_vecs) {
		struct folio *head = page_folio(pages[first]);
		size_t seg_len = min_t(size_t, bytes, PAGE_SIZE - offset);
		int next = first + 1;

		bytes -= seg_len;

		/* Extend the segment across contiguous pages of this folio. */
		while (next < nr_vecs &&
		       page_folio(pages[next]) == head &&
		       pages[next] == pages[next - 1] + 1) {
			size_t chunk = min_t(size_t, PAGE_SIZE, bytes);

			unpin_user_page(pages[next]);
			seg_len += chunk;
			bytes -= chunk;
			next++;
		}

		bvec_set_page(&bvec[count], pages[first], seg_len, offset);
		count++;

		/* Only the very first segment starts part-way into a page. */
		offset = 0;
		first = next;
	}

	return count;
}
310+
311+
/**
 * bio_integrity_map_user - Map a user space buffer as a bio's integrity payload
 * @bio:	bio to attach the integrity payload to
 * @ubuf:	user space address of the metadata buffer
 * @bytes:	length of the metadata buffer in bytes
 * @seed:	value stored in the payload's bip_iter.bi_sector
 *
 * Pins the user pages backing @ubuf and attaches them to @bio directly. If
 * the buffer does not satisfy the queue's DMA alignment/padding mask, or
 * needs more segments than the queue's integrity segment limit, a kernel
 * bounce buffer is used instead.
 *
 * Return: 0 on success; -EINVAL if @bio already has an integrity payload;
 * -E2BIG if the buffer is too large; -ENOMEM on allocation failure; or
 * another negative errno from pinning the pages or setting up the payload.
 */
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
			   u32 seed)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	if (bio_data_dir(bio) == READ)
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
		/* kcalloc() takes the element count first, then the size. */
		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		/* Let iov_iter_extract_pages() allocate the page array. */
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0)) {
		/*
		 * The page array may already have been allocated before the
		 * failure; kvfree(NULL) is a no-op if it was not.
		 */
		if (pages != stack_pages)
			kvfree(pages);
		goto free_bvec;
	}

	/*
	 * NOTE(review): this assumes all @nr_vecs pages were pinned; a short
	 * (non-negative) return from iov_iter_extract_pages() is not handled
	 * here - confirm whether that can happen for this iterator type.
	 */
	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction, seed);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
	if (ret)
		goto release_pages;
	if (bvec != stack_vec)
		kfree(bvec);

	return 0;

release_pages:
	bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}
375+
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
376+
163377
/**
164378
* bio_integrity_process - Process integrity metadata for a bio
165379
* @bio: bio to generate/verify integrity metadata for

include/linux/bio.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ enum bip_flags {
324324
BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */
325325
BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */
326326
BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
327+
BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */
328+
BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */
327329
};
328330

329331
/*
@@ -718,6 +720,7 @@ static inline bool bioset_initialized(struct bio_set *bs)
718720
for_each_bio(_bio) \
719721
bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
720722

723+
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
721724
extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
722725
extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
723726
extern bool bio_integrity_prep(struct bio *);
@@ -789,6 +792,12 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
789792
return 0;
790793
}
791794

795+
/*
 * Fallback stub: mapping a user integrity buffer always fails with -EINVAL.
 * NOTE(review): appears to be the variant used when CONFIG_BLK_DEV_INTEGRITY
 * is disabled - confirm against the surrounding #ifdef.
 */
static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
					 ssize_t len, u32 seed)
{
	return -EINVAL;
}
800+
792801
#endif /* CONFIG_BLK_DEV_INTEGRITY */
793802

794803
/*

0 commit comments

Comments
 (0)