
Commit 0abdd7a

djbw authored and torvalds committed
dma-debug: introduce debug_dma_assert_idle()
Record actively mapped pages and provide an api for asserting a given page is dma inactive before execution proceeds. Placing debug_dma_assert_idle() in cow_user_page() flagged the violation of the dma-api in the NET_DMA implementation (see commit 7787380 "net_dma: mark broken").

The implementation includes the capability to count, in a limited way, repeat mappings of the same page that occur without an intervening unmap. This 'overlap' counter is limited to the few bits of tag space in a radix tree. This mechanism is added to mitigate false negative cases where, for example, a page is dma mapped twice and debug_dma_assert_idle() is called after the page is un-mapped once.

Signed-off-by: Dan Williams <[email protected]>
Cc: Joerg Roedel <[email protected]>
Cc: Vinod Koul <[email protected]>
Cc: Russell King <[email protected]>
Cc: James Bottomley <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 03d11a0 commit 0abdd7a
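For illustration, a minimal sketch of how the new assertion is meant to be used with the streaming DMA API (not part of the commit; 'dev' and 'page' are hypothetical, while dma_map_page(), dma_unmap_page() and debug_dma_assert_idle() are real kernel APIs):

    /* Sketch, assuming CONFIG_DMA_API_DEBUG=y; 'dev' and 'page' are hypothetical. */
    dma_addr_t handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

    debug_dma_assert_idle(page);    /* warns: the page is in the active set */

    dma_unmap_page(dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
    debug_dma_assert_idle(page);    /* silent: the page is idle again */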

4 files changed: +199 −15 lines changed

include/linux/dma-debug.h

Lines changed: 6 additions & 0 deletions
@@ -85,6 +85,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev,
 
 extern void debug_dma_dump_mappings(struct device *dev);
 
+extern void debug_dma_assert_idle(struct page *page);
+
 #else /* CONFIG_DMA_API_DEBUG */
 
 static inline void dma_debug_add_bus(struct bus_type *bus)
@@ -183,6 +185,10 @@ static inline void debug_dma_dump_mappings(struct device *dev)
 {
 }
 
+static inline void debug_dma_assert_idle(struct page *page)
+{
+}
+
 #endif /* CONFIG_DMA_API_DEBUG */
 
 #endif /* __DMA_DEBUG_H */

lib/Kconfig.debug

Lines changed: 10 additions & 2 deletions
@@ -1584,8 +1584,16 @@ config DMA_API_DEBUG
           With this option you will be able to detect common bugs in device
           drivers like double-freeing of DMA mappings or freeing mappings that
           were never allocated.
-          This option causes a performance degredation. Use only if you want
-          to debug device drivers. If unsure, say N.
+
+          This also attempts to catch cases where a page owned by DMA is
+          accessed by the cpu in a way that could cause data corruption. For
+          example, this enables cow_user_page() to check that the source page is
+          not undergoing DMA.
+
+          This option causes a performance degradation. Use only if you want to
+          debug device drivers and dma interactions.
+
+          If unsure, say N.
 
 source "samples/Kconfig"
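To actually exercise the new check, the option has to be switched on in the kernel configuration, e.g. (standard kernel config syntax, nothing specific to this commit):

    CONFIG_DMA_API_DEBUG=y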

lib/dma-debug.c

Lines changed: 180 additions & 13 deletions
@@ -53,11 +53,26 @@ enum map_err_types {
 
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
 struct dma_debug_entry {
         struct list_head list;
         struct device    *dev;
         int              type;
-        phys_addr_t      paddr;
+        unsigned long    pfn;
+        size_t           offset;
         u64              dev_addr;
         u64              size;
         int              direction;
@@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
         list_del(&entry->list);
 }
 
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+        return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
 /*
  * Dump mapping entries for debugging purposes
  */
@@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev)
                 list_for_each_entry(entry, &bucket->list, list) {
                         if (!dev || dev == entry->dev) {
                                 dev_info(entry->dev,
-                                         "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
+                                         "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
                                          type2name[entry->type], idx,
-                                         (unsigned long long)entry->paddr,
+                                         phys_addr(entry), entry->pfn,
                                          entry->dev_addr, entry->size,
                                          dir2name[entry->direction],
                                          maperr2str[entry->map_err_type]);
@@ -403,6 +423,133 @@ void debug_dma_dump_mappings(struct device *dev)
 }
 EXPORT_SYMBOL(debug_dma_dump_mappings);
 
+/*
+ * For each page mapped (initial page in the case of
+ * dma_alloc_coherent/dma_map_{single|page}, or each page in a
+ * scatterlist) insert into this tree using the pfn as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If
+ * the pfn already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings. For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the pfn
+ * idle, but we should also be flagging overlaps as an API violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree. In the case of dma_map_{single|page} and
+ * dma_alloc_coherent there is only one dma_debug_entry and one pfn to
+ * track per event. dma_map_sg(), on the other hand, consumes a single
+ * dma_debug_entry, but inserts 'nents' entries into the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if the given page is in the active set.
+ */
+static RADIX_TREE(dma_active_pfn, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+
+static int active_pfn_read_overlap(unsigned long pfn)
+{
+        int overlap = 0, i;
+
+        for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+                if (radix_tree_tag_get(&dma_active_pfn, pfn, i))
+                        overlap |= 1 << i;
+        return overlap;
+}
+
+static int active_pfn_set_overlap(unsigned long pfn, int overlap)
+{
+        int i;
+
+        if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0)
+                return 0;
+
+        for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+                if (overlap & 1 << i)
+                        radix_tree_tag_set(&dma_active_pfn, pfn, i);
+                else
+                        radix_tree_tag_clear(&dma_active_pfn, pfn, i);
+
+        return overlap;
+}
+
+static void active_pfn_inc_overlap(unsigned long pfn)
+{
+        int overlap = active_pfn_read_overlap(pfn);
+
+        overlap = active_pfn_set_overlap(pfn, ++overlap);
+
+        /* If we overflowed the overlap counter then we're potentially
+         * leaking dma-mappings. Otherwise, if maps and unmaps are
+         * balanced then this overflow may cause false negatives in
+         * debug_dma_assert_idle() as the pfn may be marked idle
+         * prematurely.
+         */
+        WARN_ONCE(overlap == 0,
+                  "DMA-API: exceeded %d overlapping mappings of pfn %lx\n",
+                  ACTIVE_PFN_MAX_OVERLAP, pfn);
+}
+
+static int active_pfn_dec_overlap(unsigned long pfn)
+{
+        int overlap = active_pfn_read_overlap(pfn);
+
+        return active_pfn_set_overlap(pfn, --overlap);
+}
+
+static int active_pfn_insert(struct dma_debug_entry *entry)
+{
+        unsigned long flags;
+        int rc;
+
+        spin_lock_irqsave(&radix_lock, flags);
+        rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry);
+        if (rc == -EEXIST)
+                active_pfn_inc_overlap(entry->pfn);
+        spin_unlock_irqrestore(&radix_lock, flags);
+
+        return rc;
+}
+
+static void active_pfn_remove(struct dma_debug_entry *entry)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&radix_lock, flags);
+        if (active_pfn_dec_overlap(entry->pfn) == 0)
+                radix_tree_delete(&dma_active_pfn, entry->pfn);
+        spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_pfn tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+        unsigned long flags;
+        struct dma_debug_entry *entry;
+
+        if (!page)
+                return;
+
+        spin_lock_irqsave(&radix_lock, flags);
+        entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page));
+        spin_unlock_irqrestore(&radix_lock, flags);
+
+        if (!entry)
+                return;
+
+        err_printk(entry->dev, entry,
+                   "DMA-API: cpu touching an active dma mapped page "
+                   "[pfn=0x%lx]\n", entry->pfn);
+}
+
 /*
  * Wrapper function for adding an entry to the hash.
  * This function takes care of locking itself.
@@ -411,10 +558,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
 {
         struct hash_bucket *bucket;
         unsigned long flags;
+        int rc;
 
         bucket = get_hash_bucket(entry, &flags);
         hash_bucket_add(bucket, entry);
         put_hash_bucket(bucket, &flags);
+
+        rc = active_pfn_insert(entry);
+        if (rc == -ENOMEM) {
+                pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n");
+                global_disable = true;
+        }
+
+        /* TODO: report -EEXIST errors here as overlapping mappings are
+         * not supported by the DMA API
+         */
 }
 
 static struct dma_debug_entry *__dma_entry_alloc(void)
@@ -469,6 +627,8 @@ static void dma_entry_free(struct dma_debug_entry *entry)
 {
         unsigned long flags;
 
+        active_pfn_remove(entry);
+
         /*
          * add to beginning of the list - this way the entries are
          * more likely cache hot when they are reallocated.
@@ -895,15 +1055,15 @@ static void check_unmap(struct dma_debug_entry *ref)
                            ref->dev_addr, ref->size,
                            type2name[entry->type], type2name[ref->type]);
         } else if ((entry->type == dma_debug_coherent) &&
-                   (ref->paddr != entry->paddr)) {
+                   (phys_addr(ref) != phys_addr(entry))) {
                 err_printk(ref->dev, entry, "DMA-API: device driver frees "
                            "DMA memory with different CPU address "
                            "[device address=0x%016llx] [size=%llu bytes] "
                            "[cpu alloc address=0x%016llx] "
                            "[cpu free address=0x%016llx]",
                            ref->dev_addr, ref->size,
-                           (unsigned long long)entry->paddr,
-                           (unsigned long long)ref->paddr);
+                           phys_addr(entry),
+                           phys_addr(ref));
         }
 
         if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -1052,7 +1212,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 
         entry->dev       = dev;
         entry->type      = dma_debug_page;
-        entry->paddr     = page_to_phys(page) + offset;
+        entry->pfn       = page_to_pfn(page);
+        entry->offset    = offset,
         entry->dev_addr  = dma_addr;
         entry->size      = size;
         entry->direction = direction;
@@ -1148,7 +1309,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
                 entry->type           = dma_debug_sg;
                 entry->dev            = dev;
-                entry->paddr          = sg_phys(s);
+                entry->pfn            = page_to_pfn(sg_page(s));
+                entry->offset         = s->offset,
                 entry->size           = sg_dma_len(s);
                 entry->dev_addr       = sg_dma_address(s);
                 entry->direction      = direction;
@@ -1198,7 +1360,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                 struct dma_debug_entry ref = {
                         .type           = dma_debug_sg,
                         .dev            = dev,
-                        .paddr          = sg_phys(s),
+                        .pfn            = page_to_pfn(sg_page(s)),
+                        .offset         = s->offset,
                         .dev_addr       = sg_dma_address(s),
                         .size           = sg_dma_len(s),
                         .direction      = dir,
@@ -1233,7 +1396,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 
         entry->type      = dma_debug_coherent;
         entry->dev       = dev;
-        entry->paddr     = virt_to_phys(virt);
+        entry->pfn       = page_to_pfn(virt_to_page(virt));
+        entry->offset    = (size_t) virt & PAGE_MASK;
         entry->size      = size;
         entry->dev_addr  = dma_addr;
         entry->direction = DMA_BIDIRECTIONAL;
@@ -1248,7 +1412,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
         struct dma_debug_entry ref = {
                 .type           = dma_debug_coherent,
                 .dev            = dev,
-                .paddr          = virt_to_phys(virt),
+                .pfn            = page_to_pfn(virt_to_page(virt)),
+                .offset         = (size_t) virt & PAGE_MASK,
                 .dev_addr       = addr,
                 .size           = size,
                 .direction      = DMA_BIDIRECTIONAL,
@@ -1356,7 +1521,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                 struct dma_debug_entry ref = {
                         .type           = dma_debug_sg,
                         .dev            = dev,
-                        .paddr          = sg_phys(s),
+                        .pfn            = page_to_pfn(sg_page(s)),
+                        .offset         = s->offset,
                         .dev_addr       = sg_dma_address(s),
                         .size           = sg_dma_len(s),
                         .direction      = direction,
@@ -1388,7 +1554,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                 struct dma_debug_entry ref = {
                         .type           = dma_debug_sg,
                         .dev            = dev,
-                        .paddr          = sg_phys(s),
+                        .pfn            = page_to_pfn(sg_page(s)),
+                        .offset         = s->offset,
                         .dev_addr       = sg_dma_address(s),
                         .size           = sg_dma_len(s),
                         .direction      = direction,
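As a worked example of the tag encoding used by the active_pfn_* helpers above: the radix tree provides RADIX_TREE_MAX_TAGS (3 at the time of this commit) tag bits per entry, so the overlap counter saturates at ACTIVE_PFN_MAX_OVERLAP == 7. The following standalone sketch (plain C outside the kernel; the tags[] array is a hypothetical stand-in for the per-pfn radix tree tags) mirrors the read/set logic and the overflow condition that triggers the WARN_ONCE:

    #include <stdio.h>

    #define MAX_TAGS 3                        /* mirrors RADIX_TREE_MAX_TAGS */
    #define MAX_OVERLAP ((1 << MAX_TAGS) - 1) /* counter saturates at 7 */

    static int tags[MAX_TAGS];                /* one tag bit per slot, single pfn */

    static int read_overlap(void)
    {
            int overlap = 0, i;

            for (i = MAX_TAGS - 1; i >= 0; i--)
                    if (tags[i])
                            overlap |= 1 << i;
            return overlap;
    }

    static int set_overlap(int overlap)
    {
            int i;

            if (overlap > MAX_OVERLAP || overlap < 0)
                    return 0;
            for (i = MAX_TAGS - 1; i >= 0; i--)
                    tags[i] = !!(overlap & 1 << i);
            return overlap;
    }

    int main(void)
    {
            int i;

            /* Seven repeat mappings count exactly; the eighth attempt is
             * reported as 0, the condition the kernel code WARNs about. */
            for (i = 0; i < 8; i++)
                    printf("overlap=%d\n", set_overlap(read_overlap() + 1));
            return 0;
    }

This prints overlap=1 through overlap=7, then overlap=0 on the eighth increment, matching the "exceeded %d overlapping mappings" warning path in active_pfn_inc_overlap().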

mm/memory.c

Lines changed: 3 additions & 0 deletions
@@ -59,6 +59,7 @@
 #include <linux/gfp.h>
 #include <linux/migrate.h>
 #include <linux/string.h>
+#include <linux/dma-debug.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -2559,6 +2560,8 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 
 static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
 {
+        debug_dma_assert_idle(src);
+
         /*
          * If the source page was a PFN mapping, we don't have
          * a "struct page" for it. We do a best-effort copy by
