Skip to content

Commit 30d621f

Browse files
Steven Price authored and torvalds committed
mm: add generic ptdump
Add a generic version of page table dumping that architectures can opt-in to. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Steven Price <[email protected]> Cc: Albert Ou <[email protected]> Cc: Alexandre Ghiti <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Ard Biesheuvel <[email protected]> Cc: Arnd Bergmann <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David S. Miller <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: James Hogan <[email protected]> Cc: James Morse <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: "Liang, Kan" <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Paul Burton <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Paul Walmsley <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Ralf Baechle <[email protected]> Cc: Russell King <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vasily Gorbik <[email protected]> Cc: Vineet Gupta <[email protected]> Cc: Will Deacon <[email protected]> Cc: Zong Li <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent c5cfae1 commit 30d621f

File tree

4 files changed

+182
-0
lines changed

4 files changed

+182
-0
lines changed

include/linux/ptdump.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
#ifndef _LINUX_PTDUMP_H
4+
#define _LINUX_PTDUMP_H
5+
6+
#include <linux/mm_types.h>
7+
8+
struct ptdump_range {
9+
unsigned long start;
10+
unsigned long end;
11+
};
12+
13+
struct ptdump_state {
14+
void (*note_page)(struct ptdump_state *st, unsigned long addr,
15+
int level, unsigned long val);
16+
const struct ptdump_range *range;
17+
};
18+
19+
void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm);
20+
21+
#endif /* _LINUX_PTDUMP_H */

mm/Kconfig.debug

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,24 @@ config DEBUG_RODATA_TEST
117117
depends on STRICT_KERNEL_RWX
118118
---help---
119119
This option enables a testcase for the setting rodata read-only.
120+
121+
config GENERIC_PTDUMP
122+
bool
123+
124+
config PTDUMP_CORE
125+
bool
126+
127+
config PTDUMP_DEBUGFS
128+
bool "Export kernel pagetable layout to userspace via debugfs"
129+
depends on DEBUG_KERNEL
130+
depends on DEBUG_FS
131+
depends on GENERIC_PTDUMP
132+
select PTDUMP_CORE
133+
help
134+
Say Y here if you want to show the kernel pagetable layout in a
135+
debugfs file. This information is only useful for kernel developers
136+
who are working in architecture specific areas of the kernel.
137+
It is probably not a good idea to enable this feature in a production
138+
kernel.
139+
140+
If in doubt, say N.

mm/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ obj-$(CONFIG_ZONE_DEVICE) += memremap.o
109109
obj-$(CONFIG_HMM_MIRROR) += hmm.o
110110
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
111111
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
112+
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o

mm/ptdump.c

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/pagewalk.h>
4+
#include <linux/ptdump.h>
5+
#include <linux/kasan.h>
6+
7+
#ifdef CONFIG_KASAN
8+
/*
9+
* This is an optimization for KASAN=y case. Since all kasan page tables
10+
* eventually point to the kasan_early_shadow_page we could call note_page()
11+
* right away without walking through lower level page tables. This saves
12+
* us dozens of seconds (minutes for 5-level config) while checking for
13+
* W+X mapping or reading kernel_page_tables debugfs file.
14+
*/
15+
static inline int note_kasan_page_table(struct mm_walk *walk,
16+
unsigned long addr)
17+
{
18+
struct ptdump_state *st = walk->private;
19+
20+
st->note_page(st, addr, 5, pte_val(kasan_early_shadow_pte[0]));
21+
22+
walk->action = ACTION_CONTINUE;
23+
24+
return 0;
25+
}
26+
#endif
27+
28+
static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
29+
unsigned long next, struct mm_walk *walk)
30+
{
31+
struct ptdump_state *st = walk->private;
32+
pgd_t val = READ_ONCE(*pgd);
33+
34+
#if CONFIG_PGTABLE_LEVELS > 4 && defined(CONFIG_KASAN)
35+
if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d)))
36+
return note_kasan_page_table(walk, addr);
37+
#endif
38+
39+
if (pgd_leaf(val))
40+
st->note_page(st, addr, 1, pgd_val(val));
41+
42+
return 0;
43+
}
44+
45+
static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
46+
unsigned long next, struct mm_walk *walk)
47+
{
48+
struct ptdump_state *st = walk->private;
49+
p4d_t val = READ_ONCE(*p4d);
50+
51+
#if CONFIG_PGTABLE_LEVELS > 3 && defined(CONFIG_KASAN)
52+
if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud)))
53+
return note_kasan_page_table(walk, addr);
54+
#endif
55+
56+
if (p4d_leaf(val))
57+
st->note_page(st, addr, 2, p4d_val(val));
58+
59+
return 0;
60+
}
61+
62+
static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
63+
unsigned long next, struct mm_walk *walk)
64+
{
65+
struct ptdump_state *st = walk->private;
66+
pud_t val = READ_ONCE(*pud);
67+
68+
#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_KASAN)
69+
if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd)))
70+
return note_kasan_page_table(walk, addr);
71+
#endif
72+
73+
if (pud_leaf(val))
74+
st->note_page(st, addr, 3, pud_val(val));
75+
76+
return 0;
77+
}
78+
79+
static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
80+
unsigned long next, struct mm_walk *walk)
81+
{
82+
struct ptdump_state *st = walk->private;
83+
pmd_t val = READ_ONCE(*pmd);
84+
85+
#if defined(CONFIG_KASAN)
86+
if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte)))
87+
return note_kasan_page_table(walk, addr);
88+
#endif
89+
90+
if (pmd_leaf(val))
91+
st->note_page(st, addr, 4, pmd_val(val));
92+
93+
return 0;
94+
}
95+
96+
static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
97+
unsigned long next, struct mm_walk *walk)
98+
{
99+
struct ptdump_state *st = walk->private;
100+
101+
st->note_page(st, addr, 5, pte_val(READ_ONCE(*pte)));
102+
103+
return 0;
104+
}
105+
106+
static int ptdump_hole(unsigned long addr, unsigned long next,
107+
int depth, struct mm_walk *walk)
108+
{
109+
struct ptdump_state *st = walk->private;
110+
111+
st->note_page(st, addr, depth + 1, 0);
112+
113+
return 0;
114+
}
115+
116+
static const struct mm_walk_ops ptdump_ops = {
117+
.pgd_entry = ptdump_pgd_entry,
118+
.p4d_entry = ptdump_p4d_entry,
119+
.pud_entry = ptdump_pud_entry,
120+
.pmd_entry = ptdump_pmd_entry,
121+
.pte_entry = ptdump_pte_entry,
122+
.pte_hole = ptdump_hole,
123+
};
124+
125+
void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
126+
{
127+
const struct ptdump_range *range = st->range;
128+
129+
down_read(&mm->mmap_sem);
130+
while (range->start != range->end) {
131+
walk_page_range_novma(mm, range->start, range->end,
132+
&ptdump_ops, st);
133+
range++;
134+
}
135+
up_read(&mm->mmap_sem);
136+
137+
/* Flush out the last page */
138+
st->note_page(st, 0, 0, 0);
139+
}

0 commit comments

Comments
 (0)