Skip to content

Commit f2a4165

Browse files
committed
Merge branch 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  - x86 mmiotrace: fix race with release_kmmio_fault_page()
  - x86 mmiotrace: improve handling of secondary faults
  - x86 mmiotrace: split set_page_presence()
  - x86 mmiotrace: fix save/restore page table state
  - x86 mmiotrace: WARN_ONCE if dis/arming a page fails
  - x86: add far read test to testmmiotrace
  - x86: count errors in testmmiotrace.ko
2 parents b24746c + 340430c commit f2a4165

File tree

2 files changed: 153 additions and 66 deletions.

arch/x86/mm/kmmio.c

Lines changed: 96 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,14 @@ struct kmmio_fault_page {
3232
struct list_head list;
3333
struct kmmio_fault_page *release_next;
3434
unsigned long page; /* location of the fault page */
35+
bool old_presence; /* page presence prior to arming */
36+
bool armed;
3537

3638
/*
3739
* Number of times this page has been registered as a part
3840
* of a probe. If zero, page is disarmed and this may be freed.
39-
* Used only by writers (RCU).
41+
* Used only by writers (RCU) and post_kmmio_handler().
42+
* Protected by kmmio_lock, when linked into kmmio_page_table.
4043
*/
4144
int count;
4245
};
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
105108
return NULL;
106109
}
107110

108-
static void set_page_present(unsigned long addr, bool present,
109-
unsigned int *pglevel)
111+
static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
112+
{
113+
pmdval_t v = pmd_val(*pmd);
114+
*old = !!(v & _PAGE_PRESENT);
115+
v &= ~_PAGE_PRESENT;
116+
if (present)
117+
v |= _PAGE_PRESENT;
118+
set_pmd(pmd, __pmd(v));
119+
}
120+
121+
static void set_pte_presence(pte_t *pte, bool present, bool *old)
122+
{
123+
pteval_t v = pte_val(*pte);
124+
*old = !!(v & _PAGE_PRESENT);
125+
v &= ~_PAGE_PRESENT;
126+
if (present)
127+
v |= _PAGE_PRESENT;
128+
set_pte_atomic(pte, __pte(v));
129+
}
130+
131+
static int set_page_presence(unsigned long addr, bool present, bool *old)
110132
{
111-
pteval_t pteval;
112-
pmdval_t pmdval;
113133
unsigned int level;
114-
pmd_t *pmd;
115134
pte_t *pte = lookup_address(addr, &level);
116135

117136
if (!pte) {
118137
pr_err("kmmio: no pte for page 0x%08lx\n", addr);
119-
return;
138+
return -1;
120139
}
121140

122-
if (pglevel)
123-
*pglevel = level;
124-
125141
switch (level) {
126142
case PG_LEVEL_2M:
127-
pmd = (pmd_t *)pte;
128-
pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
129-
if (present)
130-
pmdval |= _PAGE_PRESENT;
131-
set_pmd(pmd, __pmd(pmdval));
143+
set_pmd_presence((pmd_t *)pte, present, old);
132144
break;
133-
134145
case PG_LEVEL_4K:
135-
pteval = pte_val(*pte) & ~_PAGE_PRESENT;
136-
if (present)
137-
pteval |= _PAGE_PRESENT;
138-
set_pte_atomic(pte, __pte(pteval));
146+
set_pte_presence(pte, present, old);
139147
break;
140-
141148
default:
142149
pr_err("kmmio: unexpected page level 0x%x.\n", level);
143-
return;
150+
return -1;
144151
}
145152

146153
__flush_tlb_one(addr);
154+
return 0;
147155
}
148156

149-
/** Mark the given page as not present. Access to it will trigger a fault. */
150-
static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
157+
/*
158+
* Mark the given page as not present. Access to it will trigger a fault.
159+
*
160+
* Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
161+
* protection is ignored here. RCU read lock is assumed held, so the struct
162+
* will not disappear unexpectedly. Furthermore, the caller must guarantee,
163+
* that double arming the same virtual address (page) cannot occur.
164+
*
165+
* Double disarming on the other hand is allowed, and may occur when a fault
166+
* and mmiotrace shutdown happen simultaneously.
167+
*/
168+
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
151169
{
152-
set_page_present(page & PAGE_MASK, false, pglevel);
170+
int ret;
171+
WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172+
if (f->armed) {
173+
pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174+
f->page, f->count, f->old_presence);
175+
}
176+
ret = set_page_presence(f->page, false, &f->old_presence);
177+
WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178+
f->armed = true;
179+
return ret;
153180
}
154181

155-
/** Mark the given page as present. */
156-
static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
182+
/** Restore the given page to saved presence state. */
183+
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
157184
{
158-
set_page_present(page & PAGE_MASK, true, pglevel);
185+
bool tmp;
186+
int ret = set_page_presence(f->page, f->old_presence, &tmp);
187+
WARN_ONCE(ret < 0,
188+
KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189+
f->armed = false;
159190
}
160191

161192
/*
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
202233

203234
ctx = &get_cpu_var(kmmio_ctx);
204235
if (ctx->active) {
205-
disarm_kmmio_fault_page(faultpage->page, NULL);
206236
if (addr == ctx->addr) {
207237
/*
208-
* On SMP we sometimes get recursive probe hits on the
209-
* same address. Context is already saved, fall out.
238+
* A second fault on the same page means some other
239+
* condition needs handling by do_page_fault(), the
240+
* page really not being present is the most common.
210241
*/
211-
pr_debug("kmmio: duplicate probe hit on CPU %d, for "
212-
"address 0x%08lx.\n",
213-
smp_processor_id(), addr);
214-
ret = 1;
215-
goto no_kmmio_ctx;
216-
}
217-
/*
218-
* Prevent overwriting already in-flight context.
219-
* This should not happen, let's hope disarming at least
220-
* prevents a panic.
221-
*/
222-
pr_emerg("kmmio: recursive probe hit on CPU %d, "
242+
pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
243+
addr, smp_processor_id());
244+
245+
if (!faultpage->old_presence)
246+
pr_info("kmmio: unexpected secondary hit for "
247+
"address 0x%08lx on CPU %d.\n", addr,
248+
smp_processor_id());
249+
} else {
250+
/*
251+
* Prevent overwriting already in-flight context.
252+
* This should not happen, let's hope disarming at
253+
* least prevents a panic.
254+
*/
255+
pr_emerg("kmmio: recursive probe hit on CPU %d, "
223256
"for address 0x%08lx. Ignoring.\n",
224257
smp_processor_id(), addr);
225-
pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
226-
ctx->addr);
258+
pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
259+
ctx->addr);
260+
disarm_kmmio_fault_page(faultpage);
261+
}
227262
goto no_kmmio_ctx;
228263
}
229264
ctx->active++;
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
244279
regs->flags &= ~X86_EFLAGS_IF;
245280

246281
/* Now we set present bit in PTE and single step. */
247-
disarm_kmmio_fault_page(ctx->fpage->page, NULL);
282+
disarm_kmmio_fault_page(ctx->fpage);
248283

249284
/*
250285
* If another cpu accesses the same page while we are stepping,
@@ -275,15 +310,19 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
275310
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
276311

277312
if (!ctx->active) {
278-
pr_debug("kmmio: spurious debug trap on CPU %d.\n",
313+
pr_warning("kmmio: spurious debug trap on CPU %d.\n",
279314
smp_processor_id());
280315
goto out;
281316
}
282317

283318
if (ctx->probe && ctx->probe->post_handler)
284319
ctx->probe->post_handler(ctx->probe, condition, regs);
285320

286-
arm_kmmio_fault_page(ctx->fpage->page, NULL);
321+
/* Prevent racing against release_kmmio_fault_page(). */
322+
spin_lock(&kmmio_lock);
323+
if (ctx->fpage->count)
324+
arm_kmmio_fault_page(ctx->fpage);
325+
spin_unlock(&kmmio_lock);
287326

288327
regs->flags &= ~X86_EFLAGS_TF;
289328
regs->flags |= ctx->saved_flags;
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page)
315354
f = get_kmmio_fault_page(page);
316355
if (f) {
317356
if (!f->count)
318-
arm_kmmio_fault_page(f->page, NULL);
357+
arm_kmmio_fault_page(f);
319358
f->count++;
320359
return 0;
321360
}
322361

323-
f = kmalloc(sizeof(*f), GFP_ATOMIC);
362+
f = kzalloc(sizeof(*f), GFP_ATOMIC);
324363
if (!f)
325364
return -1;
326365

327366
f->count = 1;
328367
f->page = page;
329-
list_add_rcu(&f->list, kmmio_page_list(f->page));
330368

331-
arm_kmmio_fault_page(f->page, NULL);
369+
if (arm_kmmio_fault_page(f)) {
370+
kfree(f);
371+
return -1;
372+
}
373+
374+
list_add_rcu(&f->list, kmmio_page_list(f->page));
332375

333376
return 0;
334377
}
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page,
347390
f->count--;
348391
BUG_ON(f->count < 0);
349392
if (!f->count) {
350-
disarm_kmmio_fault_page(f->page, NULL);
393+
disarm_kmmio_fault_page(f);
351394
f->release_next = *release_list;
352395
*release_list = f;
353396
}

arch/x86/mm/testmmiotrace.c

Lines changed: 57 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Written by Pekka Paalanen, 2008 <[email protected]>
2+
* Written by Pekka Paalanen, 2008-2009 <[email protected]>
33
*/
44
#include <linux/module.h>
55
#include <linux/io.h>
@@ -9,47 +9,90 @@
99

1010
static unsigned long mmio_address;
1111
module_param(mmio_address, ulong, 0);
12-
MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
12+
MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
13+
"(or 8 MB if read_far is non-zero).");
14+
15+
static unsigned long read_far = 0x400100;
16+
module_param(read_far, ulong, 0);
17+
MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB "
18+
"(default: 0x400100).");
19+
20+
static unsigned v16(unsigned i)
21+
{
22+
return i * 12 + 7;
23+
}
24+
25+
static unsigned v32(unsigned i)
26+
{
27+
return i * 212371 + 13;
28+
}
1329

1430
static void do_write_test(void __iomem *p)
1531
{
1632
unsigned int i;
33+
pr_info(MODULE_NAME ": write test.\n");
1734
mmiotrace_printk("Write test.\n");
35+
1836
for (i = 0; i < 256; i++)
1937
iowrite8(i, p + i);
38+
2039
for (i = 1024; i < (5 * 1024); i += 2)
21-
iowrite16(i * 12 + 7, p + i);
40+
iowrite16(v16(i), p + i);
41+
2242
for (i = (5 * 1024); i < (16 * 1024); i += 4)
23-
iowrite32(i * 212371 + 13, p + i);
43+
iowrite32(v32(i), p + i);
2444
}
2545

2646
static void do_read_test(void __iomem *p)
2747
{
2848
unsigned int i;
49+
unsigned errs[3] = { 0 };
50+
pr_info(MODULE_NAME ": read test.\n");
2951
mmiotrace_printk("Read test.\n");
52+
3053
for (i = 0; i < 256; i++)
31-
ioread8(p + i);
54+
if (ioread8(p + i) != i)
55+
++errs[0];
56+
3257
for (i = 1024; i < (5 * 1024); i += 2)
33-
ioread16(p + i);
58+
if (ioread16(p + i) != v16(i))
59+
++errs[1];
60+
3461
for (i = (5 * 1024); i < (16 * 1024); i += 4)
35-
ioread32(p + i);
62+
if (ioread32(p + i) != v32(i))
63+
++errs[2];
64+
65+
mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n",
66+
errs[0], errs[1], errs[2]);
3667
}
3768

38-
static void do_test(void)
69+
static void do_read_far_test(void __iomem *p)
3970
{
40-
void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
71+
pr_info(MODULE_NAME ": read far test.\n");
72+
mmiotrace_printk("Read far test.\n");
73+
74+
ioread32(p + read_far);
75+
}
76+
77+
static void do_test(unsigned long size)
78+
{
79+
void __iomem *p = ioremap_nocache(mmio_address, size);
4180
if (!p) {
4281
pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
4382
return;
4483
}
4584
mmiotrace_printk("ioremap returned %p.\n", p);
4685
do_write_test(p);
4786
do_read_test(p);
87+
if (read_far && read_far < size - 4)
88+
do_read_far_test(p);
4889
iounmap(p);
4990
}
5091

5192
static int __init init(void)
5293
{
94+
unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
95+
5396
if (mmio_address == 0) {
5497
pr_err(MODULE_NAME ": you have to use the module argument "
5598
"mmio_address.\n");
@@ -58,10 +101,11 @@ static int __init init(void)
58101
return -ENXIO;
59102
}
60103

61-
pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
62-
"in PCI address space, and writing "
63-
"rubbish in there.\n", mmio_address);
64-
do_test();
104+
pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
105+
"address space, and writing 16 kB of rubbish in there.\n",
106+
size >> 10, mmio_address);
107+
do_test(size);
108+
pr_info(MODULE_NAME ": All done.\n");
65109
return 0;
66110
}
67111

0 commit comments

Comments
 (0)