
Commit c594ada

dgibson authored and paulusmack committed
[PATCH] Dynamic hugepage addresses for ppc64
Paulus, I think this is now a reasonable candidate for the post-2.6.13 queue.

Relax address restrictions for hugepages on ppc64

Presently, 64-bit applications on ppc64 may only use hugepages in the address region from 1TB to 1.5TB. Furthermore, if hugepages are enabled in the kernel config, they may use only hugepages and never normal pages in this area. This patch relaxes that restriction, allowing any address to be used with hugepages, but with 1TB granularity. That is, if you map a hugepage anywhere in the region 1TB-2TB, that entire area will be reserved exclusively for hugepages for the remainder of the process's lifetime. This works analogously to hugepages in 32-bit applications, where hugepages can be mapped anywhere, but with 256MB (mmu segment) granularity.

This patch applies on top of the four level pagetable patch (http://patchwork.ozlabs.org/linuxppc64/patch?id=1936).

Signed-off-by: David Gibson <[email protected]>
Signed-off-by: Paul Mackerras <[email protected]>
1 parent 9a5573e commit c594ada
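As a rough illustration of the granularity rule described above (a standalone sketch, not part of the patch): an address range selects one bit per 256MB segment below 4GB, or one bit per 1TB area above it. The shift values here are assumptions inferred from those granularities (SID_SHIFT = 28 for 256MB, HTLB_AREA_SHIFT = 40 for 1TB); in the kernel, the LOW_ESID_MASK() and HTLB_AREA_MASK() macros compute the equivalent masks.

#include <stdio.h>

#define SID_SHIFT       28      /* assumed: 2^28 = 256MB segments (low areas) */
#define HTLB_AREA_SHIFT 40      /* assumed: 2^40 = 1TB areas (high areas) */

/* One bit per area overlapped by [addr, addr+len) */
static unsigned short area_mask(unsigned long addr, unsigned long len,
                                int shift)
{
        unsigned long first = addr >> shift;
        unsigned long last = (addr + len - 1) >> shift;
        unsigned short mask = 0;
        unsigned long i;

        for (i = first; i <= last; i++)
                mask |= 1U << i;
        return mask;
}

int main(void)
{
        /* A single 16MB hugepage mapped at 1.5TB touches only high area 1
         * (the 1TB-2TB slice), so just that area becomes hugepage-only. */
        printf("high mask: 0x%04hx\n",
               area_mask(0x18000000000UL, 0x1000000UL, HTLB_AREA_SHIFT));
        return 0;
}

Once a bit is set in a process's area mask, every later SLB entry for that slice is created with the large-page flag, which is why the reservation has to hold for the rest of the process's lifetime.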

File tree: 5 files changed, +190 −78 lines


arch/ppc64/kernel/asm-offsets.c

Lines changed: 2 additions & 1 deletion
@@ -94,7 +94,8 @@ int main(void)
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_HUGETLB_PAGE
-	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
+	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
+	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
 #endif /* CONFIG_HUGETLB_PAGE */
 	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
 	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
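
For context on why this file changes at all: assembler sources such as slb_low.S (below) cannot evaluate offsetof(), so asm-offsets.c emits each structure offset at build time and the build turns it into a plain numeric constant. A minimal sketch of that standard pattern, assuming the usual shape of the kernel's DEFINE macro (it lives in the build machinery, not in this diff):

/* The compiler evaluates offsetof() and prints it into the generated
 * assembly as an annotated "->SYMBOL value" line; a build script then
 * scrubs those lines into "#define PACALOWHTLBAREAS <offset>" etc. */
#define DEFINE(sym, val) \
        asm volatile("\n->" #sym " %0 " #val : : "i" (val))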

arch/ppc64/mm/hugetlbpage.c

Lines changed: 158 additions & 53 deletions
@@ -27,6 +27,9 @@
 
 #include <linux/sysctl.h>
 
+#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
+#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
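
A quick worked check of the new constants, under the assumed shifts SID_SHIFT = 28 and HTLB_AREA_SHIFT = 40: NUM_LOW_AREAS = 0x100000000 >> 28 = 4GB / 256MB = 16, and with a 2^44 (16TB) pagetable range NUM_HIGH_AREAS = 2^44 >> 40 = 16. Both fit exactly in the 16-bit low_htlb_areas/high_htlb_areas fields, which is what the BUILD_BUG_ON() checks below pin down.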
@@ -129,29 +132,51 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-static void flush_segments(void *parm)
+static void flush_low_segments(void *parm)
 {
-	u16 segs = (unsigned long) parm;
+	u16 areas = (unsigned long) parm;
 	unsigned long i;
 
 	asm volatile("isync" : : : "memory");
 
-	for (i = 0; i < 16; i++) {
-		if (! (segs & (1U << i)))
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
+
+	for (i = 0; i < NUM_LOW_AREAS; i++) {
+		if (! (areas & (1U << i)))
 			continue;
 		asm volatile("slbie %0" : : "r" (i << SID_SHIFT));
 	}
 
 	asm volatile("isync" : : : "memory");
 }
 
-static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
+static void flush_high_segments(void *parm)
+{
+	u16 areas = (unsigned long) parm;
+	unsigned long i, j;
+
+	asm volatile("isync" : : : "memory");
+
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++) {
+		if (! (areas & (1U << i)))
+			continue;
+		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
+			asm volatile("slbie %0"
+				     :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT)));
+	}
+
+	asm volatile("isync" : : : "memory");
+}
+
+static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
 {
-	unsigned long start = seg << SID_SHIFT;
-	unsigned long end = (seg+1) << SID_SHIFT;
+	unsigned long start = area << SID_SHIFT;
+	unsigned long end = (area+1) << SID_SHIFT;
 	struct vm_area_struct *vma;
 
-	BUG_ON(seg >= 16);
+	BUG_ON(area >= NUM_LOW_AREAS);
 
 	/* Check no VMAs are in the region */
 	vma = find_vma(mm, start);
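
The asymmetry between the two flush routines follows from the hardware: SLB entries are tagged per 256MB segment, so invalidating one 1TB high area takes 1 << (HTLB_AREA_SHIFT - SID_SHIFT) = 1 << 12 = 4096 slbie instructions, one per segment, whereas a low area needs exactly one.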
@@ -161,50 +186,103 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
 	return 0;
 }
 
-static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
+static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+	unsigned long start = area << HTLB_AREA_SHIFT;
+	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
+	struct vm_area_struct *vma;
+
+	BUG_ON(area >= NUM_HIGH_AREAS);
+
+	/* Check no VMAs are in the region */
+	vma = find_vma(mm, start);
+	if (vma && (vma->vm_start < end))
+		return -EBUSY;
+
+	return 0;
+}
+
+static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
 {
 	unsigned long i;
 
-	newsegs &= ~(mm->context.htlb_segs);
-	if (! newsegs)
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
+
+	newareas &= ~(mm->context.low_htlb_areas);
+	if (! newareas)
 		return 0; /* The segments we want are already open */
 
-	for (i = 0; i < 16; i++)
-		if ((1 << i) & newsegs)
-			if (prepare_low_seg_for_htlb(mm, i) != 0)
+	for (i = 0; i < NUM_LOW_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_low_area_for_htlb(mm, i) != 0)
+				return -EBUSY;
+
+	mm->context.low_htlb_areas |= newareas;
+
+	/* update the paca copy of the context struct */
+	get_paca()->context = mm->context;
+
+	/* the context change must make it to memory before the flush,
+	 * so that further SLB misses do the right thing. */
+	mb();
+	on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
+	return 0;
+}
+
+static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
+{
+	unsigned long i;
+
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
+		     != NUM_HIGH_AREAS);
+
+	newareas &= ~(mm->context.high_htlb_areas);
+	if (! newareas)
+		return 0; /* The areas we want are already open */
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_high_area_for_htlb(mm, i) != 0)
 				return -EBUSY;
 
-	mm->context.htlb_segs |= newsegs;
+	mm->context.high_htlb_areas |= newareas;
 
 	/* update the paca copy of the context struct */
 	get_paca()->context = mm->context;
 
 	/* the context change must make it to memory before the flush,
 	 * so that further SLB misses do the right thing. */
 	mb();
-	on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1);
+	on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
 
 	return 0;
 }
 
 int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
-	if (within_hugepage_high_range(addr, len))
-		return 0;
-	else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) {
-		int err;
-		/* Yes, we need both tests, in case addr+len overflows
-		 * 64-bit arithmetic */
-		err = open_low_hpage_segs(current->mm,
+	int err;
+
+	if ( (addr+len) < addr )
+		return -EINVAL;
+
+	if ((addr + len) < 0x100000000UL)
+		err = open_low_hpage_areas(current->mm,
 					  LOW_ESID_MASK(addr, len));
-		if (err)
-			printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
-			       " failed (segs: 0x%04hx)\n", addr, len,
-			       LOW_ESID_MASK(addr, len));
+	else
+		err = open_high_hpage_areas(current->mm,
+					    HTLB_AREA_MASK(addr, len));
+	if (err) {
+		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
+		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
+		       addr, len,
+		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
 		return err;
 	}
 
-	return -EINVAL;
+	return 0;
 }
 
 struct page *
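
Note the simplification in prepare_hugepage_range(): the old code's paired comparisons guarded against addr+len wrapping 64-bit arithmetic, while the new single test (addr+len) < addr catches the overflow directly. After that, any range ending below 4GB is routed to the low (256MB) areas and everything else to the high (1TB) areas.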
@@ -276,8 +354,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			vma = find_vma(mm, addr);
 			continue;
 		}
-		if (touches_hugepage_high_range(addr, len)) {
-			addr = TASK_HPAGE_END;
+		if (touches_hugepage_high_range(mm, addr, len)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
 			vma = find_vma(mm, addr);
 			continue;
 		}
@@ -356,8 +434,9 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	if (touches_hugepage_low_range(mm, addr, len)) {
 		addr = (addr & ((~0) << SID_SHIFT)) - len;
 		goto hugepage_recheck;
-	} else if (touches_hugepage_high_range(addr, len)) {
-		addr = TASK_HPAGE_BASE - len;
+	} else if (touches_hugepage_high_range(mm, addr, len)) {
+		addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
+		goto hugepage_recheck;
 	}
 
 	/*
@@ -448,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
 	return -ENOMEM;
 }
 
-static unsigned long htlb_get_high_area(unsigned long len)
+static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
 {
-	unsigned long addr = TASK_HPAGE_BASE;
+	unsigned long addr = 0x100000000UL;
 	struct vm_area_struct *vma;
 
 	vma = find_vma(current->mm, addr);
-	for (vma = find_vma(current->mm, addr);
-	     addr + len <= TASK_HPAGE_END;
-	     vma = vma->vm_next) {
+	while (addr + len <= TASK_SIZE_USER64) {
 		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-		BUG_ON(! within_hugepage_high_range(addr, len));
+
+		if (! __within_hugepage_high_range(addr, len, areamask)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
+			vma = find_vma(current->mm, addr);
+			continue;
+		}
 
 		if (!vma || (addr + len) <= vma->vm_start)
 			return addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Because we're in a hugepage region, this alignment
-		 * should not skip us over any VMAs */
+		/* Depending on segmask this might not be a confirmed
+		 * hugepage region, so the ALIGN could have skipped
+		 * some VMAs */
+		vma = find_vma(current->mm, addr);
 	}
 
 	return -ENOMEM;
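
The vma re-lookup after the ALIGN is the subtle part of this hunk: htlb_get_high_area() now walks candidate areas that may not yet be reserved for hugepages, so rounding addr up to the next HPAGE_SIZE boundary can jump over intervening VMAs; calling find_vma() again restores the loop invariant asserted by the BUG_ON().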
@@ -474,38 +558,59 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 				     unsigned long len, unsigned long pgoff,
 				     unsigned long flags)
 {
+	int lastshift;
+	u16 areamask, curareas;
+
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_32BIT)) {
-		int lastshift = 0;
-		u16 segmask, cursegs = current->mm->context.htlb_segs;
+		curareas = current->mm->context.low_htlb_areas;
 
 		/* First see if we can do the mapping in the existing
-		 * low hpage segments */
-		addr = htlb_get_low_area(len, cursegs);
+		 * low areas */
+		addr = htlb_get_low_area(len, curareas);
 		if (addr != -ENOMEM)
 			return addr;
 
-		for (segmask = LOW_ESID_MASK(0x100000000UL-len, len);
-		     ! lastshift; segmask >>=1) {
-			if (segmask & 1)
+		lastshift = 0;
+		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
 				lastshift = 1;
 
-			addr = htlb_get_low_area(len, cursegs | segmask);
+			addr = htlb_get_low_area(len, curareas | areamask);
 			if ((addr != -ENOMEM)
-			    && open_low_hpage_segs(current->mm, segmask) == 0)
+			    && open_low_hpage_areas(current->mm, areamask) == 0)
 				return addr;
 		}
-		printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-		       " enough segments\n");
-		return -ENOMEM;
 	} else {
-		return htlb_get_high_area(len);
+		curareas = current->mm->context.high_htlb_areas;
+
+		/* First see if we can do the mapping in the existing
+		 * high areas */
+		addr = htlb_get_high_area(len, curareas);
+		if (addr != -ENOMEM)
+			return addr;
+
+		lastshift = 0;
+		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
+				lastshift = 1;
+
+			addr = htlb_get_high_area(len, curareas | areamask);
+			if ((addr != -ENOMEM)
+			    && open_high_hpage_areas(current->mm, areamask) == 0)
+				return addr;
+		}
 	}
+	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
+	       " enough areas\n");
+	return -ENOMEM;
 }
 
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
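
Seen from userspace, the point of all this is that a 64-bit process can now ask for a hugepage mapping at (almost) any hint address, not just inside the old fixed 1TB-1.5TB window. A hedged example, assuming a hugetlbfs mount at the hypothetical path /mnt/huge and the 16MB hugepage size of CPU_FTR_16M_PAGE machines:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        /* /mnt/huge is an assumed hugetlbfs mount point */
        int fd = open("/mnt/huge/example", O_CREAT | O_RDWR, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Hint at 2TB; with this patch the kernel can open the enclosing
         * 1TB area for hugepages instead of failing the request. */
        void *p = mmap((void *)0x20000000000UL, 0x1000000 /* 16MB */,
                       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                perror("mmap");
        else
                printf("hugepage mapped at %p\n", p);

        close(fd);
        return 0;
}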

arch/ppc64/mm/slb_low.S

Lines changed: 12 additions & 11 deletions
@@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
 	b	9f
 
 0:	/* user address: proto-VSID = context<<15 | ESID */
-	li	r11,SLB_VSID_USER
-
 	srdi.	r9,r3,USER_ESID_BITS
 	bne-	8f			/* invalid ea bits set */
 
 #ifdef CONFIG_HUGETLB_PAGE
 BEGIN_FTR_SECTION
-	/* check against the hugepage ranges */
-	cmpldi	r3,(TASK_HPAGE_END>>SID_SHIFT)
-	bge	6f			/* >= TASK_HPAGE_END */
-	cmpldi	r3,(TASK_HPAGE_BASE>>SID_SHIFT)
-	bge	5f			/* TASK_HPAGE_BASE..TASK_HPAGE_END */
+	lhz	r9,PACAHIGHHTLBAREAS(r13)
+	srdi	r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
+	srd	r9,r9,r11
+	andi.	r9,r9,1
+	bne	5f
+
+	li	r11,SLB_VSID_USER
+
 	cmpldi	r3,16
-	bge	6f			/* 4GB..TASK_HPAGE_BASE */
+	bge	6f
 
-	lhz	r9,PACAHTLBSEGS(r13)
+	lhz	r9,PACALOWHTLBAREAS(r13)
 	srd	r9,r9,r3
 	andi.	r9,r9,1
+
 	beq	6f
 
-5:	/* this is a hugepage user address */
-	li	r11,(SLB_VSID_USER|SLB_VSID_L)
+5:	li	r11,SLB_VSID_USER|SLB_VSID_L
 END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
 #endif /* CONFIG_HUGETLB_PAGE */
 
include/asm-ppc64/mmu.h

Lines changed: 1 addition & 1 deletion
@@ -307,7 +307,7 @@ typedef unsigned long mm_context_id_t;
 typedef struct {
 	mm_context_id_t id;
 #ifdef CONFIG_HUGETLB_PAGE
-	u16 htlb_segs;	/* bitmask */
+	u16 low_htlb_areas, high_htlb_areas;
 #endif
 } mm_context_t;
 
