
Commit 94fcc58

Andrea Arcangeli authored and Linus Torvalds committed
thp: avoid breaking huge pmd invariants in case of vma_adjust failures
A huge pmd can only be mapped if the corresponding 2M virtual range is fully contained in the vma. At times the VM calls split_vma twice: if the first split_vma succeeds and the second fails, the first split_vma remains in effect and is not rolled back. For split_vma or vma_adjust to fail, an allocation failure is needed, so it is a very unlikely event (the out-of-memory killer would normally fire before any allocation failure becomes visible to kernel and userland, and if an out-of-memory condition does happen, it is unlikely to happen exactly here). Nevertheless it is safer to ensure that no huge pmd can be left around if the vma is adjusted in a way that can no longer fit hugepages at the new vm_start/vm_end addresses.

Signed-off-by: Andrea Arcangeli <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent bc83501 · commit 94fcc58

File tree: 3 files changed, +99 −2 lines
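
To make the invariant concrete before diving into the diff, here is a minimal userspace sketch (not kernel code; the helper name hpage_range_fits is hypothetical) of the containment check the patch adds in __vma_adjust_trans_huge(), assuming x86-64's 2M huge pages:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HPAGE_PMD_SIZE (2UL * 1024 * 1024)      /* 2M huge page, as on x86-64 */
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))  /* rounds an address down to 2M */

/*
 * The invariant from the commit message: a huge pmd may only map an
 * address if the whole 2M-aligned range containing that address lies
 * inside [vm_start, vm_end).
 */
static bool hpage_range_fits(uint64_t addr, uint64_t vm_start, uint64_t vm_end)
{
	uint64_t hstart = addr & HPAGE_PMD_MASK;

	return hstart >= vm_start && hstart + HPAGE_PMD_SIZE <= vm_end;
}

int main(void)
{
	/* A 4M anonymous vma [0x200000, 0x600000) shrunk to end at 0x300000. */
	printf("%d\n", hpage_range_fits(0x250000, 0x200000, 0x600000)); /* 1: fits */
	printf("%d\n", hpage_range_fits(0x250000, 0x200000, 0x300000)); /* 0: must split */
	return 0;
}

After such a shrink the huge pmd backing [0x200000, 0x400000) no longer fits, and leaving it in place would map memory outside the vma; that is the stale state this patch splits away.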

include/linux/huge_mm.h

Lines changed: 19 additions & 0 deletions
@@ -104,6 +104,19 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
 extern int hugepage_madvise(unsigned long *vm_flags);
+extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+				    unsigned long start,
+				    unsigned long end,
+				    long adjust_next);
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+					 unsigned long start,
+					 unsigned long end,
+					 long adjust_next)
+{
+	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+		return;
+	__vma_adjust_trans_huge(vma, start, end, adjust_next);
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUG(); 0; })
@@ -125,6 +138,12 @@ static inline int hugepage_madvise(unsigned long *vm_flags)
 	BUG();
 	return 0;
 }
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+					 unsigned long start,
+					 unsigned long end,
+					 long adjust_next)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
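
A note on the split between the two definitions above: vma_adjust_trans_huge() is an inline wrapper that filters out vmas which can never contain transparent hugepages (no anon_vma yet, or file-backed via vm_ops/vm_file), so the common case costs only a few inlined tests and the out-of-line __vma_adjust_trans_huge() runs only for anonymous vmas. In !CONFIG_TRANSPARENT_HUGEPAGE builds the stub is empty and the call compiles away entirely.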

mm/huge_memory.c

Lines changed: 78 additions & 2 deletions
@@ -1075,8 +1075,16 @@ pmd_t *page_check_address_pmd(struct page *page,
 		goto out;
 	if (pmd_page(*pmd) != page)
 		goto out;
-	VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
-		  pmd_trans_splitting(*pmd));
+	/*
+	 * split_vma() may create temporary aliased mappings. There is
+	 * no risk as long as all huge pmd are found and have their
+	 * splitting bit set before __split_huge_page_refcount
+	 * runs. Finding the same huge pmd more than once during the
+	 * same rmap walk is not a problem.
+	 */
+	if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
+	    pmd_trans_splitting(*pmd))
+		goto out;
 	if (pmd_trans_huge(*pmd)) {
 		VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
 			  !pmd_trans_splitting(*pmd));
@@ -2196,3 +2204,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
 	put_page(page);
 	BUG_ON(pmd_trans_huge(*pmd));
 }
+
+static void split_huge_page_address(struct mm_struct *mm,
+				    unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return;
+	/*
+	 * Caller holds the mmap_sem write mode, so a huge pmd cannot
+	 * materialize from under us.
+	 */
+	split_huge_page_pmd(mm, pmd);
+}
+
+void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+			     unsigned long start,
+			     unsigned long end,
+			     long adjust_next)
+{
+	/*
+	 * If the new start address isn't hpage aligned and it could
+	 * previously contain an hugepage: check if we need to split
+	 * an huge pmd.
+	 */
+	if (start & ~HPAGE_PMD_MASK &&
+	    (start & HPAGE_PMD_MASK) >= vma->vm_start &&
+	    (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+		split_huge_page_address(vma->vm_mm, start);
+
+	/*
+	 * If the new end address isn't hpage aligned and it could
+	 * previously contain an hugepage: check if we need to split
+	 * an huge pmd.
+	 */
+	if (end & ~HPAGE_PMD_MASK &&
+	    (end & HPAGE_PMD_MASK) >= vma->vm_start &&
+	    (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
+		split_huge_page_address(vma->vm_mm, end);
+
+	/*
+	 * If we're also updating the vma->vm_next->vm_start, if the new
+	 * vm_next->vm_start isn't page aligned and it could previously
+	 * contain an hugepage: check if we need to split an huge pmd.
+	 */
+	if (adjust_next > 0) {
+		struct vm_area_struct *next = vma->vm_next;
+		unsigned long nstart = next->vm_start;
+		nstart += adjust_next << PAGE_SHIFT;
+		if (nstart & ~HPAGE_PMD_MASK &&
+		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
+		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
+			split_huge_page_address(next->vm_mm, nstart);
+	}
+}
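
To see the conditions above on concrete (hypothetical) numbers with 2M huge pages: shrinking a vma spanning [0x200000, 0x600000) to end = 0x300000 gives end & ~HPAGE_PMD_MASK = 0x100000 (so end is not hpage aligned), end & HPAGE_PMD_MASK = 0x200000 >= vm_start, and 0x200000 + HPAGE_PMD_SIZE = 0x400000 <= vm_end, so split_huge_page_address(vma->vm_mm, 0x300000) runs and splits the huge pmd backing [0x200000, 0x400000), which the new end would otherwise cut in half. A 2M-aligned start or end skips the split, since any remaining huge pmd then falls entirely on one side of the cut.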

mm/mmap.c

Lines changed: 2 additions & 0 deletions
@@ -589,6 +589,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	vma_adjust_trans_huge(vma, start, end, adjust_next);
+
 	/*
 	 * When changing only vma->vm_end, we don't really need anon_vma
 	 * lock. This is a fairly rare case by itself, but the anon_vma
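
The placement of the call matters: it sits near the top of vma_adjust(), before the vma fields are rewritten, so the checks in __vma_adjust_trans_huge() compare the new start/end arguments against the still-unmodified vm_start/vm_end, and any affected huge pmd is split while the old layout is intact.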
