@@ -740,133 +740,6 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
740740 return 0 ;
741741}
742742
743- /*
744- * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
745- * is already present in an i_mmap tree without adjusting the tree.
746- * The following helper function should be used when such adjustments
747- * are necessary. The "insert" vma (if any) is to be inserted
748- * before we drop the necessary locks.
749- */
750- int __vma_adjust (struct vma_iterator * vmi , struct vm_area_struct * vma ,
751- unsigned long start , unsigned long end , pgoff_t pgoff ,
752- struct vm_area_struct * expand )
753- {
754- struct mm_struct * mm = vma -> vm_mm ;
755- struct vm_area_struct * remove2 = NULL ;
756- struct vm_area_struct * remove = NULL ;
757- struct vm_area_struct * next = find_vma (mm , vma -> vm_end );
758- struct vm_area_struct * orig_vma = vma ;
759- struct file * file = vma -> vm_file ;
760- bool vma_changed = false;
761- long adjust_next = 0 ;
762- struct vma_prepare vma_prep ;
763-
764- if (next ) {
765- int error = 0 ;
766-
767- if (end >= next -> vm_end ) {
768- /*
769- * vma expands, overlapping all the next, and
770- * perhaps the one after too (mprotect case 6).
771- * The only other cases that gets here are
772- * case 1, case 7 and case 8.
773- */
774- if (next == expand ) {
775- /*
776- * The only case where we don't expand "vma"
777- * and we expand "next" instead is case 8.
778- */
779- VM_WARN_ON (end != next -> vm_end );
780- /*
781- * we're removing "vma" and that to do so we
782- * swapped "vma" and "next".
783- */
784- VM_WARN_ON (file != next -> vm_file );
785- swap (vma , next );
786- remove = next ;
787- } else {
788- VM_WARN_ON (expand != vma );
789- /*
790- * case 1, 6, 7, remove next.
791- * case 6 also removes the one beyond next
792- */
793- remove = next ;
794- if (end > next -> vm_end )
795- remove2 = find_vma (mm , next -> vm_end );
796-
797- VM_WARN_ON (remove2 != NULL &&
798- end != remove2 -> vm_end );
799- }
800-
801- /*
802- * If next doesn't have anon_vma, import from vma after
803- * next, if the vma overlaps with it.
804- */
805- if (remove != NULL && !next -> anon_vma )
806- error = dup_anon_vma (vma , remove2 );
807- else
808- error = dup_anon_vma (vma , remove );
809-
810- } else if (end > next -> vm_start ) {
811- /*
812- * vma expands, overlapping part of the next:
813- * mprotect case 5 shifting the boundary up.
814- */
815- adjust_next = (end - next -> vm_start );
816- VM_WARN_ON (expand != vma );
817- error = dup_anon_vma (vma , next );
818- } else if (end < vma -> vm_end ) {
819- /*
820- * vma shrinks, and !insert tells it's not
821- * split_vma inserting another: so it must be
822- * mprotect case 4 shifting the boundary down.
823- */
824- adjust_next = - (vma -> vm_end - end );
825- VM_WARN_ON (expand != next );
826- error = dup_anon_vma (next , vma );
827- }
828- if (error )
829- return error ;
830- }
831-
832- if (vma_iter_prealloc (vmi ))
833- return - ENOMEM ;
834-
835- vma_adjust_trans_huge (orig_vma , start , end , adjust_next );
836-
837- init_multi_vma_prep (& vma_prep , vma , adjust_next ? next : NULL , remove ,
838- remove2 );
839- VM_WARN_ON (vma_prep .anon_vma && adjust_next && next -> anon_vma &&
840- vma_prep .anon_vma != next -> anon_vma );
841-
842- vma_prepare (& vma_prep );
843-
844- if (start < vma -> vm_start || end > vma -> vm_end )
845- vma_changed = true;
846-
847- vma -> vm_start = start ;
848- vma -> vm_end = end ;
849- vma -> vm_pgoff = pgoff ;
850-
851- if (vma_changed )
852- vma_iter_store (vmi , vma );
853-
854- if (adjust_next ) {
855- next -> vm_start += adjust_next ;
856- next -> vm_pgoff += adjust_next >> PAGE_SHIFT ;
857- if (adjust_next < 0 ) {
858- WARN_ON_ONCE (vma_changed );
859- vma_iter_store (vmi , next );
860- }
861- }
862-
863- vma_complete (& vma_prep , vmi , mm );
864- vma_iter_free (vmi );
865- validate_mm (mm );
866-
867- return 0 ;
868- }
869-
870743/*
871744 * If the vma has a ->close operation then the driver probably needs to release
872745 * per-vma resources, so we don't attempt to merge those.
@@ -993,7 +866,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
993866 * It is important for case 8 that the vma NNNN overlapping the
994867 * region AAAA is never going to be extended over XXXX. Instead XXXX must
995868 * be extended in region AAAA and NNNN must be removed. This way in
996- * all cases where vma_merge succeeds, the moment vma_adjust drops the
869+ * all cases where vma_merge succeeds, the moment vma_merge drops the
997870 * rmap_locks, the properties of the merged vma will be already
998871 * correct for the whole merged range. Some of those properties like
999872 * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
@@ -1003,6 +876,12 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1003876 * or other rmap walkers (if working on addresses beyond the "end"
1004877 * parameter) may establish ptes with the wrong permissions of NNNN
1005878 * instead of the right permissions of XXXX.
879+ *
880+ * In the code below:
881+ * PPPP is represented by *prev
882+ * NNNN is represented by *mid (and possibly equal to *next)
883+ * XXXX is represented by *next or not represented at all.
884+ * AAAA is not represented - it will be merged or the function will return NULL
1006885 */
1007886struct vm_area_struct * vma_merge (struct vma_iterator * vmi , struct mm_struct * mm ,
1008887 struct vm_area_struct * prev , unsigned long addr ,
@@ -1013,11 +892,19 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
1013892 struct anon_vma_name * anon_name )
1014893{
1015894 pgoff_t pglen = (end - addr ) >> PAGE_SHIFT ;
895+ pgoff_t vma_pgoff ;
1016896 struct vm_area_struct * mid , * next , * res = NULL ;
897+ struct vm_area_struct * vma , * adjust , * remove , * remove2 ;
1017898 int err = -1 ;
1018899 bool merge_prev = false;
1019900 bool merge_next = false;
901+ bool vma_expanded = false;
902+ struct vma_prepare vp ;
903+ unsigned long vma_end = end ;
904+ long adj_next = 0 ;
905+ unsigned long vma_start = addr ;
1020906
907+ validate_mm (mm );
1021908 /*
1022909 * We later require that vma->vm_flags == vm_flags,
1023910 * so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1035,13 +922,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
1035922 VM_WARN_ON (mid && end > mid -> vm_end );
1036923 VM_WARN_ON (addr >= end );
1037924
1038- /* Can we merge the predecessor? */
1039- if (prev && prev -> vm_end == addr &&
1040- mpol_equal (vma_policy (prev ), policy ) &&
1041- can_vma_merge_after (prev , vm_flags ,
1042- anon_vma , file , pgoff ,
1043- vm_userfaultfd_ctx , anon_name )) {
1044- merge_prev = true;
925+ if (prev ) {
926+ res = prev ;
927+ vma = prev ;
928+ vma_start = prev -> vm_start ;
929+ vma_pgoff = prev -> vm_pgoff ;
930+ /* Can we merge the predecessor? */
931+ if (prev -> vm_end == addr && mpol_equal (vma_policy (prev ), policy )
932+ && can_vma_merge_after (prev , vm_flags , anon_vma , file ,
933+ pgoff , vm_userfaultfd_ctx , anon_name )) {
934+ merge_prev = true;
935+ }
1045936 }
1046937 /* Can we merge the successor? */
1047938 if (next && end == next -> vm_start &&
@@ -1051,32 +942,85 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
1051942 vm_userfaultfd_ctx , anon_name )) {
1052943 merge_next = true;
1053944 }
945+
946+ remove = remove2 = adjust = NULL ;
1054947 /* Can we merge both the predecessor and the successor? */
1055948 if (merge_prev && merge_next &&
1056- is_mergeable_anon_vma (prev -> anon_vma ,
1057- next -> anon_vma , NULL )) { /* cases 1, 6 */
1058- err = __vma_adjust (vmi , prev , prev -> vm_start ,
1059- next -> vm_end , prev -> vm_pgoff , prev );
1060- res = prev ;
1061- } else if (merge_prev ) { /* cases 2, 5, 7 */
1062- err = __vma_adjust (vmi , prev , prev -> vm_start ,
1063- end , prev -> vm_pgoff , prev );
1064- res = prev ;
949+ is_mergeable_anon_vma (prev -> anon_vma , next -> anon_vma , NULL )) {
950+ remove = mid ; /* case 1 */
951+ vma_end = next -> vm_end ;
952+ err = dup_anon_vma (res , remove );
953+ if (mid != next ) { /* case 6 */
954+ remove2 = next ;
955+ if (!remove -> anon_vma )
956+ err = dup_anon_vma (res , remove2 );
957+ }
958+ } else if (merge_prev ) {
959+ err = 0 ; /* case 2 */
960+ if (mid && end > mid -> vm_start ) {
961+ err = dup_anon_vma (res , mid );
962+ if (end == mid -> vm_end ) { /* case 7 */
963+ remove = mid ;
964+ } else { /* case 5 */
965+ adjust = mid ;
966+ adj_next = (end - mid -> vm_start );
967+ }
968+ }
1065969 } else if (merge_next ) {
1066- if (prev && addr < prev -> vm_end ) /* case 4 */
1067- err = __vma_adjust (vmi , prev , prev -> vm_start ,
1068- addr , prev -> vm_pgoff , next );
1069- else /* cases 3, 8 */
1070- err = __vma_adjust (vmi , mid , addr , next -> vm_end ,
1071- next -> vm_pgoff - pglen , next );
1072970 res = next ;
971+ if (prev && addr < prev -> vm_end ) { /* case 4 */
972+ vma_end = addr ;
973+ adjust = mid ;
974+ adj_next = - (vma -> vm_end - addr );
975+ err = dup_anon_vma (res , adjust );
976+ } else {
977+ vma = next ; /* case 3 */
978+ vma_start = addr ;
979+ vma_end = next -> vm_end ;
980+ vma_pgoff = mid -> vm_pgoff ;
981+ err = 0 ;
982+ if (mid != next ) { /* case 8 */
983+ remove = mid ;
984+ err = dup_anon_vma (res , remove );
985+ }
986+ }
1073987 }
1074988
1075- /*
1076- * Cannot merge with predecessor or successor or error in __vma_adjust?
1077- */
989+ /* Cannot merge or error in anon_vma clone */
1078990 if (err )
1079991 return NULL ;
992+
993+ if (vma_iter_prealloc (vmi ))
994+ return NULL ;
995+
996+ vma_adjust_trans_huge (vma , vma_start , vma_end , adj_next );
997+ init_multi_vma_prep (& vp , vma , adjust , remove , remove2 );
998+ VM_WARN_ON (vp .anon_vma && adjust && adjust -> anon_vma &&
999+ vp .anon_vma != adjust -> anon_vma );
1000+
1001+ vma_prepare (& vp );
1002+ if (vma_start < vma -> vm_start || vma_end > vma -> vm_end )
1003+ vma_expanded = true;
1004+
1005+ vma -> vm_start = vma_start ;
1006+ vma -> vm_end = vma_end ;
1007+ vma -> vm_pgoff = vma_pgoff ;
1008+
1009+ if (vma_expanded )
1010+ vma_iter_store (vmi , vma );
1011+
1012+ if (adj_next ) {
1013+ adjust -> vm_start += adj_next ;
1014+ adjust -> vm_pgoff += adj_next >> PAGE_SHIFT ;
1015+ if (adj_next < 0 ) {
1016+ WARN_ON (vma_expanded );
1017+ vma_iter_store (vmi , next );
1018+ }
1019+ }
1020+
1021+ vma_complete (& vp , vmi , mm );
1022+ vma_iter_free (vmi );
1023+ validate_mm (mm );
10801024 khugepaged_enter_vma (res , vm_flags );
10811025
10821026 if (res )
0 commit comments