@@ -1078,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
+static int dax_fault_return(int error)
+{
+	if (error == 0)
+		return VM_FAULT_NOPAGE;
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
+}
+
 /**
  * dax_iomap_fault - handle a page fault on a DAX file
  * @vma: The virtual memory area where the fault occurred
@@ -1110,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (pos >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		error = PTR_ERR(entry);
-		goto out;
-	}
-
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
 
@@ -1126,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	 */
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		return dax_fault_return(error);
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		error = -EIO;		/* fs corruption? */
+		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
+		goto finish_iomap;
+	}
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
 		goto finish_iomap;
 	}
 
@@ -1151,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 
 		if (error)
-			goto finish_iomap;
+			goto error_unlock_entry;
 
 		__SetPageUptodate(vmf->cow_page);
 		vmf_ret = finish_fault(vmf);
 		if (!vmf_ret)
 			vmf_ret = VM_FAULT_DONE_COW;
-		goto finish_iomap;
+		goto unlock_entry;
 	}
 
 	switch (iomap.type) {
@@ -1169,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 		error = dax_insert_mapping(mapping, iomap.bdev, sector,
 				PAGE_SIZE, &entry, vma, vmf);
+		/* -EBUSY is fine, somebody else faulted on the same PTE */
+		if (error == -EBUSY)
+			error = 0;
 		break;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
 			vmf_ret = dax_load_hole(mapping, &entry, vmf);
-			goto finish_iomap;
+			goto unlock_entry;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1183,30 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		break;
 	}
 
- finish_iomap:
-	if (ops->iomap_end) {
-		if (error || (vmf_ret & VM_FAULT_ERROR)) {
-			/* keep previous error */
-			ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PAGE_SIZE,
-					PAGE_SIZE, flags, &iomap);
-		}
-	}
+ error_unlock_entry:
+	vmf_ret = dax_fault_return(error) | major;
  unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-	if (error == -ENOMEM)
-		return VM_FAULT_OOM | major;
-	/* -EBUSY is fine, somebody else faulted on the same PTE */
-	if (error < 0 && error != -EBUSY)
-		return VM_FAULT_SIGBUS | major;
-	if (vmf_ret) {
-		WARN_ON_ONCE(error);	/* -EBUSY from ops->iomap_end? */
-		return vmf_ret;
+ finish_iomap:
+	if (ops->iomap_end) {
+		int copied = PAGE_SIZE;
+
+		if (vmf_ret & VM_FAULT_ERROR)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PTE we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
-	return VM_FAULT_NOPAGE | major;
+	return vmf_ret;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);
 
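
For context (not part of the patch itself): the new dax_fault_return() helper centralizes the errno-to-VM_FAULT translation that the deleted "out:" tail used to do inline. Below is a minimal userspace sketch of that mapping; the VM_FAULT_* values are copied from include/linux/mm.h of this era so the sketch compiles standalone, and everything else is illustrative rather than kernel code.

#include <errno.h>
#include <stdio.h>

/* Fault-code bits as in include/linux/mm.h at the time (illustrative copy). */
#define VM_FAULT_OOM	0x0001
#define VM_FAULT_SIGBUS	0x0002
#define VM_FAULT_NOPAGE	0x0100

/* Models the dax_fault_return() helper introduced above. */
static int dax_fault_return(int error)
{
	if (error == 0)
		return VM_FAULT_NOPAGE;
	if (error == -ENOMEM)
		return VM_FAULT_OOM;
	return VM_FAULT_SIGBUS;
}

int main(void)
{
	printf("0       -> %#06x\n", dax_fault_return(0));		/* NOPAGE */
	printf("-ENOMEM -> %#06x\n", dax_fault_return(-ENOMEM));	/* OOM */
	printf("-EIO    -> %#06x\n", dax_fault_return(-EIO));		/* SIGBUS */
	return 0;
}

Note that -EBUSY from dax_insert_mapping() is cleared to 0 before this translation runs, so a racing fault on the same PTE ends up as VM_FAULT_NOPAGE rather than SIGBUS.
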
@@ -1330,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
 		goto fallback;
 
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry. If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto fallback;
-
 	/*
 	 * Note that we don't use iomap_apply here. We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
@@ -1348,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	pos = (loff_t)pgoff << PAGE_SHIFT;
 	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		goto fallback;
+
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry. If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto finish_iomap;
+
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1364,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
-			goto finish_iomap;
+			goto unlock_entry;
 		result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
 				&entry);
 		break;
@@ -1373,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		break;
 	}
 
+ unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
-		if (result == VM_FAULT_FALLBACK) {
-			ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-					iomap_flags, &iomap);
-			if (error)
-				result = VM_FAULT_FALLBACK;
-		}
+		int copied = PMD_SIZE;
+
+		if (result == VM_FAULT_FALLBACK)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PMD we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+				&iomap);
 	}
- unlock_entry:
-	put_locked_mapping_entry(mapping, pgoff, entry);
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
 		split_huge_pmd(vma, pmd, address);
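
A rough userspace model (assumed names, not kernel code) of the ordering both fault handlers follow after this patch: ->iomap_begin() runs before the radix tree entry is grabbed, the entry lock is dropped before ->iomap_end(), and the return value of ->iomap_end() is ignored because the fault may already be visible to other threads.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in stubs that only trace the order of operations. */
static int  iomap_begin(void)			{ puts("->iomap_begin()"); return 0; }
static void iomap_end(int copied)		{ printf("->iomap_end(copied=%d), result ignored\n", copied); }
static bool grab_mapping_entry(void)		{ puts("grab_mapping_entry()"); return true; }
static void put_locked_mapping_entry(void)	{ puts("put_locked_mapping_entry()"); }
static int  install_mapping(void)		{ puts("install PTE/PMD"); return 0; }

static int model_fault(void)
{
	int error, copied = 4096;

	error = iomap_begin();			/* 1: look up the block mapping first */
	if (error)
		return error;			/* nothing locked yet, plain return */

	if (!grab_mapping_entry()) {		/* 2: now take the radix tree entry lock */
		copied = 0;
		goto finish_iomap;
	}

	error = install_mapping();		/* 3: set up the PTE/PMD */
	if (error)
		copied = 0;

	put_locked_mapping_entry();		/* 4: drop the entry lock ... */
finish_iomap:
	iomap_end(copied);			/* 5: ... before ending the iomap */
	return error;
}

int main(void)
{
	return model_fault();
}

Once the page tables have been populated another thread may already be using the mapping, so a late error from ->iomap_end() cannot be propagated; the handlers only pass copied = 0 when the fault failed or fell back.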