@@ -797,18 +797,16 @@ int ext4_dio_get_block(struct inode *inode, sector_t iblock,
797797}
798798
799799/*
800- * Get block function for DIO writes when we create unwritten extent if
800+ * Get block function for AIO DIO writes when we create unwritten extent if
801801 * blocks are not allocated yet. The extent will be converted to written
802802 * after IO is complete.
803803 */
804- static int ext4_dio_get_block_unwritten (struct inode * inode , sector_t iblock ,
805- struct buffer_head * bh_result , int create )
804+ static int ext4_dio_get_block_unwritten_async (struct inode * inode ,
805+ sector_t iblock , struct buffer_head * bh_result , int create )
806806{
807807 handle_t * handle ;
808808 int ret ;
809809
810- ext4_debug ("ext4_dio_get_block_unwritten: inode %lu, create flag %d\n" ,
811- inode -> i_ino , create );
812810 /* We don't expect handle for direct IO */
813811 WARN_ON_ONCE (ext4_journal_current_handle ());
814812
@@ -818,16 +816,62 @@ static int ext4_dio_get_block_unwritten(struct inode *inode, sector_t iblock,
818816 ret = _ext4_get_block (inode , iblock , bh_result ,
819817 EXT4_GET_BLOCKS_IO_CREATE_EXT );
820818 ext4_journal_stop (handle );
821- if (!ret && buffer_unwritten (bh_result )) {
822- ext4_io_end_t * io_end = ext4_inode_aio (inode );
823819
820+ /*
821+ * When doing DIO using unwritten extents, we need io_end to convert
822+ * unwritten extents to written on IO completion. We allocate io_end
823+ * once we spot unwritten extent and store it in b_private. Generic
824+ * DIO code keeps b_private set and furthermore passes the value to
825+ * our completion callback in 'private' argument.
826+ */
827+ if (!ret && buffer_unwritten (bh_result )) {
828+ if (!bh_result -> b_private ) {
829+ ext4_io_end_t * io_end ;
830+
831+ io_end = ext4_init_io_end (inode , GFP_KERNEL );
832+ if (!io_end )
833+ return - ENOMEM ;
834+ bh_result -> b_private = io_end ;
835+ ext4_set_io_unwritten_flag (inode , io_end );
836+ }
824837 set_buffer_defer_completion (bh_result );
825- WARN_ON_ONCE (io_end && !(io_end -> flag & EXT4_IO_END_UNWRITTEN ));
826838 }
827839
828840 return ret ;
829841}
830842
843+ /*
844+ * Get block function for non-AIO DIO writes when we create unwritten extent if
845+ * blocks are not allocated yet. The extent will be converted to written
846+ * after IO is complete from ext4_ext_direct_IO() function.
847+ */
848+ static int ext4_dio_get_block_unwritten_sync (struct inode * inode ,
849+ sector_t iblock , struct buffer_head * bh_result , int create )
850+ {
851+ handle_t * handle ;
852+ int ret ;
853+
854+ /* We don't expect handle for direct IO */
855+ WARN_ON_ONCE (ext4_journal_current_handle ());
856+
857+ handle = start_dio_trans (inode , bh_result );
858+ if (IS_ERR (handle ))
859+ return PTR_ERR (handle );
860+ ret = _ext4_get_block (inode , iblock , bh_result ,
861+ EXT4_GET_BLOCKS_IO_CREATE_EXT );
862+ ext4_journal_stop (handle );
863+
864+ /*
865+ * Mark inode as having pending DIO writes to unwritten extents.
866+ * ext4_ext_direct_IO() checks this flag and converts extents to
867+ * written.
868+ */
869+ if (!ret && buffer_unwritten (bh_result ))
870+ ext4_set_inode_state (inode , EXT4_STATE_DIO_UNWRITTEN );
871+
872+ return ret ;
873+ }
874+
831875static int ext4_dio_get_block_overwrite (struct inode * inode , sector_t iblock ,
832876 struct buffer_head * bh_result , int create )
833877{
@@ -3243,18 +3287,16 @@ int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
32433287static void ext4_end_io_dio (struct kiocb * iocb , loff_t offset ,
32443288 ssize_t size , void * private )
32453289{
3246- ext4_io_end_t * io_end = iocb -> private ;
3290+ ext4_io_end_t * io_end = private ;
32473291
32483292 /* if not async direct IO just return */
32493293 if (!io_end )
32503294 return ;
32513295
32523296 ext_debug ("ext4_end_io_dio(): io_end 0x%p "
32533297 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n" ,
3254- iocb -> private , io_end -> inode -> i_ino , iocb , offset ,
3255- size );
3298+ io_end , io_end -> inode -> i_ino , iocb , offset , size );
32563299
3257- iocb -> private = NULL ;
32583300 io_end -> offset = offset ;
32593301 io_end -> size = size ;
32603302 ext4_put_io_end (io_end );
@@ -3290,7 +3332,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
32903332 get_block_t * get_block_func = NULL ;
32913333 int dio_flags = 0 ;
32923334 loff_t final_size = offset + count ;
3293- ext4_io_end_t * io_end = NULL ;
32943335
32953336 /* Use the old path for reads and writes beyond i_size. */
32963337 if (iov_iter_rw (iter ) != WRITE || final_size > inode -> i_size )
@@ -3315,47 +3356,31 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
33153356 /*
33163357 * We could direct write to holes and fallocate.
33173358 *
3318- * Allocated blocks to fill the hole are marked as
3319- * unwritten to prevent parallel buffered read to expose
3320- * the stale data before DIO complete the data IO.
3359+ * Allocated blocks to fill the hole are marked as unwritten to prevent
3360+ * parallel buffered read to expose the stale data before DIO complete
3361+ * the data IO.
33213362 *
3322- * As to previously fallocated extents, ext4 get_block will
3323- * just simply mark the buffer mapped but still keep the
3324- * extents unwritten.
3363+ * As to previously fallocated extents, ext4 get_block will just simply
3364+ * mark the buffer mapped but still keep the extents unwritten.
33253365 *
3326- * For non AIO case, we will convert those unwritten extents
3327- * to written after return back from blockdev_direct_IO.
3366+ * For non AIO case, we will convert those unwritten extents to written
3367+ * after return back from blockdev_direct_IO. That way we save us from
3368+ * allocating io_end structure and also the overhead of offloading
3369+ * the extent convertion to a workqueue.
33283370 *
33293371 * For async DIO, the conversion needs to be deferred when the
33303372 * IO is completed. The ext4 end_io callback function will be
33313373 * called to take care of the conversion work. Here for async
33323374 * case, we allocate an io_end structure to hook to the iocb.
33333375 */
33343376 iocb -> private = NULL ;
3335- if (overwrite ) {
3377+ if (overwrite )
33363378 get_block_func = ext4_dio_get_block_overwrite ;
3379+ else if (is_sync_kiocb (iocb )) {
3380+ get_block_func = ext4_dio_get_block_unwritten_sync ;
3381+ dio_flags = DIO_LOCKING ;
33373382 } else {
3338- ext4_inode_aio_set (inode , NULL );
3339- if (!is_sync_kiocb (iocb )) {
3340- io_end = ext4_init_io_end (inode , GFP_NOFS );
3341- if (!io_end ) {
3342- ret = - ENOMEM ;
3343- goto retake_lock ;
3344- }
3345- /*
3346- * Grab reference for DIO. Will be dropped in
3347- * ext4_end_io_dio()
3348- */
3349- iocb -> private = ext4_get_io_end (io_end );
3350- /*
3351- * we save the io structure for current async direct
3352- * IO, so that later ext4_map_blocks() could flag the
3353- * io structure whether there is a unwritten extents
3354- * needs to be converted when IO is completed.
3355- */
3356- ext4_inode_aio_set (inode , io_end );
3357- }
3358- get_block_func = ext4_dio_get_block_unwritten ;
3383+ get_block_func = ext4_dio_get_block_unwritten_async ;
33593384 dio_flags = DIO_LOCKING ;
33603385 }
33613386#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -3370,27 +3395,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
33703395 get_block_func ,
33713396 ext4_end_io_dio , NULL , dio_flags );
33723397
3373- /*
3374- * Put our reference to io_end. This can free the io_end structure e.g.
3375- * in sync IO case or in case of error. It can even perform extent
3376- * conversion if all bios we submitted finished before we got here.
3377- * Note that in that case iocb->private can be already set to NULL
3378- * here.
3379- */
3380- if (io_end ) {
3381- ext4_inode_aio_set (inode , NULL );
3382- ext4_put_io_end (io_end );
3383- /*
3384- * When no IO was submitted ext4_end_io_dio() was not
3385- * called so we have to put iocb's reference.
3386- */
3387- if (ret <= 0 && ret != - EIOCBQUEUED && iocb -> private ) {
3388- WARN_ON (iocb -> private != io_end );
3389- WARN_ON (io_end -> flag & EXT4_IO_END_UNWRITTEN );
3390- ext4_put_io_end (io_end );
3391- iocb -> private = NULL ;
3392- }
3393- }
33943398 if (ret > 0 && !overwrite && ext4_test_inode_state (inode ,
33953399 EXT4_STATE_DIO_UNWRITTEN )) {
33963400 int err ;
@@ -3405,7 +3409,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
34053409 ext4_clear_inode_state (inode , EXT4_STATE_DIO_UNWRITTEN );
34063410 }
34073411
3408- retake_lock :
34093412 if (iov_iter_rw (iter ) == WRITE )
34103413 inode_dio_end (inode );
34113414 /* take i_mutex locking again if we do a ovewrite dio */
0 commit comments