2020#include <linux/mman.h>
2121#include <linux/sched/mm.h>
2222#include <linux/crc32.h>
23+ #include <linux/task_io_accounting_ops.h>
2324
2425#include "zonefs.h"
2526
@@ -596,6 +597,61 @@ static const struct iomap_dio_ops zonefs_write_dio_ops = {
596597 .end_io = zonefs_file_write_dio_end_io ,
597598};
598599
600+ static ssize_t zonefs_file_dio_append (struct kiocb * iocb , struct iov_iter * from )
601+ {
602+ struct inode * inode = file_inode (iocb -> ki_filp );
603+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
604+ struct block_device * bdev = inode -> i_sb -> s_bdev ;
605+ unsigned int max ;
606+ struct bio * bio ;
607+ ssize_t size ;
608+ int nr_pages ;
609+ ssize_t ret ;
610+
611+ nr_pages = iov_iter_npages (from , BIO_MAX_PAGES );
612+ if (!nr_pages )
613+ return 0 ;
614+
615+ max = queue_max_zone_append_sectors (bdev_get_queue (bdev ));
616+ max = ALIGN_DOWN (max << SECTOR_SHIFT , inode -> i_sb -> s_blocksize );
617+ iov_iter_truncate (from , max );
618+
619+ bio = bio_alloc_bioset (GFP_NOFS , nr_pages , & fs_bio_set );
620+ if (!bio )
621+ return - ENOMEM ;
622+
623+ bio_set_dev (bio , bdev );
624+ bio -> bi_iter .bi_sector = zi -> i_zsector ;
625+ bio -> bi_write_hint = iocb -> ki_hint ;
626+ bio -> bi_ioprio = iocb -> ki_ioprio ;
627+ bio -> bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE ;
628+ if (iocb -> ki_flags & IOCB_DSYNC )
629+ bio -> bi_opf |= REQ_FUA ;
630+
631+ ret = bio_iov_iter_get_pages (bio , from );
632+ if (unlikely (ret )) {
633+ bio_io_error (bio );
634+ return ret ;
635+ }
636+ size = bio -> bi_iter .bi_size ;
637+ task_io_account_write (ret );
638+
639+ if (iocb -> ki_flags & IOCB_HIPRI )
640+ bio_set_polled (bio , iocb );
641+
642+ ret = submit_bio_wait (bio );
643+
644+ bio_put (bio );
645+
646+ zonefs_file_write_dio_end_io (iocb , size , ret , 0 );
647+ if (ret >= 0 ) {
648+ iocb -> ki_pos += size ;
649+ return size ;
650+ }
651+
652+ return ret ;
653+ }
654+
599655/*
600656 * Handle direct writes. For sequential zone files, this is the only possible
601657 * write path. For these files, check that the user is issuing writes
@@ -611,6 +667,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
611667 struct inode * inode = file_inode (iocb -> ki_filp );
612668 struct zonefs_inode_info * zi = ZONEFS_I (inode );
613669 struct super_block * sb = inode -> i_sb ;
670+ bool sync = is_sync_kiocb (iocb );
671+ bool append = false;
614672 size_t count ;
615673 ssize_t ret ;
616674
@@ -619,7 +677,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
619677 * as this can cause write reordering (e.g. the first aio gets EAGAIN
620678 * on the inode lock but the second goes through but is now unaligned).
621679 */
622- if (zi -> i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb ( iocb ) &&
680+ if (zi -> i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
623681 (iocb -> ki_flags & IOCB_NOWAIT ))
624682 return - EOPNOTSUPP ;
625683
@@ -643,16 +701,22 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
643701 }
644702
645703 /* Enforce sequential writes (append only) in sequential zones */
646- mutex_lock (& zi -> i_truncate_mutex );
647- if (zi -> i_ztype == ZONEFS_ZTYPE_SEQ && iocb -> ki_pos != zi -> i_wpoffset ) {
704+ if (zi -> i_ztype == ZONEFS_ZTYPE_SEQ ) {
705+ mutex_lock (& zi -> i_truncate_mutex );
706+ if (iocb -> ki_pos != zi -> i_wpoffset ) {
707+ mutex_unlock (& zi -> i_truncate_mutex );
708+ ret = - EINVAL ;
709+ goto inode_unlock ;
710+ }
648711 mutex_unlock (& zi -> i_truncate_mutex );
649- ret = - EINVAL ;
650- goto inode_unlock ;
712+ append = sync ;
651713 }
652- mutex_unlock (& zi -> i_truncate_mutex );
653714
654- ret = iomap_dio_rw (iocb , from , & zonefs_iomap_ops ,
655- & zonefs_write_dio_ops , is_sync_kiocb (iocb ));
715+ if (append )
716+ ret = zonefs_file_dio_append (iocb , from );
717+ else
718+ ret = iomap_dio_rw (iocb , from , & zonefs_iomap_ops ,
719+ & zonefs_write_dio_ops , sync );
656720 if (zi -> i_ztype == ZONEFS_ZTYPE_SEQ &&
657721 (ret > 0 || ret == - EIOCBQUEUED )) {
658722 if (ret > 0 )
0 commit comments