2424
2525#include "zonefs.h"
2626
27+ static inline int zonefs_zone_mgmt (struct inode * inode ,
28+ enum req_opf op )
29+ {
30+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
31+ int ret ;
32+
33+ lockdep_assert_held (& zi -> i_truncate_mutex );
34+
35+ ret = blkdev_zone_mgmt (inode -> i_sb -> s_bdev , op , zi -> i_zsector ,
36+ zi -> i_zone_size >> SECTOR_SHIFT , GFP_NOFS );
37+ if (ret ) {
38+ zonefs_err (inode -> i_sb ,
39+ "Zone management operation %s at %llu failed %d\n" ,
40+ blk_op_str (op ), zi -> i_zsector , ret );
41+ return ret ;
42+ }
43+
44+ return 0 ;
45+ }
46+
47+ static inline void zonefs_i_size_write (struct inode * inode , loff_t isize )
48+ {
49+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
50+
51+ i_size_write (inode , isize );
52+ /*
53+ * A full zone is no longer open/active and does not need
54+ * explicit closing.
55+ */
56+ if (isize >= zi -> i_max_size )
57+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
58+ }
59+
2760static int zonefs_iomap_begin (struct inode * inode , loff_t offset , loff_t length ,
2861 unsigned int flags , struct iomap * iomap ,
2962 struct iomap * srcmap )
@@ -301,6 +334,17 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
301334 }
302335 }
303336
337+ /*
338+ * If the filesystem is mounted with the explicit-open mount option, we
339+ * need to clear the ZONEFS_ZONE_OPEN flag if the zone transitioned to
340+ * the read-only or offline condition, to avoid attempting an explicit
341+ * close of the zone when the inode file is closed.
342+ */
343+ if ((sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ) &&
344+ (zone -> cond == BLK_ZONE_COND_OFFLINE ||
345+ zone -> cond == BLK_ZONE_COND_READONLY ))
346+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
347+
304348 /*
305349 * If error=remount-ro was specified, any error result in remounting
306350 * the volume as read-only.
@@ -315,7 +359,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
315359 * invalid data.
316360 */
317361 zonefs_update_stats (inode , data_size );
318- i_size_write (inode , data_size );
362+ zonefs_i_size_write (inode , data_size );
319363 zi -> i_wpoffset = data_size ;
320364
321365 return 0 ;
@@ -328,7 +372,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
328372 * eventually correct the file size and zonefs inode write pointer offset
329373 * (which can be out of sync with the drive due to partial write failures).
330374 */
331- static void zonefs_io_error (struct inode * inode , bool write )
375+ static void __zonefs_io_error (struct inode * inode , bool write )
332376{
333377 struct zonefs_inode_info * zi = ZONEFS_I (inode );
334378 struct super_block * sb = inode -> i_sb ;
@@ -342,8 +386,6 @@ static void zonefs_io_error(struct inode *inode, bool write)
342386 };
343387 int ret ;
344388
345- mutex_lock (& zi -> i_truncate_mutex );
346-
347389 /*
348390 * Memory allocations in blkdev_report_zones() can trigger a memory
349391 * reclaim which may in turn cause a recursion into zonefs as well as
@@ -359,7 +401,14 @@ static void zonefs_io_error(struct inode *inode, bool write)
359401 zonefs_err (sb , "Get inode %lu zone information failed %d\n" ,
360402 inode -> i_ino , ret );
361403 memalloc_noio_restore (noio_flag );
404+ }
362405
406+ static void zonefs_io_error (struct inode * inode , bool write )
407+ {
408+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
409+
410+ mutex_lock (& zi -> i_truncate_mutex );
411+ __zonefs_io_error (inode , write );
363412 mutex_unlock (& zi -> i_truncate_mutex );
364413}
365414
@@ -397,13 +446,27 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize)
397446 if (isize == old_isize )
398447 goto unlock ;
399448
400- ret = blkdev_zone_mgmt (inode -> i_sb -> s_bdev , op , zi -> i_zsector ,
401- zi -> i_zone_size >> SECTOR_SHIFT , GFP_NOFS );
402- if (ret ) {
403- zonefs_err (inode -> i_sb ,
404- "Zone management operation at %llu failed %d" ,
405- zi -> i_zsector , ret );
449+ ret = zonefs_zone_mgmt (inode , op );
450+ if (ret )
406451 goto unlock ;
452+
453+ /*
454+ * If the mount option ZONEFS_MNTOPT_EXPLICIT_OPEN is set,
455+ * take care of open zones.
456+ */
457+ if (zi -> i_flags & ZONEFS_ZONE_OPEN ) {
458+ /*
459+ * Truncating a zone to EMPTY or FULL is the equivalent of
460+ * closing the zone. For a truncation to 0, we need to
461+ * re-open the zone to ensure new writes can be processed.
462+ * For a truncation to the maximum file size, the zone is
463+ * closed and writes cannot be accepted anymore, so clear
464+ * the open flag.
465+ */
466+ if (!isize )
467+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_OPEN );
468+ else
469+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
407470 }
408471
409472 zonefs_update_stats (inode , isize );
@@ -584,7 +647,7 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
584647 mutex_lock (& zi -> i_truncate_mutex );
585648 if (i_size_read (inode ) < iocb -> ki_pos + size ) {
586649 zonefs_update_stats (inode , iocb -> ki_pos + size );
587- i_size_write (inode , iocb -> ki_pos + size );
650+ zonefs_i_size_write (inode , iocb -> ki_pos + size );
588651 }
589652 mutex_unlock (& zi -> i_truncate_mutex );
590653 }
@@ -865,8 +928,128 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
865928 return ret ;
866929}
867930
931+ static inline bool zonefs_file_use_exp_open (struct inode * inode , struct file * file )
932+ {
933+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
934+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
935+
936+ if (!(sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ))
937+ return false;
938+
939+ if (zi -> i_ztype != ZONEFS_ZTYPE_SEQ )
940+ return false;
941+
942+ if (!(file -> f_mode & FMODE_WRITE ))
943+ return false;
944+
945+ return true;
946+ }
947+
948+ static int zonefs_open_zone (struct inode * inode )
949+ {
950+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
951+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
952+ int ret = 0 ;
953+
954+ mutex_lock (& zi -> i_truncate_mutex );
955+
956+ zi -> i_wr_refcnt ++ ;
957+ if (zi -> i_wr_refcnt == 1 ) {
958+
959+ if (atomic_inc_return (& sbi -> s_open_zones ) > sbi -> s_max_open_zones ) {
960+ atomic_dec (& sbi -> s_open_zones );
961+ ret = - EBUSY ;
962+ goto unlock ;
963+ }
964+
965+ if (i_size_read (inode ) < zi -> i_max_size ) {
966+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_OPEN );
967+ if (ret ) {
968+ zi -> i_wr_refcnt -- ;
969+ atomic_dec (& sbi -> s_open_zones );
970+ goto unlock ;
971+ }
972+ zi -> i_flags |= ZONEFS_ZONE_OPEN ;
973+ }
974+ }
975+
976+ unlock :
977+ mutex_unlock (& zi -> i_truncate_mutex );
978+
979+ return ret ;
980+ }
981+
/*
 * File open operation: run the generic open checks and, when the file
 * requires explicit zone open handling (see zonefs_file_use_exp_open()),
 * take a write reference on the zone and open it.
 *
 * Returns 0 on success, or the error returned by generic_file_open() or
 * zonefs_open_zone().
 */
static int zonefs_file_open(struct inode *inode, struct file *file)
{
	int ret = generic_file_open(inode, file);

	/* Generic open failed, or no explicit zone open is needed: done. */
	if (ret || !zonefs_file_use_exp_open(inode, file))
		return ret;

	return zonefs_open_zone(inode);
}
995+
996+ static void zonefs_close_zone (struct inode * inode )
997+ {
998+ struct zonefs_inode_info * zi = ZONEFS_I (inode );
999+ int ret = 0 ;
1000+
1001+ mutex_lock (& zi -> i_truncate_mutex );
1002+ zi -> i_wr_refcnt -- ;
1003+ if (!zi -> i_wr_refcnt ) {
1004+ struct zonefs_sb_info * sbi = ZONEFS_SB (inode -> i_sb );
1005+ struct super_block * sb = inode -> i_sb ;
1006+
1007+ /*
1008+ * If the file zone is full, it is not open anymore and we only
1009+ * need to decrement the open count.
1010+ */
1011+ if (!(zi -> i_flags & ZONEFS_ZONE_OPEN ))
1012+ goto dec ;
1013+
1014+ ret = zonefs_zone_mgmt (inode , REQ_OP_ZONE_CLOSE );
1015+ if (ret ) {
1016+ __zonefs_io_error (inode , false);
1017+ /*
1018+ * Leaving zones explicitly open may lead to a state
1019+ * where most zones cannot be written (zone resources
1020+ * exhausted). So take preventive action by remounting
1021+ * read-only.
1022+ */
1023+ if (zi -> i_flags & ZONEFS_ZONE_OPEN &&
1024+ !(sb -> s_flags & SB_RDONLY )) {
1025+ zonefs_warn (sb , "closing zone failed, remounting filesystem read-only\n" );
1026+ sb -> s_flags |= SB_RDONLY ;
1027+ }
1028+ }
1029+ zi -> i_flags &= ~ZONEFS_ZONE_OPEN ;
1030+ dec :
1031+ atomic_dec (& sbi -> s_open_zones );
1032+ }
1033+ mutex_unlock (& zi -> i_truncate_mutex );
1034+ }
1035+
/*
 * File release operation. Always returns 0.
 *
 * If we explicitly open a zone we must close it again as well, but the
 * zone management operation can fail (either due to an IO error or as
 * the zone has gone offline or read-only). Make sure we don't fail the
 * close(2) for user-space: zonefs_close_zone() handles errors internally
 * and returns nothing.
 */
static int zonefs_file_release(struct inode *inode, struct file *file)
{
	/* Files opened without explicit zone open handling need no cleanup. */
	if (!zonefs_file_use_exp_open(inode, file))
		return 0;

	zonefs_close_zone(inode);

	return 0;
}
1049+
8681050static const struct file_operations zonefs_file_operations = {
869- .open = generic_file_open ,
1051+ .open = zonefs_file_open ,
1052+ .release = zonefs_file_release ,
8701053 .fsync = zonefs_file_fsync ,
8711054 .mmap = zonefs_file_mmap ,
8721055 .llseek = zonefs_file_llseek ,
@@ -890,6 +1073,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb)
8901073 inode_init_once (& zi -> i_vnode );
8911074 mutex_init (& zi -> i_truncate_mutex );
8921075 init_rwsem (& zi -> i_mmap_sem );
1076+ zi -> i_wr_refcnt = 0 ;
8931077
8941078 return & zi -> i_vnode ;
8951079}
@@ -940,14 +1124,15 @@ static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
9401124
9411125enum {
9421126 Opt_errors_ro , Opt_errors_zro , Opt_errors_zol , Opt_errors_repair ,
943- Opt_err ,
1127+ Opt_explicit_open , Opt_err ,
9441128};
9451129
9461130static const match_table_t tokens = {
9471131 { Opt_errors_ro , "errors=remount-ro" },
9481132 { Opt_errors_zro , "errors=zone-ro" },
9491133 { Opt_errors_zol , "errors=zone-offline" },
9501134 { Opt_errors_repair , "errors=repair" },
1135+ { Opt_explicit_open , "explicit-open" },
9511136 { Opt_err , NULL }
9521137};
9531138
@@ -984,6 +1169,9 @@ static int zonefs_parse_options(struct super_block *sb, char *options)
9841169 sbi -> s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK ;
9851170 sbi -> s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR ;
9861171 break ;
1172+ case Opt_explicit_open :
1173+ sbi -> s_mount_opts |= ZONEFS_MNTOPT_EXPLICIT_OPEN ;
1174+ break ;
9871175 default :
9881176 return - EINVAL ;
9891177 }
@@ -1403,6 +1591,13 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
14031591 sbi -> s_gid = GLOBAL_ROOT_GID ;
14041592 sbi -> s_perm = 0640 ;
14051593 sbi -> s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO ;
1594+ sbi -> s_max_open_zones = bdev_max_open_zones (sb -> s_bdev );
1595+ atomic_set (& sbi -> s_open_zones , 0 );
1596+ if (!sbi -> s_max_open_zones &&
1597+ sbi -> s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN ) {
1598+ zonefs_info (sb , "No open zones limit. Ignoring explicit_open mount option\n" );
1599+ sbi -> s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN ;
1600+ }
14061601
14071602 ret = zonefs_read_super (sb );
14081603 if (ret )
0 commit comments