Skip to content

Commit 4140913

Browse files
Bo Liu (OpenAnolis) authored and hsiangkao committed
erofs: implement metadata compression
Thanks to the meta buffer infrastructure, metadata-compressed inodes are just read from the metabox inode instead of the blockdevice (or backing file) inode. The same is true for shared extended attributes. When metadata compression is enabled, inode numbers are kept distinct from on-disk NIDs for compatibility with non-LFS 32-bit applications.
Co-developed-by: Gao Xiang <[email protected]>
Signed-off-by: Bo Liu (OpenAnolis) <[email protected]>
Acked-by: Chao Yu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 681acbd commit 4140913

File tree

14 files changed

+136
-59
lines changed

14 files changed

+136
-59
lines changed

Documentation/ABI/testing/sysfs-fs-erofs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Description: Shows all enabled kernel features.
55
Supported features:
66
zero_padding, compr_cfgs, big_pcluster, chunked_file,
77
device_table, compr_head2, sb_chksum, ztailpacking,
8-
dedupe, fragments.
8+
dedupe, fragments, 48bit, metabox.
99

1010
What: /sys/fs/erofs/<disk>/sync_decompress
1111
Date: November 2021

fs/erofs/data.c

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,18 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
4949
return buf->base + (offset & ~PAGE_MASK);
5050
}
5151

52-
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
52+
int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
53+
bool in_metabox)
5354
{
5455
struct erofs_sb_info *sbi = EROFS_SB(sb);
5556

5657
buf->file = NULL;
58+
if (in_metabox) {
59+
if (unlikely(!sbi->metabox_inode))
60+
return -EFSCORRUPTED;
61+
buf->mapping = sbi->metabox_inode->i_mapping;
62+
return 0;
63+
}
5764
buf->off = sbi->dif0.fsoff;
5865
if (erofs_is_fileio_mode(sbi)) {
5966
buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
@@ -62,12 +69,17 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
6269
buf->mapping = sbi->dif0.fscache->inode->i_mapping;
6370
else
6471
buf->mapping = sb->s_bdev->bd_mapping;
72+
return 0;
6573
}
6674

6775
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
68-
erofs_off_t offset)
76+
erofs_off_t offset, bool in_metabox)
6977
{
70-
erofs_init_metabuf(buf, sb);
78+
int err;
79+
80+
err = erofs_init_metabuf(buf, sb, in_metabox);
81+
if (err)
82+
return ERR_PTR(err);
7183
return erofs_bread(buf, offset, true);
7284
}
7385

@@ -118,7 +130,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
118130
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
119131
vi->xattr_isize, unit) + unit * chunknr;
120132

121-
idx = erofs_read_metabuf(&buf, sb, pos);
133+
idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
122134
if (IS_ERR(idx)) {
123135
err = PTR_ERR(idx);
124136
goto out;
@@ -264,7 +276,6 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
264276

265277
map.m_la = offset;
266278
map.m_llen = length;
267-
268279
ret = erofs_map_blocks(inode, &map);
269280
if (ret < 0)
270281
return ret;
@@ -273,35 +284,37 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
273284
iomap->length = map.m_llen;
274285
iomap->flags = 0;
275286
iomap->private = NULL;
287+
iomap->addr = IOMAP_NULL_ADDR;
276288
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
277289
iomap->type = IOMAP_HOLE;
278-
iomap->addr = IOMAP_NULL_ADDR;
279290
return 0;
280291
}
281292

282-
mdev = (struct erofs_map_dev) {
283-
.m_deviceid = map.m_deviceid,
284-
.m_pa = map.m_pa,
285-
};
286-
ret = erofs_map_dev(sb, &mdev);
287-
if (ret)
288-
return ret;
289-
290-
if (flags & IOMAP_DAX)
291-
iomap->dax_dev = mdev.m_dif->dax_dev;
292-
else
293-
iomap->bdev = mdev.m_bdev;
294-
295-
iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
296-
if (flags & IOMAP_DAX)
297-
iomap->addr += mdev.m_dif->dax_part_off;
293+
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
294+
mdev = (struct erofs_map_dev) {
295+
.m_deviceid = map.m_deviceid,
296+
.m_pa = map.m_pa,
297+
};
298+
ret = erofs_map_dev(sb, &mdev);
299+
if (ret)
300+
return ret;
301+
302+
if (flags & IOMAP_DAX)
303+
iomap->dax_dev = mdev.m_dif->dax_dev;
304+
else
305+
iomap->bdev = mdev.m_bdev;
306+
iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
307+
if (flags & IOMAP_DAX)
308+
iomap->addr += mdev.m_dif->dax_part_off;
309+
}
298310

299311
if (map.m_flags & EROFS_MAP_META) {
300312
void *ptr;
301313
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
302314

303315
iomap->type = IOMAP_INLINE;
304-
ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa);
316+
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
317+
erofs_inode_in_metabox(inode));
305318
if (IS_ERR(ptr))
306319
return PTR_ERR(ptr);
307320
iomap->inline_data = ptr;

fs/erofs/decompressor.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
467467
return -EOPNOTSUPP;
468468
}
469469

470-
erofs_init_metabuf(&buf, sb);
470+
(void)erofs_init_metabuf(&buf, sb, false);
471471
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
472472
alg = 0;
473473
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {

fs/erofs/dir.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
3434
}
3535

3636
if (!dir_emit(ctx, de_name, de_namelen,
37-
le64_to_cpu(de->nid), d_type))
37+
erofs_nid_to_ino64(EROFS_SB(dir->i_sb),
38+
le64_to_cpu(de->nid)), d_type))
3839
return 1;
3940
++de;
4041
ctx->pos += sizeof(struct erofs_dirent);

fs/erofs/erofs_fs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
#define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080
3535
#define EROFS_FEATURE_INCOMPAT_METABOX 0x00000100
3636
#define EROFS_ALL_FEATURE_INCOMPAT \
37-
((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1)
37+
((EROFS_FEATURE_INCOMPAT_METABOX << 1) - 1)
3838

3939
#define EROFS_SB_EXTSLOT_SIZE 16
4040

fs/erofs/fileio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
115115
void *src;
116116

117117
src = erofs_read_metabuf(&buf, inode->i_sb,
118-
map->m_pa + ofs);
118+
map->m_pa + ofs, erofs_inode_in_metabox(inode));
119119
if (IS_ERR(src)) {
120120
err = PTR_ERR(src);
121121
break;

fs/erofs/fscache.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
274274
size_t size = map.m_llen;
275275
void *src;
276276

277-
src = erofs_read_metabuf(&buf, sb, map.m_pa);
277+
src = erofs_read_metabuf(&buf, sb, map.m_pa,
278+
erofs_inode_in_metabox(inode));
278279
if (IS_ERR(src))
279280
return PTR_ERR(src);
280281

fs/erofs/inode.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static int erofs_read_inode(struct inode *inode)
2929
struct super_block *sb = inode->i_sb;
3030
erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode));
3131
unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode));
32+
bool in_mbox = erofs_inode_in_metabox(inode);
3233
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
3334
struct erofs_sb_info *sbi = EROFS_SB(sb);
3435
erofs_blk_t addrmask = BIT_ULL(48) - 1;
@@ -39,7 +40,7 @@ static int erofs_read_inode(struct inode *inode)
3940
void *ptr;
4041
int err = 0;
4142

42-
ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr));
43+
ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox);
4344
if (IS_ERR(ptr)) {
4445
err = PTR_ERR(ptr);
4546
erofs_err(sb, "failed to read inode meta block (nid: %llu): %d",
@@ -78,7 +79,7 @@ static int erofs_read_inode(struct inode *inode)
7879

7980
memcpy(&copied, dic, gotten);
8081
ptr = erofs_read_metabuf(&buf, sb,
81-
erofs_pos(sb, blkaddr + 1));
82+
erofs_pos(sb, blkaddr + 1), in_mbox);
8283
if (IS_ERR(ptr)) {
8384
err = PTR_ERR(ptr);
8485
erofs_err(sb, "failed to read inode payload block (nid: %llu): %d",
@@ -264,13 +265,13 @@ static int erofs_fill_inode(struct inode *inode)
264265
* ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
265266
* so that it will fit.
266267
*/
267-
static ino_t erofs_squash_ino(erofs_nid_t nid)
268+
static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid)
268269
{
269-
ino_t ino = (ino_t)nid;
270+
u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid);
270271

271272
if (sizeof(ino_t) < sizeof(erofs_nid_t))
272-
ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
273-
return ino;
273+
ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
274+
return (ino_t)ino64;
274275
}
275276

276277
static int erofs_iget5_eq(struct inode *inode, void *opaque)
@@ -282,7 +283,7 @@ static int erofs_iget5_set(struct inode *inode, void *opaque)
282283
{
283284
const erofs_nid_t nid = *(erofs_nid_t *)opaque;
284285

285-
inode->i_ino = erofs_squash_ino(nid);
286+
inode->i_ino = erofs_squash_ino(inode->i_sb, nid);
286287
EROFS_I(inode)->nid = nid;
287288
return 0;
288289
}
@@ -291,7 +292,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
291292
{
292293
struct inode *inode;
293294

294-
inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq,
295+
inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq,
295296
erofs_iget5_set, &nid);
296297
if (!inode)
297298
return ERR_PTR(-ENOMEM);

fs/erofs/internal.h

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ struct erofs_sb_info {
125125
struct erofs_sb_lz4_info lz4;
126126
#endif /* CONFIG_EROFS_FS_ZIP */
127127
struct inode *packed_inode;
128+
struct inode *metabox_inode;
128129
struct erofs_dev_context *devs;
129130
u64 total_blocks;
130131

@@ -148,6 +149,7 @@ struct erofs_sb_info {
148149
/* what we really care is nid, rather than ino.. */
149150
erofs_nid_t root_nid;
150151
erofs_nid_t packed_nid;
152+
erofs_nid_t metabox_nid;
151153
/* used for statfs, f_files - f_favail */
152154
u64 inos;
153155

@@ -232,6 +234,23 @@ EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
232234
EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
233235
EROFS_FEATURE_FUNCS(shared_ea_in_metabox, compat, COMPAT_SHARED_EA_IN_METABOX)
234236

237+
static inline u64 erofs_nid_to_ino64(struct erofs_sb_info *sbi, erofs_nid_t nid)
238+
{
239+
if (!erofs_sb_has_metabox(sbi))
240+
return nid;
241+
242+
/*
243+
* When metadata compression is enabled, avoid generating excessively
244+
* large inode numbers for metadata-compressed inodes. Shift NIDs in
245+
* the 31-62 bit range left by one and move the metabox flag to bit 31.
246+
*
247+
* Note: on-disk NIDs remain unchanged as they are primarily used for
248+
* compatibility with non-LFS 32-bit applications.
249+
*/
250+
return ((nid << 1) & GENMASK_ULL(63, 32)) | (nid & GENMASK(30, 0)) |
251+
((nid >> EROFS_DIRENT_NID_METABOX_BIT) << 31);
252+
}
253+
235254
/* atomic flag definitions */
236255
#define EROFS_I_EA_INITED_BIT 0
237256
#define EROFS_I_Z_INITED_BIT 1
@@ -281,12 +300,20 @@ struct erofs_inode {
281300

282301
#define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode)
283302

303+
static inline bool erofs_inode_in_metabox(struct inode *inode)
304+
{
305+
return EROFS_I(inode)->nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT);
306+
}
307+
284308
static inline erofs_off_t erofs_iloc(struct inode *inode)
285309
{
286310
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
311+
erofs_nid_t nid_lo = EROFS_I(inode)->nid & EROFS_DIRENT_NID_MASK;
287312

313+
if (erofs_inode_in_metabox(inode))
314+
return nid_lo << sbi->islotbits;
288315
return erofs_pos(inode->i_sb, sbi->meta_blkaddr) +
289-
(EROFS_I(inode)->nid << sbi->islotbits);
316+
(nid_lo << sbi->islotbits);
290317
}
291318

292319
static inline unsigned int erofs_inode_version(unsigned int ifmt)
@@ -385,9 +412,10 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
385412
void erofs_unmap_metabuf(struct erofs_buf *buf);
386413
void erofs_put_metabuf(struct erofs_buf *buf);
387414
void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap);
388-
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb);
415+
int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
416+
bool in_metabox);
389417
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
390-
erofs_off_t offset);
418+
erofs_off_t offset, bool in_metabox);
391419
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
392420
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
393421
u64 start, u64 len);

fs/erofs/super.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
141141
struct erofs_deviceslot *dis;
142142
struct file *file;
143143

144-
dis = erofs_read_metabuf(buf, sb, *pos);
144+
dis = erofs_read_metabuf(buf, sb, *pos, false);
145145
if (IS_ERR(dis))
146146
return PTR_ERR(dis);
147147

@@ -258,7 +258,7 @@ static int erofs_read_superblock(struct super_block *sb)
258258
void *data;
259259
int ret;
260260

261-
data = erofs_read_metabuf(&buf, sb, 0);
261+
data = erofs_read_metabuf(&buf, sb, 0, false);
262262
if (IS_ERR(data)) {
263263
erofs_err(sb, "cannot read erofs superblock");
264264
return PTR_ERR(data);
@@ -319,6 +319,14 @@ static int erofs_read_superblock(struct super_block *sb)
319319
sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
320320
}
321321
sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
322+
if (erofs_sb_has_metabox(sbi)) {
323+
if (sbi->sb_size <= offsetof(struct erofs_super_block,
324+
metabox_nid))
325+
return -EFSCORRUPTED;
326+
sbi->metabox_nid = le64_to_cpu(dsb->metabox_nid);
327+
if (sbi->metabox_nid & BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT))
328+
return -EFSCORRUPTED; /* self-loop detection */
329+
}
322330
sbi->inos = le64_to_cpu(dsb->inos);
323331

324332
sbi->epoch = (s64)le64_to_cpu(dsb->epoch);
@@ -335,6 +343,8 @@ static int erofs_read_superblock(struct super_block *sb)
335343

336344
if (erofs_sb_has_48bit(sbi))
337345
erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!");
346+
if (erofs_sb_has_metabox(sbi))
347+
erofs_info(sb, "EXPERIMENTAL metadata compression support in use. Use at your own risk!");
338348
if (erofs_is_fscache_mode(sb))
339349
erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!");
340350
out:
@@ -690,6 +700,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
690700
return PTR_ERR(inode);
691701
sbi->packed_inode = inode;
692702
}
703+
if (erofs_sb_has_metabox(sbi)) {
704+
inode = erofs_iget(sb, sbi->metabox_nid);
705+
if (IS_ERR(inode))
706+
return PTR_ERR(inode);
707+
sbi->metabox_inode = inode;
708+
}
693709

694710
inode = erofs_iget(sb, sbi->root_nid);
695711
if (IS_ERR(inode))
@@ -845,6 +861,8 @@ static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi)
845861
{
846862
iput(sbi->packed_inode);
847863
sbi->packed_inode = NULL;
864+
iput(sbi->metabox_inode);
865+
sbi->metabox_inode = NULL;
848866
#ifdef CONFIG_EROFS_FS_ZIP
849867
iput(sbi->managed_cache);
850868
sbi->managed_cache = NULL;

0 commit comments

Comments
 (0)