Skip to content

Commit e9306ad

Browse files
adam900710 authored and kdave committed
btrfs: more graceful errors/warnings on 32bit systems when reaching limits
Btrfs internally uses a mapped u64 address space for all its metadata. Due to the page cache limit on 32bit systems, btrfs can't access metadata at or beyond (ULONG_MAX + 1) << PAGE_SHIFT. See how MAX_LFS_FILESIZE and page::index are defined. This is 16T for 4K page size and 256T for 64K page size.

Users can have a filesystem which doesn't have metadata beyond the boundary at mount time, but a later balance can cause it to create metadata beyond the boundary.

Modifying the MM layer is unrealistic just for such a minor use case. We can't do more than prevent mounting such a filesystem, or warn early while the numbers are still within the limits.

To address this problem, this patch introduces the following checks:

- Mount time rejection
  This will reject any fs which has a metadata chunk at or beyond the boundary.

- Mount time early warning
  If there is any metadata chunk beyond 5/8th of the boundary, we do an early warning and hope the end user will see it.

- Runtime extent buffer rejection
  If we're going to allocate an extent buffer at or beyond the boundary, reject such a request with EOVERFLOW. This is definitely going to cause problems like transaction abort, but we have no better ways.

- Runtime extent buffer early warning
  If an extent buffer beyond 5/8th of the max file size is allocated, do an early warning.

The above error/warning messages will only be printed once for each fs to reduce dmesg flood.

If the mount is rejected, the filesystem will be mountable only on a 64bit host.

Link: https://lore.kernel.org/linux-btrfs/[email protected]/
Reported-by: Erik Jensen <[email protected]>
Reviewed-by: Josef Bacik <[email protected]>
Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 0dc16ef commit e9306ad

File tree

4 files changed

+107
-2
lines changed

4 files changed

+107
-2
lines changed

fs/btrfs/ctree.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,12 @@ enum {
585585

586586
/* Indicate whether there are any tree modification log users */
587587
BTRFS_FS_TREE_MOD_LOG_USERS,
588+
589+
#if BITS_PER_LONG == 32
590+
/* Indicate if we have error/warn message printed on 32bit systems */
591+
BTRFS_FS_32BIT_ERROR,
592+
BTRFS_FS_32BIT_WARN,
593+
#endif
588594
};
589595

590596
/*
@@ -3412,6 +3418,19 @@ static inline void assertfail(const char *expr, const char* file, int line) { }
34123418
#define ASSERT(expr) (void)(expr)
34133419
#endif
34143420

3421+
#if BITS_PER_LONG == 32
3422+
#define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT)
3423+
/*
3424+
* The warning threshold is 5/8th of BTRFS_32BIT_MAX_FILE_SIZE that limits the logical
3425+
* addresses of extents.
3426+
*
3427+
* For 4K page size it's about 10T, for 64K it's 160T.
3428+
*/
3429+
#define BTRFS_32BIT_EARLY_WARN_THRESHOLD (BTRFS_32BIT_MAX_FILE_SIZE * 5 / 8)
3430+
void btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info);
3431+
void btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info);
3432+
#endif
3433+
34153434
/*
34163435
* Get the correct offset inside the page of extent buffer.
34173436
*

fs/btrfs/extent_io.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5821,6 +5821,17 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
58215821
return ERR_PTR(-EINVAL);
58225822
}
58235823

5824+
#if BITS_PER_LONG == 32
5825+
if (start >= MAX_LFS_FILESIZE) {
5826+
btrfs_err_rl(fs_info,
5827+
"extent buffer %llu is beyond 32bit page cache limit", start);
5828+
btrfs_err_32bit_limit(fs_info);
5829+
return ERR_PTR(-EOVERFLOW);
5830+
}
5831+
if (start >= BTRFS_32BIT_EARLY_WARN_THRESHOLD)
5832+
btrfs_warn_32bit_limit(fs_info);
5833+
#endif
5834+
58245835
if (fs_info->sectorsize < PAGE_SIZE &&
58255836
offset_in_page(start) + len > PAGE_SIZE) {
58265837
btrfs_err(fs_info,

fs/btrfs/super.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,32 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
252252
}
253253
#endif
254254

255+
#if BITS_PER_LONG == 32
256+
/*
 * Print the early warning about approaching the 32bit logical address limit.
 *
 * The BTRFS_FS_32BIT_WARN bit in fs_info->flags is set on the first call, so
 * the messages are printed only when that bit was not set before.
 */
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
	const u64 limit_tb = BTRFS_32BIT_MAX_FILE_SIZE >> 40;

	/* Bit was already set: the warning has been printed before. */
	if (test_and_set_bit(BTRFS_FS_32BIT_WARN, &fs_info->flags))
		return;

	btrfs_warn(fs_info, "reaching 32bit limit for logical addresses");
	btrfs_warn(fs_info,
"due to page cache limit on 32bit systems, btrfs can't access metadata at or beyond %lluT",
		   limit_tb);
	btrfs_warn(fs_info,
		   "please consider upgrading to 64bit kernel/hardware");
}
267+
268+
/*
 * Print the error about having reached the 32bit logical address limit.
 *
 * The BTRFS_FS_32BIT_ERROR bit in fs_info->flags is set on the first call, so
 * the messages are printed only when that bit was not set before.
 */
void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
{
	const u64 limit_tb = BTRFS_32BIT_MAX_FILE_SIZE >> 40;

	/* Bit was already set: the error has been printed before. */
	if (test_and_set_bit(BTRFS_FS_32BIT_ERROR, &fs_info->flags))
		return;

	btrfs_err(fs_info, "reached 32bit limit for logical addresses");
	btrfs_err(fs_info,
"due to page cache limit on 32bit systems, metadata beyond %lluT can't be accessed",
		  limit_tb);
	btrfs_err(fs_info,
		  "please consider upgrading to 64bit kernel/hardware");
}
279+
#endif
280+
255281
/*
256282
* We only mark the transaction aborted and then set the file system read-only.
257283
* This will prevent new transactions from starting or trying to join this

fs/btrfs/volumes.c

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6789,6 +6789,46 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
67896789
return div_u64(chunk_len, data_stripes);
67906790
}
67916791

6792+
#if BITS_PER_LONG == 32
6793+
/*
6794+
* Due to page cache limit, metadata beyond BTRFS_32BIT_MAX_FILE_SIZE
6795+
* can't be accessed on 32bit systems.
6796+
*
6797+
* This function do mount time check to reject the fs if it already has
6798+
* metadata chunk beyond that limit.
6799+
*/
6800+
static int check_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
6801+
u64 logical, u64 length, u64 type)
6802+
{
6803+
if (!(type & BTRFS_BLOCK_GROUP_METADATA))
6804+
return 0;
6805+
6806+
if (logical + length < MAX_LFS_FILESIZE)
6807+
return 0;
6808+
6809+
btrfs_err_32bit_limit(fs_info);
6810+
return -EOVERFLOW;
6811+
}
6812+
6813+
/*
6814+
* This is to give early warning for any metadata chunk reaching
6815+
* BTRFS_32BIT_EARLY_WARN_THRESHOLD.
6816+
* Although we can still access the metadata, it's not going to be possible
6817+
* once the limit is reached.
6818+
*/
6819+
static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
6820+
u64 logical, u64 length, u64 type)
6821+
{
6822+
if (!(type & BTRFS_BLOCK_GROUP_METADATA))
6823+
return;
6824+
6825+
if (logical + length < BTRFS_32BIT_EARLY_WARN_THRESHOLD)
6826+
return;
6827+
6828+
btrfs_warn_32bit_limit(fs_info);
6829+
}
6830+
#endif
6831+
67926832
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
67936833
struct btrfs_chunk *chunk)
67946834
{
@@ -6799,15 +6839,24 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
67996839
u64 logical;
68006840
u64 length;
68016841
u64 devid;
6842+
u64 type;
68026843
u8 uuid[BTRFS_UUID_SIZE];
68036844
int num_stripes;
68046845
int ret;
68056846
int i;
68066847

68076848
logical = key->offset;
68086849
length = btrfs_chunk_length(leaf, chunk);
6850+
type = btrfs_chunk_type(leaf, chunk);
68096851
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
68106852

6853+
#if BITS_PER_LONG == 32
6854+
ret = check_32bit_meta_chunk(fs_info, logical, length, type);
6855+
if (ret < 0)
6856+
return ret;
6857+
warn_32bit_meta_chunk(fs_info, logical, length, type);
6858+
#endif
6859+
68116860
/*
68126861
* Only need to verify chunk item if we're reading from sys chunk array,
68136862
* as chunk item in tree block is already verified by tree-checker.
@@ -6851,10 +6900,10 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
68516900
map->io_width = btrfs_chunk_io_width(leaf, chunk);
68526901
map->io_align = btrfs_chunk_io_align(leaf, chunk);
68536902
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6854-
map->type = btrfs_chunk_type(leaf, chunk);
6903+
map->type = type;
68556904
map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
68566905
map->verified_stripes = 0;
6857-
em->orig_block_len = calc_stripe_length(map->type, em->len,
6906+
em->orig_block_len = calc_stripe_length(type, em->len,
68586907
map->num_stripes);
68596908
for (i = 0; i < num_stripes; i++) {
68606909
map->stripes[i].physical =

0 commit comments

Comments
 (0)