@@ -429,6 +429,7 @@ static int caching_kthread(void *data)
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
                              struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
                              int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
        /*
         * We can't do the read from on-disk cache during a commit since we need
-        * to have the normal tree locking.
+        * to have the normal tree locking.  Also if we are currently trying to
+        * allocate blocks for the tree root we can't do the fast caching since
+        * we likely hold important locks.
         */
-       if (!trans->transaction->in_commit) {
+       if (!trans->transaction->in_commit &&
+           (root && root != root->fs_info->tree_root)) {
                spin_lock(&cache->lock);
                if (cache->cached != BTRFS_CACHE_NO) {
                        spin_unlock(&cache->lock);
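
The net effect of this hunk is that fast caching from the on-disk free-space cache is attempted only when we are not in a commit and the caller identified a root other than the tree root (a NULL root also falls back to the slow path). Below is a minimal stand-alone sketch of that gate with simplified stand-in types; the struct layouts and the helper name are hypothetical, only the boolean condition mirrors the patched cache_block_group() above.

#include <stdbool.h>

struct fs_root;
struct fs_info { struct fs_root *tree_root; };
struct fs_root { struct fs_info *fs_info; };
struct transaction { bool in_commit; };

/* Fast caching is allowed only outside a commit and only when the caller
 * passed some root other than the tree root; a NULL root (as in the
 * update_block_group() and logged-extent callers below) takes the slow path.
 */
static bool can_fast_cache(const struct transaction *cur_trans,
                           const struct fs_root *root)
{
        return !cur_trans->in_commit &&
               (root && root != root->fs_info->tree_root);
}
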
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        struct btrfs_root *root = block_group->fs_info->tree_root;
        struct inode *inode = NULL;
        u64 alloc_hint = 0;
+       int dcs = BTRFS_DC_ERROR;
        int num_pages = 0;
        int retries = 0;
        int ret = 0;
@@ -2795,6 +2800,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 
        spin_lock(&block_group->lock);
        if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               /* We're not cached, don't bother trying to write stuff out */
+               dcs = BTRFS_DC_WRITTEN;
                spin_unlock(&block_group->lock);
                goto out_put;
        }
@@ -2821,17 +2828,16 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
                                              num_pages, num_pages,
                                              &alloc_hint);
+       if (!ret)
+               dcs = BTRFS_DC_SETUP;
        btrfs_free_reserved_data_space(inode, num_pages);
 out_put:
        iput(inode);
 out_free:
        btrfs_release_path(root, path);
 out:
        spin_lock(&block_group->lock);
-       if (ret)
-               block_group->disk_cache_state = BTRFS_DC_ERROR;
-       else
-               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       block_group->disk_cache_state = dcs;
        spin_unlock(&block_group->lock);
 
        return ret;
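
The cache_save_setup() hunks above replace the ret-based assignment at out: with a single local, dcs, that starts out as BTRFS_DC_ERROR and is upgraded along the way: BTRFS_DC_WRITTEN when the group was never cached (nothing to write out) and BTRFS_DC_SETUP once the cache file has been preallocated. Here is a simplified stand-alone sketch of that state selection, using stand-in enum values rather than the real BTRFS_DC_* constants:

/* Stand-in state values; the real ones are the BTRFS_DC_* states in btrfs. */
enum dc_state { DC_ERROR, DC_WRITTEN, DC_SETUP };

/* Mirrors the decision flow above: pessimistic default, upgraded on the
 * "not cached" early exit or after a successful preallocation, so the caller
 * can store the result in one place under the block group lock. */
static enum dc_state pick_disk_cache_state(int cache_finished, int prealloc_ret)
{
        enum dc_state dcs = DC_ERROR;

        if (!cache_finished)
                return DC_WRITTEN;      /* nothing cached, nothing to write */
        if (prealloc_ret == 0)
                dcs = DC_SETUP;         /* space for the cache file reserved */
        return dcs;
}
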
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-       u64 num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       u64 num_devices = root->fs_info->fs_devices->rw_devices +
+                         root->fs_info->fs_devices->missing_devices;
 
        if (num_devices == 1)
                flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
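
Counting missing devices matters on a degraded mount: a two-device RAID1 with one device absent has rw_devices == 1, and without the addition the profile would be reduced as if the filesystem were single-device. Below is a small self-contained sketch of the reduction rule, using made-up flag bits in place of the BTRFS_BLOCK_GROUP_* values (the real function goes on to apply further reductions not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the BTRFS_BLOCK_GROUP_* bits. */
#define BG_RAID0 (1ULL << 3)
#define BG_RAID1 (1ULL << 4)

static uint64_t reduce_alloc_profile(uint64_t flags, uint64_t rw_devices,
                                     uint64_t missing_devices)
{
        /* Degraded RAID1: rw_devices is 1, but the missing device keeps
         * num_devices at 2, so the RAID1 bit survives the reduction. */
        uint64_t num_devices = rw_devices + missing_devices;

        if (num_devices == 1)
                flags &= ~(BG_RAID1 | BG_RAID0);
        return flags;
}

int main(void)
{
        uint64_t degraded = reduce_alloc_profile(BG_RAID1, 1, 1);
        uint64_t true_single = reduce_alloc_profile(BG_RAID1, 1, 0);

        printf("degraded mount keeps RAID1: %d\n", !!(degraded & BG_RAID1));
        printf("single device keeps RAID1:  %d\n", !!(true_single & BG_RAID1));
        return 0;
}
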
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
         * space back to the block group, otherwise we will leak space.
         */
        if (!alloc && cache->cached == BTRFS_CACHE_NO)
-               cache_block_group(cache, trans, 1);
+               cache_block_group(cache, trans, NULL, 1);
 
        byte_in_group = bytenr - cache->key.objectid;
        WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                btrfs_get_block_group(block_group);
                search_start = block_group->key.objectid;
 
+               /*
+                * this can happen if we end up cycling through all the
+                * raid types, but we want to make sure we only allocate
+                * for the proper type.
+                */
+               if (!block_group_bits(block_group, data)) {
+                       u64 extra = BTRFS_BLOCK_GROUP_DUP |
+                                   BTRFS_BLOCK_GROUP_RAID1 |
+                                   BTRFS_BLOCK_GROUP_RAID10;
+
+                       /*
+                        * if they asked for extra copies and this block group
+                        * doesn't provide them, bail.  This does allow us to
+                        * fill raid0 from raid1.
+                        */
+                       if ((data & extra) && !(block_group->flags & extra))
+                               goto loop;
+               }
+
 have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
 
-                       ret = cache_block_group(block_group, trans, 1);
+                       ret = cache_block_group(block_group, trans,
+                                               orig_root, 1);
                        if (block_group->cached == BTRFS_CACHE_FINISHED)
                                goto have_block_group;
 
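
The block_group_bits() check above keeps the allocator, once it has cycled through the raid types, from handing out space from a group whose RAID flags do not match the requested profile, while still letting a non-redundant request be filled from a redundant group. The short sketch below isolates the "extra copies" test with stand-in flag bits; the function name and values are illustrative, not the btrfs definitions:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for the BTRFS_BLOCK_GROUP_* bits. */
#define BG_RAID0  (1ULL << 3)
#define BG_RAID1  (1ULL << 4)
#define BG_DUP    (1ULL << 5)
#define BG_RAID10 (1ULL << 6)

/* Called only when the group's flags do not already match the request
 * (the !block_group_bits() case above).  Returns true when the group should
 * be skipped: the caller wanted extra copies and this group provides none.
 * A plain/raid0 request falls through and may still be filled from, say,
 * a raid1 group. */
static bool skip_mismatched_group(uint64_t group_flags, uint64_t wanted)
{
        const uint64_t extra = BG_DUP | BG_RAID1 | BG_RAID10;

        return (wanted & extra) && !(group_flags & extra);
}
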
@@ -4958,7 +4990,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group, trans, 0);
+                               ret = cache_block_group(block_group, trans,
+                                                       orig_root, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group, trans, 0);
+       cache_block_group(block_group, trans, NULL, 0);
        caching_ctl = get_caching_control(block_group);
 
        if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                           NULL, NULL);
                BUG_ON(ret < 0);
                if (ret > 0) {
-                       ret = btrfs_del_orphan_item(trans, tree_root,
-                                                   root->root_key.objectid);
-                       BUG_ON(ret);
+                       /* if we fail to delete the orphan item this time
+                        * around, it'll get picked up the next time.
+                        *
+                        * The most common failure here is just -ENOENT.
+                        */
+                       btrfs_del_orphan_item(trans, tree_root,
+                                             root->root_key.objectid);
                }
        }
 
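
Dropping the BUG_ON() turns orphan-item removal during snapshot deletion into a best-effort step: the item may already be gone, so -ENOENT is expected and harmless, and anything genuinely left over is picked up on a later pass. The hunk itself simply ignores the return value; the sketch below shows one possible variation of that pattern with a stub standing in for btrfs_del_orphan_item(), logging only unexpected errors:

#include <errno.h>
#include <stdio.h>

/* Stub standing in for btrfs_del_orphan_item(); pretend the orphan item
 * was already removed by an earlier cleanup. */
static int del_orphan_item_stub(unsigned long long objectid)
{
        (void)objectid;
        return -ENOENT;
}

static void best_effort_del_orphan(unsigned long long objectid)
{
        int ret = del_orphan_item_stub(objectid);

        /* -ENOENT is the common, benign case; any real leftover is handled
         * the next time the orphan item is encountered. */
        if (ret && ret != -ENOENT)
                fprintf(stderr, "orphan item %llu not deleted: %d\n",
                        objectid, ret);
}

int main(void)
{
        best_effort_del_orphan(257);
        return 0;
}
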
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-       num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       num_devices = root->fs_info->fs_devices->rw_devices +
+                     root->fs_info->fs_devices->missing_devices;
+
        if (num_devices == 1) {
                stripped |= BTRFS_BLOCK_GROUP_DUP;
                stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                        break;
                if (ret != 0)
                        goto error;
-
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                cache = kzalloc(sizeof(*cache), GFP_NOFS);