Skip to content

Commit ff780b9

Browse files
harshadjstytso
authored andcommitted
jbd2: add fast commit machinery
This commit adds the APIs needed in the JBD2 layer for fast commits. Signed-off-by: Harshad Shirwadkar <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Theodore Ts'o <[email protected]>
1 parent 6866d7b commit ff780b9

File tree

4 files changed

+268
-1
lines changed

4 files changed

+268
-1
lines changed

fs/ext4/fast_commit.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,19 @@
88
* Ext4 fast commits routines.
99
*/
1010
#include "ext4_jbd2.h"
11+
/*
12+
* Fast commit cleanup routine. This is called after every fast commit and
13+
* full commit. full is true if we are called after a full commit.
* This is currently an empty placeholder; ext4_fc_init() installs it as
* journal->j_fc_cleanup_callback when fast commits are enabled.
14+
*/
15+
static void ext4_fc_cleanup(journal_t *journal, int full)
16+
{
17+
}
1118

1219
void ext4_fc_init(struct super_block *sb, journal_t *journal)
1320
{
1421
if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
1522
return;
23+
journal->j_fc_cleanup_callback = ext4_fc_cleanup;
1624
if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
1725
pr_warn("Error while enabling fast commits, turning off.");
1826
ext4_clear_feature_fast_commit(sb);

fs/jbd2/commit.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,30 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
206206
return generic_writepages(mapping, &wbc);
207207
}
208208

209+
/* Send all the data buffers related to an inode */
210+
int jbd2_submit_inode_data(struct jbd2_inode *jinode)
211+
{
212+
213+
if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
214+
return 0;
215+
216+
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
217+
return jbd2_journal_submit_inode_data_buffers(jinode);
218+
219+
}
220+
EXPORT_SYMBOL(jbd2_submit_inode_data);
221+
222+
/*
 * Wait for data writeback over the dirty range recorded in @jinode.
 *
 * Returns 0 without waiting when @jinode is NULL, lacks JI_WAIT_DATA, or has
 * no VFS inode / mapping attached. Otherwise returns the result of
 * filemap_fdatawait_range_keep_errors() for the range
 * i_dirty_start..i_dirty_end. @journal is not used here.
 */
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	if (!jinode)
		return 0;
	if (!(jinode->i_flags & JI_WAIT_DATA))
		return 0;
	if (!jinode->i_vfs_inode)
		return 0;
	if (!jinode->i_vfs_inode->i_mapping)
		return 0;

	return filemap_fdatawait_range_keep_errors(
			jinode->i_vfs_inode->i_mapping,
			jinode->i_dirty_start,
			jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
232+
209233
/*
210234
* Submit all the data buffers of inode associated with the transaction to
211235
* disk.
@@ -415,13 +439,28 @@ void jbd2_journal_commit_transaction(journal_t *journal)
415439
J_ASSERT(journal->j_running_transaction != NULL);
416440
J_ASSERT(journal->j_committing_transaction == NULL);
417441

442+
write_lock(&journal->j_state_lock);
443+
journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
444+
while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
445+
DEFINE_WAIT(wait);
446+
447+
prepare_to_wait(&journal->j_fc_wait, &wait,
448+
TASK_UNINTERRUPTIBLE);
449+
write_unlock(&journal->j_state_lock);
450+
schedule();
451+
write_lock(&journal->j_state_lock);
452+
finish_wait(&journal->j_fc_wait, &wait);
453+
}
454+
write_unlock(&journal->j_state_lock);
455+
418456
commit_transaction = journal->j_running_transaction;
419457

420458
trace_jbd2_start_commit(journal, commit_transaction);
421459
jbd_debug(1, "JBD2: starting commit of transaction %d\n",
422460
commit_transaction->t_tid);
423461

424462
write_lock(&journal->j_state_lock);
463+
journal->j_fc_off = 0;
425464
J_ASSERT(commit_transaction->t_state == T_RUNNING);
426465
commit_transaction->t_state = T_LOCKED;
427466

@@ -1121,12 +1160,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
11211160

11221161
if (journal->j_commit_callback)
11231162
journal->j_commit_callback(journal, commit_transaction);
1163+
if (journal->j_fc_cleanup_callback)
1164+
journal->j_fc_cleanup_callback(journal, 1);
11241165

11251166
trace_jbd2_end_commit(journal, commit_transaction);
11261167
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
11271168
journal->j_commit_sequence, journal->j_tail_sequence);
11281169

11291170
write_lock(&journal->j_state_lock);
1171+
journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
1172+
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
11301173
spin_lock(&journal->j_list_lock);
11311174
commit_transaction->t_state = T_FINISHED;
11321175
/* Check if the transaction can be dropped now that we are finished */
@@ -1138,6 +1181,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
11381181
spin_unlock(&journal->j_list_lock);
11391182
write_unlock(&journal->j_state_lock);
11401183
wake_up(&journal->j_wait_done_commit);
1184+
wake_up(&journal->j_fc_wait);
11411185

11421186
/*
11431187
* Calculate overall stats

fs/jbd2/journal.c

Lines changed: 189 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ static void commit_timeout(struct timer_list *t)
159159
*
160160
* 1) COMMIT: Every so often we need to commit the current state of the
161161
* filesystem to disk. The journal thread is responsible for writing
162-
* all of the metadata buffers to disk.
162+
* all of the metadata buffers to disk. If a fast commit is ongoing,
163+
* the journal thread waits until it's done and then continues from
164+
* there on.
163165
*
164166
* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
165167
* of the data in that part of the log has been rewritten elsewhere on
@@ -716,6 +718,75 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
716718
return err;
717719
}
718720

721+
/*
722+
* Start a fast commit. If there's an ongoing fast or full commit wait for
723+
* it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
724+
* if a fast commit is not needed, either because there's an already a commit
725+
* going on or this tid has already been committed. Returns -EINVAL if no jbd2
726+
* commit has yet been performed.
727+
*/
728+
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
729+
{
730+
/*
731+
* Fast commits only allowed if at least one full commit has
732+
* been processed.
733+
*/
734+
if (!journal->j_stats.ts_tid)
735+
return -EINVAL;
736+
737+
if (tid <= journal->j_commit_sequence)
738+
return -EALREADY;
739+
740+
write_lock(&journal->j_state_lock);
741+
if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
742+
(journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
743+
DEFINE_WAIT(wait);
744+
745+
prepare_to_wait(&journal->j_fc_wait, &wait,
746+
TASK_UNINTERRUPTIBLE);
747+
write_unlock(&journal->j_state_lock);
748+
schedule();
749+
finish_wait(&journal->j_fc_wait, &wait);
750+
return -EALREADY;
751+
}
752+
journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
753+
write_unlock(&journal->j_state_lock);
754+
755+
return 0;
756+
}
757+
EXPORT_SYMBOL(jbd2_fc_begin_commit);
758+
759+
/*
760+
* Stop a fast commit. If fallback is set, this function starts commit of
761+
* TID tid before any other fast commit can start.
762+
*/
763+
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
764+
{
765+
if (journal->j_fc_cleanup_callback)
766+
journal->j_fc_cleanup_callback(journal, 0);
767+
write_lock(&journal->j_state_lock);
768+
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
769+
if (fallback)
770+
journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
771+
write_unlock(&journal->j_state_lock);
772+
wake_up(&journal->j_fc_wait);
773+
if (fallback)
774+
return jbd2_complete_transaction(journal, tid);
775+
return 0;
776+
}
777+
778+
/* End a fast commit without falling back to a full commit. */
int jbd2_fc_end_commit(journal_t *journal)
{
	/* The tid argument is only consulted on the fallback path. */
	return __jbd2_fc_end_commit(journal, 0, false);
}
EXPORT_SYMBOL(jbd2_fc_end_commit);
783+
784+
/* End a fast commit and fall back to a full commit of transaction @tid. */
int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
{
	return __jbd2_fc_end_commit(journal, tid, true);
}
EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
789+
719790
/* Return 1 when transaction with given tid has already committed. */
720791
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
721792
{
@@ -784,6 +855,110 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
784855
return jbd2_journal_bmap(journal, blocknr, retp);
785856
}
786857

858+
/* Map one fast commit buffer for use by the file system */
859+
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
860+
{
861+
unsigned long long pblock;
862+
unsigned long blocknr;
863+
int ret = 0;
864+
struct buffer_head *bh;
865+
int fc_off;
866+
867+
*bh_out = NULL;
868+
write_lock(&journal->j_state_lock);
869+
870+
if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
871+
fc_off = journal->j_fc_off;
872+
blocknr = journal->j_fc_first + fc_off;
873+
journal->j_fc_off++;
874+
} else {
875+
ret = -EINVAL;
876+
}
877+
write_unlock(&journal->j_state_lock);
878+
879+
if (ret)
880+
return ret;
881+
882+
ret = jbd2_journal_bmap(journal, blocknr, &pblock);
883+
if (ret)
884+
return ret;
885+
886+
bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
887+
if (!bh)
888+
return -ENOMEM;
889+
890+
lock_buffer(bh);
891+
892+
clear_buffer_uptodate(bh);
893+
set_buffer_dirty(bh);
894+
unlock_buffer(bh);
895+
journal->j_fc_wbuf[fc_off] = bh;
896+
897+
*bh_out = bh;
898+
899+
return 0;
900+
}
901+
EXPORT_SYMBOL(jbd2_fc_get_buf);
902+
903+
/*
904+
* Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
905+
* for completion.
906+
*/
907+
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
908+
{
909+
struct buffer_head *bh;
910+
int i, j_fc_off;
911+
912+
read_lock(&journal->j_state_lock);
913+
j_fc_off = journal->j_fc_off;
914+
read_unlock(&journal->j_state_lock);
915+
916+
/*
917+
* Wait in reverse order to minimize chances of us being woken up before
918+
* all IOs have completed
919+
*/
920+
for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
921+
bh = journal->j_fc_wbuf[i];
922+
wait_on_buffer(bh);
923+
put_bh(bh);
924+
journal->j_fc_wbuf[i] = NULL;
925+
if (unlikely(!buffer_uptodate(bh)))
926+
return -EIO;
927+
}
928+
929+
return 0;
930+
}
931+
EXPORT_SYMBOL(jbd2_fc_wait_bufs);
932+
933+
/*
934+
* Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
935+
* for completion.
936+
*/
937+
int jbd2_fc_release_bufs(journal_t *journal)
938+
{
939+
struct buffer_head *bh;
940+
int i, j_fc_off;
941+
942+
read_lock(&journal->j_state_lock);
943+
j_fc_off = journal->j_fc_off;
944+
read_unlock(&journal->j_state_lock);
945+
946+
/*
947+
* Wait in reverse order to minimize chances of us being woken up before
948+
* all IOs have completed
949+
*/
950+
for (i = j_fc_off - 1; i >= 0; i--) {
951+
bh = journal->j_fc_wbuf[i];
952+
if (!bh)
953+
break;
954+
put_bh(bh);
955+
journal->j_fc_wbuf[i] = NULL;
956+
}
957+
958+
return 0;
959+
}
960+
EXPORT_SYMBOL(jbd2_fc_release_bufs);
961+
787962
/*
788963
* Conversion of logical to physical block numbers for the journal
789964
*
@@ -1142,6 +1317,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
11421317
init_waitqueue_head(&journal->j_wait_commit);
11431318
init_waitqueue_head(&journal->j_wait_updates);
11441319
init_waitqueue_head(&journal->j_wait_reserved);
1320+
init_waitqueue_head(&journal->j_fc_wait);
11451321
mutex_init(&journal->j_abort_mutex);
11461322
mutex_init(&journal->j_barrier);
11471323
mutex_init(&journal->j_checkpoint_mutex);
@@ -1495,6 +1671,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
14951671
static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
14961672
{
14971673
journal_superblock_t *sb = journal->j_superblock;
1674+
bool had_fast_commit = false;
14981675

14991676
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
15001677
lock_buffer(journal->j_sb_buffer);
@@ -1508,9 +1685,20 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
15081685

15091686
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
15101687
sb->s_start = cpu_to_be32(0);
1688+
if (jbd2_has_feature_fast_commit(journal)) {
1689+
/*
1690+
* When journal is clean, no need to commit fast commit flag and
1691+
* make file system incompatible with older kernels.
1692+
*/
1693+
jbd2_clear_feature_fast_commit(journal);
1694+
had_fast_commit = true;
1695+
}
15111696

15121697
jbd2_write_superblock(journal, write_op);
15131698

1699+
if (had_fast_commit)
1700+
jbd2_set_feature_fast_commit(journal);
1701+
15141702
/* Log is no longer empty */
15151703
write_lock(&journal->j_state_lock);
15161704
journal->j_flags |= JBD2_FLUSHED;

include/linux/jbd2.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,13 @@ struct journal_s
861861
*/
862862
wait_queue_head_t j_wait_reserved;
863863

864+
/**
865+
* @j_fc_wait:
866+
*
867+
* Wait queue to wait for completion of async fast commits.
868+
*/
869+
wait_queue_head_t j_fc_wait;
870+
864871
/**
865872
* @j_checkpoint_mutex:
866873
*
@@ -1232,6 +1239,15 @@ struct journal_s
12321239
*/
12331240
struct lockdep_map j_trans_commit_map;
12341241
#endif
1242+
1243+
/**
1244+
* @j_fc_cleanup_callback:
1245+
*
1246+
* Clean-up after fast commit or full commit. JBD2 calls this function
1247+
* after every commit operation.
1248+
*/
1249+
void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
1250+
12351251
};
12361252

12371253
#define jbd2_might_wait_for_commit(j) \
@@ -1316,6 +1332,8 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
13161332
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
13171333
* data write error in ordered
13181334
* mode */
1335+
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
1336+
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
13191337

13201338
/*
13211339
* Function declarations for the journaling transaction and buffer
@@ -1574,6 +1592,15 @@ extern int jbd2_cleanup_journal_tail(journal_t *);
15741592

15751593
/* Fast commit related APIs */
15761594
int jbd2_fc_init(journal_t *journal, int num_fc_blks);
1595+
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
1596+
int jbd2_fc_end_commit(journal_t *journal);
1597+
int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
1598+
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
1599+
int jbd2_submit_inode_data(struct jbd2_inode *jinode);
1600+
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
1601+
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
1602+
int jbd2_fc_release_bufs(journal_t *journal);
1603+
15771604
/*
15781605
* is_journal_abort
15791606
*

0 commit comments

Comments
 (0)