Skip to content

Commit 9140db0

Browse files
seeda authored and Mark Fasheh committed
ocfs2: recover orphans in offline slots during recovery and mount
During recovery, a node recovers orphans in its own slot and in the slots of the dead node(s). But if the dead nodes were holding orphans in offline slots, those orphans are left unrecovered. If the dead node is the last one to die while holding orphans in other slots, and it is also the first one to mount, then it only recovers its own slot, which leaves orphans in offline slots. This patch queues complete_recovery to clean orphans for all offline slots during mount and node recovery. Signed-off-by: Srinivas Eeda <[email protected]> Acked-by: Joel Becker <[email protected]> Signed-off-by: Mark Fasheh <[email protected]>
1 parent 1fca3a0 commit 9140db0

File tree

4 files changed

+132
-18
lines changed

4 files changed

+132
-18
lines changed

fs/ocfs2/journal.c

Lines changed: 123 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
6565
static int ocfs2_recover_orphans(struct ocfs2_super *osb,
6666
int slot);
6767
static int ocfs2_commit_thread(void *arg);
68+
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
69+
int slot_num,
70+
struct ocfs2_dinode *la_dinode,
71+
struct ocfs2_dinode *tl_dinode,
72+
struct ocfs2_quota_recovery *qrec);
6873

6974
static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
7075
{
@@ -76,6 +81,97 @@ static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
7681
return __ocfs2_wait_on_mount(osb, 1);
7782
}
7883

84+
/*
 * The replay map tracks which slots are offline, so that orphans left in
 * those slots can be recovered during node recovery and mount.
 */

enum ocfs2_replay_state {
	REPLAY_UNNEEDED = 0,	/* Replay is not needed, so ignore this map */
	REPLAY_NEEDED,		/* Replay slots marked in rm_replay_slots */
	REPLAY_DONE		/* Replay was already queued */
};

struct ocfs2_replay_map {
	/* Number of entries in rm_replay_slots */
	unsigned int rm_slots;
	/* Whether the marked slots still need to be queued for replay */
	enum ocfs2_replay_state rm_state;
	/*
	 * One byte per slot, non-zero when the slot was found offline.
	 * Declared as a C99 flexible array member rather than a GNU
	 * zero-length array; the allocation size computation
	 * (sizeof(struct) + rm_slots bytes) is unchanged.
	 */
	unsigned char rm_replay_slots[];
};
100+
101+
void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
102+
{
103+
if (!osb->replay_map)
104+
return;
105+
106+
/* If we've already queued the replay, we don't have any more to do */
107+
if (osb->replay_map->rm_state == REPLAY_DONE)
108+
return;
109+
110+
osb->replay_map->rm_state = state;
111+
}
112+
113+
int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
114+
{
115+
struct ocfs2_replay_map *replay_map;
116+
int i, node_num;
117+
118+
/* If replay map is already set, we don't do it again */
119+
if (osb->replay_map)
120+
return 0;
121+
122+
replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
123+
(osb->max_slots * sizeof(char)), GFP_KERNEL);
124+
125+
if (!replay_map) {
126+
mlog_errno(-ENOMEM);
127+
return -ENOMEM;
128+
}
129+
130+
spin_lock(&osb->osb_lock);
131+
132+
replay_map->rm_slots = osb->max_slots;
133+
replay_map->rm_state = REPLAY_UNNEEDED;
134+
135+
/* set rm_replay_slots for offline slot(s) */
136+
for (i = 0; i < replay_map->rm_slots; i++) {
137+
if (ocfs2_slot_to_node_num_locked(osb, i, &node_num) == -ENOENT)
138+
replay_map->rm_replay_slots[i] = 1;
139+
}
140+
141+
osb->replay_map = replay_map;
142+
spin_unlock(&osb->osb_lock);
143+
return 0;
144+
}
145+
146+
void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
147+
{
148+
struct ocfs2_replay_map *replay_map = osb->replay_map;
149+
int i;
150+
151+
if (!replay_map)
152+
return;
153+
154+
if (replay_map->rm_state != REPLAY_NEEDED)
155+
return;
156+
157+
for (i = 0; i < replay_map->rm_slots; i++)
158+
if (replay_map->rm_replay_slots[i])
159+
ocfs2_queue_recovery_completion(osb->journal, i, NULL,
160+
NULL, NULL);
161+
replay_map->rm_state = REPLAY_DONE;
162+
}
163+
164+
void ocfs2_free_replay_slots(struct ocfs2_super *osb)
165+
{
166+
struct ocfs2_replay_map *replay_map = osb->replay_map;
167+
168+
if (!osb->replay_map)
169+
return;
170+
171+
kfree(replay_map);
172+
osb->replay_map = NULL;
173+
}
174+
79175
int ocfs2_recovery_init(struct ocfs2_super *osb)
80176
{
81177
struct ocfs2_recovery_map *rm;
@@ -1194,24 +1290,24 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
11941290
}
11951291

11961292
/* Called by the mount code to queue recovery the last part of
1197-
* recovery for it's own slot. */
1293+
* recovery for its own and offline slot(s). */
11981294
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
11991295
{
12001296
struct ocfs2_journal *journal = osb->journal;
12011297

1202-
if (osb->dirty) {
1203-
/* No need to queue up our truncate_log as regular
1204-
* cleanup will catch that. */
1205-
ocfs2_queue_recovery_completion(journal,
1206-
osb->slot_num,
1207-
osb->local_alloc_copy,
1208-
NULL,
1209-
NULL);
1210-
ocfs2_schedule_truncate_log_flush(osb, 0);
1298+
/* No need to queue up our truncate_log as regular cleanup will catch
1299+
* that */
1300+
ocfs2_queue_recovery_completion(journal, osb->slot_num,
1301+
osb->local_alloc_copy, NULL, NULL);
1302+
ocfs2_schedule_truncate_log_flush(osb, 0);
12111303

1212-
osb->local_alloc_copy = NULL;
1213-
osb->dirty = 0;
1214-
}
1304+
osb->local_alloc_copy = NULL;
1305+
osb->dirty = 0;
1306+
1307+
/* queue to recover orphan slots for all offline slots */
1308+
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1309+
ocfs2_queue_replay_slots(osb);
1310+
ocfs2_free_replay_slots(osb);
12151311
}
12161312

12171313
void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
@@ -1254,6 +1350,14 @@ static int __ocfs2_recovery_thread(void *arg)
12541350
goto bail;
12551351
}
12561352

1353+
status = ocfs2_compute_replay_slots(osb);
1354+
if (status < 0)
1355+
mlog_errno(status);
1356+
1357+
/* queue recovery for our own slot */
1358+
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1359+
NULL, NULL);
1360+
12571361
spin_lock(&osb->osb_lock);
12581362
while (rm->rm_used) {
12591363
/* It's always safe to remove entry zero, as we won't
@@ -1319,11 +1423,8 @@ static int __ocfs2_recovery_thread(void *arg)
13191423

13201424
ocfs2_super_unlock(osb, 1);
13211425

1322-
/* We always run recovery on our own orphan dir - the dead
1323-
* node(s) may have disallowd a previos inode delete. Re-processing
1324-
* is therefore required. */
1325-
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1326-
NULL, NULL);
1426+
/* queue recovery for offline slots */
1427+
ocfs2_queue_replay_slots(osb);
13271428

13281429
bail:
13291430
mutex_lock(&osb->recovery_lock);
@@ -1332,6 +1433,7 @@ static int __ocfs2_recovery_thread(void *arg)
13321433
goto restart;
13331434
}
13341435

1436+
ocfs2_free_replay_slots(osb);
13351437
osb->recovery_thread_task = NULL;
13361438
mb(); /* sync with ocfs2_recovery_thread_running */
13371439
wake_up(&osb->recovery_event);
@@ -1483,6 +1585,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
14831585
goto done;
14841586
}
14851587

1588+
/* we need to run complete recovery for offline orphan slots */
1589+
ocfs2_replay_map_set_state(osb, REPLAY_NEEDED);
1590+
14861591
mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",
14871592
node_num, slot_num,
14881593
MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));

fs/ocfs2/journal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
150150
int ocfs2_recovery_init(struct ocfs2_super *osb);
151151
void ocfs2_recovery_exit(struct ocfs2_super *osb);
152152

153+
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
153154
/*
154155
* Journal Control:
155156
* Initialize, Load, Shutdown, Wipe a journal.

fs/ocfs2/ocfs2.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ enum ocfs2_mount_options
209209
struct ocfs2_journal;
210210
struct ocfs2_slot_info;
211211
struct ocfs2_recovery_map;
212+
struct ocfs2_replay_map;
212213
struct ocfs2_quota_recovery;
213214
struct ocfs2_dentry_lock;
214215
struct ocfs2_super
@@ -264,6 +265,7 @@ struct ocfs2_super
264265
atomic_t vol_state;
265266
struct mutex recovery_lock;
266267
struct ocfs2_recovery_map *recovery_map;
268+
struct ocfs2_replay_map *replay_map;
267269
struct task_struct *recovery_thread_task;
268270
int disable_recovery;
269271
wait_queue_head_t checkpoint_event;

fs/ocfs2/super.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2312,6 +2312,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
23122312
* lock, and it's marked as dirty, set the bit in the recover
23132313
* map and launch a recovery thread for it. */
23142314
status = ocfs2_mark_dead_nodes(osb);
2315+
if (status < 0) {
2316+
mlog_errno(status);
2317+
goto finally;
2318+
}
2319+
2320+
status = ocfs2_compute_replay_slots(osb);
23152321
if (status < 0)
23162322
mlog_errno(status);
23172323

0 commit comments

Comments
 (0)