Skip to content

Commit a33f643

Browse files
ukernel authored and idryomov committed
ceph: encode inodes' parent/d_name in cap reconnect message
Since nautilus, the MDS tracks dirfrags whose child inodes have caps in the open file table. When the MDS recovers, it prefetches all of these dirfrags. This avoids using backtraces to load inodes. But dirfrag prefetch may load lots of useless inodes into the cache and make the MDS run out of memory. Recent MDS versions add an option that disables dirfrag prefetch. When dirfrag prefetch is disabled, the recovering MDS only prefetches the corresponding dir inodes. Including each inode's parent/d_name in the cap reconnect message can help the MDS load inodes into its cache. Signed-off-by: "Yan, Zheng" <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Signed-off-by: Ilya Dryomov <[email protected]>
1 parent bbf5c97 commit a33f643

File tree

1 file changed

+61
-28
lines changed

1 file changed

+61
-28
lines changed

fs/ceph/mds_client.c

Lines changed: 61 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3612,6 +3612,39 @@ static int send_reconnect_partial(struct ceph_reconnect_state *recon_state)
36123612
return err;
36133613
}
36143614

3615+
/*
 * Find the primary-link dentry for @inode.
 *
 * Returns a dentry with an extra reference taken (via dget()/dget_dlock()),
 * or NULL when no suitable alias exists.  A non-NULL result must be released
 * by the caller with dput().
 *
 * For a directory only the first alias on i_dentry is examined, and it is
 * rejected if IS_ROOT() is true for it.  For any other inode the alias list
 * is walked and the first alias that is still hashed and carries
 * CEPH_DENTRY_PRIMARY_LINK in its ceph dentry flags is returned.
 */
static struct dentry* d_find_primary(struct inode *inode)
3616+
{
3617+
struct dentry *alias, *dn = NULL;
3618+
3619+
/* Unlocked fast path: no aliases at all, so skip taking i_lock. */
if (hlist_empty(&inode->i_dentry))
3620+
return NULL;
3621+
3622+
spin_lock(&inode->i_lock);
3623+
/* Re-check under i_lock; the list may have emptied since the test above. */
if (hlist_empty(&inode->i_dentry))
3624+
goto out_unlock;
3625+
3626+
if (S_ISDIR(inode->i_mode)) {
3627+
/* Directory: look at the first alias only, never at the rest of the list. */
alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
3628+
if (!IS_ROOT(alias))
3629+
dn = dget(alias);
3630+
goto out_unlock;
3631+
}
3632+
3633+
hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
3634+
/* d_lock is held while the hashed state and ceph flags are inspected. */
spin_lock(&alias->d_lock);
3635+
if (!d_unhashed(alias) &&
3636+
(ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
3637+
/* Take the reference with the _dlock variant since d_lock is already held. */
dn = dget_dlock(alias);
3638+
}
3639+
spin_unlock(&alias->d_lock);
3640+
/* Stop at the first match; dn is only tested after d_lock is dropped. */
if (dn)
3641+
break;
3642+
}
3643+
out_unlock:
3644+
spin_unlock(&inode->i_lock);
3645+
return dn;
3646+
}
3647+
36153648
/*
36163649
* Encode information about a cap for a reconnect with the MDS.
36173650
*/
@@ -3625,13 +3658,32 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
36253658
struct ceph_inode_info *ci = cap->ci;
36263659
struct ceph_reconnect_state *recon_state = arg;
36273660
struct ceph_pagelist *pagelist = recon_state->pagelist;
3628-
int err;
3661+
struct dentry *dentry;
3662+
char *path;
3663+
int pathlen, err;
3664+
u64 pathbase;
36293665
u64 snap_follows;
36303666

36313667
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
36323668
inode, ceph_vinop(inode), cap, cap->cap_id,
36333669
ceph_cap_string(cap->issued));
36343670

3671+
dentry = d_find_primary(inode);
3672+
if (dentry) {
3673+
/* set pathbase to parent dir when msg_version >= 2 */
3674+
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase,
3675+
recon_state->msg_version >= 2);
3676+
dput(dentry);
3677+
if (IS_ERR(path)) {
3678+
err = PTR_ERR(path);
3679+
goto out_err;
3680+
}
3681+
} else {
3682+
path = NULL;
3683+
pathlen = 0;
3684+
pathbase = 0;
3685+
}
3686+
36353687
spin_lock(&ci->i_ceph_lock);
36363688
cap->seq = 0; /* reset cap seq */
36373689
cap->issue_seq = 0; /* and issue_seq */
@@ -3652,7 +3704,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
36523704
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
36533705
rec.v2.issued = cpu_to_le32(cap->issued);
36543706
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
3655-
rec.v2.pathbase = 0;
3707+
rec.v2.pathbase = cpu_to_le64(pathbase);
36563708
rec.v2.flock_len = (__force __le32)
36573709
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
36583710
} else {
@@ -3663,7 +3715,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
36633715
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
36643716
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
36653717
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
3666-
rec.v1.pathbase = 0;
3718+
rec.v1.pathbase = cpu_to_le64(pathbase);
36673719
}
36683720

36693721
if (list_empty(&ci->i_cap_snaps)) {
@@ -3725,7 +3777,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
37253777
sizeof(struct ceph_filelock);
37263778
rec.v2.flock_len = cpu_to_le32(struct_len);
37273779

3728-
struct_len += sizeof(u32) + sizeof(rec.v2);
3780+
struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
37293781

37303782
if (struct_v >= 2)
37313783
struct_len += sizeof(u64); /* snap_follows */
@@ -3749,7 +3801,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
37493801
ceph_pagelist_encode_8(pagelist, 1);
37503802
ceph_pagelist_encode_32(pagelist, struct_len);
37513803
}
3752-
ceph_pagelist_encode_string(pagelist, NULL, 0);
3804+
ceph_pagelist_encode_string(pagelist, path, pathlen);
37533805
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
37543806
ceph_locks_to_pagelist(flocks, pagelist,
37553807
num_fcntl_locks, num_flock_locks);
@@ -3758,39 +3810,20 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
37583810
out_freeflocks:
37593811
kfree(flocks);
37603812
} else {
3761-
u64 pathbase = 0;
3762-
int pathlen = 0;
3763-
char *path = NULL;
3764-
struct dentry *dentry;
3765-
3766-
dentry = d_find_alias(inode);
3767-
if (dentry) {
3768-
path = ceph_mdsc_build_path(dentry,
3769-
&pathlen, &pathbase, 0);
3770-
dput(dentry);
3771-
if (IS_ERR(path)) {
3772-
err = PTR_ERR(path);
3773-
goto out_err;
3774-
}
3775-
rec.v1.pathbase = cpu_to_le64(pathbase);
3776-
}
3777-
37783813
err = ceph_pagelist_reserve(pagelist,
37793814
sizeof(u64) + sizeof(u32) +
37803815
pathlen + sizeof(rec.v1));
3781-
if (err) {
3782-
goto out_freepath;
3783-
}
3816+
if (err)
3817+
goto out_err;
37843818

37853819
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
37863820
ceph_pagelist_encode_string(pagelist, path, pathlen);
37873821
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
3788-
out_freepath:
3789-
ceph_mdsc_free_path(path, pathlen);
37903822
}
37913823

37923824
out_err:
3793-
if (err >= 0)
3825+
ceph_mdsc_free_path(path, pathlen);
3826+
if (!err)
37943827
recon_state->nr_caps++;
37953828
return err;
37963829
}

0 commit comments

Comments
 (0)