Skip to content

Commit a5cbd5f

Browse files
committed
libceph, ceph: get and handle cluster maps with addrvecs
In preparation for msgr2, make the cluster send us maps with addrvecs including both LEGACY and MSGR2 addrs instead of a single LEGACY addr. This means advertising support for SERVER_NAUTILUS and also some older features: SERVER_MIMIC, MONENC and MONNAMES. MONNAMES and MONENC are actually pre-argonaut, we just never updated ceph_monmap_decode() for them. Decoding is unconditional, see commit 23c625c ("libceph: assume argonaut on the server side"). SERVER_MIMIC doesn't bear any meaning for the kernel client. Since ceph_decode_entity_addrvec() is guarded by encoding version checks (and in the msgr2 case it is guarded implicitly by the fact that the server is speaking msgr2), we assume MSG_ADDR2 for it. Signed-off-by: Ilya Dryomov <[email protected]>
1 parent 8921f25 commit a5cbd5f

File tree

10 files changed

+222
-72
lines changed

10 files changed

+222
-72
lines changed

fs/ceph/mds_client.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5014,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
50145014
return;
50155015
}
50165016

5017-
newmap = ceph_mdsmap_decode(&p, end);
5017+
newmap = ceph_mdsmap_decode(&p, end, false);
50185018
if (IS_ERR(newmap)) {
50195019
err = PTR_ERR(newmap);
50205020
goto bad_unlock;

fs/ceph/mdsmap.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ static int __decode_and_drop_compat_set(void **p, void* end)
114114
* Ignore any fields we don't care about (there are quite a few of
115115
* them).
116116
*/
117-
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
117+
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
118118
{
119119
struct ceph_mdsmap *m;
120120
const void *start = *p;
@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
201201
namelen = ceph_decode_32(p); /* skip mds name */
202202
*p += namelen;
203203

204-
ceph_decode_need(p, end,
205-
4*sizeof(u32) + sizeof(u64) +
206-
sizeof(addr) + sizeof(struct ceph_timespec),
207-
bad);
208-
mds = ceph_decode_32(p);
209-
inc = ceph_decode_32(p);
210-
state = ceph_decode_32(p);
204+
ceph_decode_32_safe(p, end, mds, bad);
205+
ceph_decode_32_safe(p, end, inc, bad);
206+
ceph_decode_32_safe(p, end, state, bad);
211207
*p += sizeof(u64); /* state_seq */
212-
err = ceph_decode_entity_addr(p, end, &addr);
208+
if (info_v >= 8)
209+
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
210+
else
211+
err = ceph_decode_entity_addr(p, end, &addr);
213212
if (err)
214213
goto corrupt;
215-
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
214+
215+
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
216+
bad);
216217
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
217218
*p += sizeof(u32);
218219
ceph_decode_32_safe(p, end, namelen, bad);

include/linux/ceph/ceph_features.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
* feature. Base case is 1 (first use).
99
*/
1010
#define CEPH_FEATURE_INCARNATION_1 (0ull)
11-
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
11+
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
12+
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
1213

1314
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
1415
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
@@ -75,7 +76,7 @@
7576
DEFINE_CEPH_FEATURE( 0, 1, UID)
7677
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
7778
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
78-
79+
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
7980
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
8081
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
8182
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
@@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
114115
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
115116
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
116117
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
117-
DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
118+
DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
118119
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
119120
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
120121
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
@@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
177178
*/
178179
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
179180
(CEPH_FEATURE_NOSRCADDR | \
181+
CEPH_FEATURE_SERVER_NAUTILUS | \
180182
CEPH_FEATURE_FLOCK | \
181183
CEPH_FEATURE_SUBSCRIBE2 | \
184+
CEPH_FEATURE_MONNAMES | \
182185
CEPH_FEATURE_RECONNECT_SEQ | \
183186
CEPH_FEATURE_DIRLAYOUTHASH | \
184187
CEPH_FEATURE_PGID64 | \
185188
CEPH_FEATURE_PGPOOL3 | \
186189
CEPH_FEATURE_OSDENC | \
190+
CEPH_FEATURE_MONENC | \
187191
CEPH_FEATURE_CRUSH_TUNABLES | \
188192
CEPH_FEATURE_SERVER_LUMINOUS | \
189193
CEPH_FEATURE_RESEND_ON_SPLIT | \
@@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
193197
CEPH_FEATURE_MSG_AUTH | \
194198
CEPH_FEATURE_CRUSH_TUNABLES2 | \
195199
CEPH_FEATURE_REPLY_CREATE_INODE | \
200+
CEPH_FEATURE_SERVER_MIMIC | \
196201
CEPH_FEATURE_MDSENC | \
197202
CEPH_FEATURE_OSDHASHPSPOOL | \
198203
CEPH_FEATURE_OSD_CACHEPOOL | \

include/linux/ceph/decode.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
220220
*/
221221
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
222222
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
223+
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
223224

224225
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
225226
{
@@ -239,6 +240,9 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
239240

240241
extern int ceph_decode_entity_addr(void **p, void *end,
241242
struct ceph_entity_addr *addr);
243+
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
244+
struct ceph_entity_addr *addr);
245+
242246
/*
243247
* encoders
244248
*/

include/linux/ceph/mdsmap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
6464
}
6565

6666
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
67-
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
67+
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
6868
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
6969
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
7070

include/linux/ceph/osdmap.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
251251
}
252252

253253
struct ceph_osdmap *ceph_osdmap_alloc(void);
254-
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
255-
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
254+
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
255+
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
256256
struct ceph_osdmap *map);
257257
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
258258

net/ceph/decode.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// SPDX-License-Identifier: GPL-2.0
2+
#include <linux/ceph/ceph_debug.h>
23

34
#include <linux/ceph/decode.h>
45

@@ -82,3 +83,58 @@ ceph_decode_entity_addr(void **p, void *end, struct ceph_entity_addr *addr)
8283
}
8384
EXPORT_SYMBOL(ceph_decode_entity_addr);
8485

86+
/*
87+
* Return addr of desired type (MSGR2 or LEGACY) or error.
88+
* Make sure there is only one match.
89+
*
90+
* Assume encoding with MSG_ADDR2.
*
* @p/@end: decode cursor and end of buffer; *p is advanced as data is consumed.
* @msgr2: true selects the MSGR2 entry, false selects the LEGACY entry.
* @addr: receives a copy of the single matching entry.
*
* Returns 0 on success.  Returns -EINVAL when the marker byte is not 2,
* when a second entry of the wanted type is seen, or via the e_inval label
* handed to the _safe decode helpers.  Returns -ENOENT when the vector is
* non-empty but holds no entry of the wanted type.  An error from
* ceph_decode_entity_addr() is returned unchanged.
*
* Note: when the decoded count is 0 the function returns 0 and *addr is
* not written.
91+
*/
92+
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
93+
struct ceph_entity_addr *addr)
94+
{
95+
__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
96+
CEPH_ENTITY_ADDR_TYPE_LEGACY;
97+
struct ceph_entity_addr tmp_addr;
98+
int addr_cnt;
99+
bool found;
100+
u8 marker;
101+
int ret;
102+
int i;
103+
104+
ceph_decode_8_safe(p, end, marker, e_inval);
105+
if (marker != 2) { /* only marker value 2 is accepted here */
106+
pr_err("bad addrvec marker %d\n", marker);
107+
return -EINVAL;
108+
}
109+
110+
ceph_decode_32_safe(p, end, addr_cnt, e_inval); /* number of entries that follow */
111+
112+
found = false;
113+
for (i = 0; i < addr_cnt; i++) { /* addr_cnt is a signed int: a negative value runs zero iterations */
114+
ret = ceph_decode_entity_addr(p, end, &tmp_addr);
115+
if (ret)
116+
return ret;
117+
118+
if (tmp_addr.type == my_type) { /* both sides are __le32, so no byte swap is needed */
119+
if (found) { /* a second entry of the wanted type is treated as an error */
120+
pr_err("another match of type %d in addrvec\n",
121+
le32_to_cpu(my_type));
122+
return -EINVAL;
123+
}
124+
125+
memcpy(addr, &tmp_addr, sizeof(*addr));
126+
found = true; /* keep looping: remaining entries must still be consumed and checked */
127+
}
128+
}
129+
if (!found && addr_cnt != 0) { /* an empty vector is not reported as an error */
130+
pr_err("no match of type %d in addrvec\n",
131+
le32_to_cpu(my_type));
132+
return -ENOENT;
133+
}
134+
135+
return 0;
136+
137+
e_inval:
138+
return -EINVAL;
139+
}
140+
EXPORT_SYMBOL(ceph_decode_entity_addrvec);

net/ceph/mon_client.c

Lines changed: 105 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops;
3636

3737
static int __validate_auth(struct ceph_mon_client *monc);
3838

39+
/*
 * Decode one "mon_info_t" structure and extract the monitor's address.
 *
 * @p/@end: decode cursor and end of buffer; on success *p is left at the
 *          end of the structure as given by its encoded length.
 * @msgr2: passed through to ceph_decode_entity_addrvec() to pick the
 *         MSGR2 or LEGACY entry.
 * @addr: passed through to ceph_decode_entity_addrvec() as the output.
 *
 * Returns 0 on success.  An error from ceph_start_decoding() or
 * ceph_decode_entity_addrvec() is returned unchanged.  -EINVAL is
 * returned via the e_inval label handed to ceph_decode_skip_string().
 */
static int decode_mon_info(void **p, void *end, bool msgr2,
40+
struct ceph_entity_addr *addr)
41+
{
42+
void *mon_info_end;
43+
u32 struct_len;
44+
u8 struct_v; /* filled in by ceph_start_decoding(), not otherwise used here */
45+
int ret;
46+
47+
ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v,
48+
&struct_len);
49+
if (ret)
50+
return ret;
51+
52+
mon_info_end = *p + struct_len; /* remember where this structure ends */
53+
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
54+
ret = ceph_decode_entity_addrvec(p, end, msgr2, addr);
55+
if (ret)
56+
return ret;
57+
58+
*p = mon_info_end; /* jump past whatever fields follow the addrvec */
59+
return 0;
60+
61+
e_inval:
62+
return -EINVAL;
63+
}
64+
3965
/*
4066
* Decode a monmap blob (e.g., during mount).
67+
*
68+
* Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
4169
*/
42-
static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
70+
static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
4371
{
44-
struct ceph_monmap *m = NULL;
45-
int i, err = -EINVAL;
72+
struct ceph_monmap *monmap = NULL;
4673
struct ceph_fsid fsid;
47-
u32 epoch, num_mon;
48-
u32 len;
74+
u32 struct_len;
75+
int blob_len;
76+
int num_mon;
77+
u8 struct_v;
78+
u32 epoch;
79+
int ret;
80+
int i;
81+
82+
ceph_decode_32_safe(p, end, blob_len, e_inval);
83+
ceph_decode_need(p, end, blob_len, e_inval);
84+
85+
ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
86+
if (ret)
87+
goto fail;
88+
89+
dout("%s struct_v %d\n", __func__, struct_v);
90+
ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
91+
ceph_decode_32_safe(p, end, epoch, e_inval);
92+
if (struct_v >= 6) {
93+
u32 feat_struct_len;
94+
u8 feat_struct_v;
4995

50-
ceph_decode_32_safe(&p, end, len, bad);
51-
ceph_decode_need(&p, end, len, bad);
96+
*p += sizeof(struct ceph_timespec); /* skip last_changed */
97+
*p += sizeof(struct ceph_timespec); /* skip created */
5298

53-
dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
54-
p += sizeof(u16); /* skip version */
99+
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
100+
&feat_struct_v, &feat_struct_len);
101+
if (ret)
102+
goto fail;
55103

56-
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
57-
ceph_decode_copy(&p, &fsid, sizeof(fsid));
58-
epoch = ceph_decode_32(&p);
104+
*p += feat_struct_len; /* skip persistent_features */
59105

60-
num_mon = ceph_decode_32(&p);
106+
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
107+
&feat_struct_v, &feat_struct_len);
108+
if (ret)
109+
goto fail;
61110

111+
*p += feat_struct_len; /* skip optional_features */
112+
}
113+
ceph_decode_32_safe(p, end, num_mon, e_inval);
114+
115+
dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
116+
num_mon);
62117
if (num_mon > CEPH_MAX_MON)
63-
goto bad;
64-
m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
65-
if (m == NULL)
66-
return ERR_PTR(-ENOMEM);
67-
m->fsid = fsid;
68-
m->epoch = epoch;
69-
m->num_mon = num_mon;
70-
for (i = 0; i < num_mon; ++i) {
71-
struct ceph_entity_inst *inst = &m->mon_inst[i];
72-
73-
/* copy name portion */
74-
ceph_decode_copy_safe(&p, end, &inst->name,
75-
sizeof(inst->name), bad);
76-
err = ceph_decode_entity_addr(&p, end, &inst->addr);
77-
if (err)
78-
goto bad;
118+
goto e_inval;
119+
120+
monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
121+
if (!monmap) {
122+
ret = -ENOMEM;
123+
goto fail;
79124
}
80-
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
81-
m->num_mon);
82-
for (i = 0; i < m->num_mon; i++)
83-
dout("monmap_decode mon%d is %s\n", i,
84-
ceph_pr_addr(&m->mon_inst[i].addr));
85-
return m;
86-
bad:
87-
dout("monmap_decode failed with %d\n", err);
88-
kfree(m);
89-
return ERR_PTR(err);
125+
monmap->fsid = fsid;
126+
monmap->epoch = epoch;
127+
monmap->num_mon = num_mon;
128+
129+
/* legacy_mon_addr map or mon_info map */
130+
for (i = 0; i < num_mon; i++) {
131+
struct ceph_entity_inst *inst = &monmap->mon_inst[i];
132+
133+
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
134+
inst->name.type = CEPH_ENTITY_TYPE_MON;
135+
inst->name.num = cpu_to_le64(i);
136+
137+
if (struct_v >= 6)
138+
ret = decode_mon_info(p, end, msgr2, &inst->addr);
139+
else
140+
ret = ceph_decode_entity_addr(p, end, &inst->addr);
141+
if (ret)
142+
goto fail;
143+
144+
dout("%s mon%d addr %s\n", __func__, i,
145+
ceph_pr_addr(&inst->addr));
146+
}
147+
148+
return monmap;
149+
150+
e_inval:
151+
ret = -EINVAL;
152+
fail:
153+
kfree(monmap);
154+
return ERR_PTR(ret);
90155
}
91156

92157
/*
@@ -476,7 +541,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
476541
p = msg->front.iov_base;
477542
end = p + msg->front.iov_len;
478543

479-
monmap = ceph_monmap_decode(p, end);
544+
monmap = ceph_monmap_decode(&p, end, false);
480545
if (IS_ERR(monmap)) {
481546
pr_err("problem decoding monmap, %d\n",
482547
(int)PTR_ERR(monmap));

net/ceph/osd_client.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3918,9 +3918,9 @@ static int handle_one_map(struct ceph_osd_client *osdc,
39183918
set_pool_was_full(osdc);
39193919

39203920
if (incremental)
3921-
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap);
3921+
newmap = osdmap_apply_incremental(&p, end, false, osdc->osdmap);
39223922
else
3923-
newmap = ceph_osdmap_decode(&p, end);
3923+
newmap = ceph_osdmap_decode(&p, end, false);
39243924
if (IS_ERR(newmap))
39253925
return PTR_ERR(newmap);
39263926

0 commit comments

Comments
 (0)