Skip to content

Commit 4dbd2b2

Browse files
shroffnikeithbusch
authored and committed
nvme-multipath: Add visibility for round-robin io-policy
This patch helps add nvme native multipath visibility for round-robin io-policy. It creates a "multipath" sysfs directory under head gendisk device node directory and then from "multipath" directory it adds a link to each namespace path device the head node refers. For instance, if we have a shared namespace accessible from two different controllers/paths then we create a soft link to each path device from head disk node as shown below: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For round-robin I/O policy, we could easily infer from the above output that I/O workload targeted to nvme1n1 would toggle across paths nvme1c1n1 and nvme1c3n1. Reviewed-by: Hannes Reinecke <[email protected]> Signed-off-by: Nilay Shroff <[email protected]> Signed-off-by: Keith Busch <[email protected]>
1 parent 316dabe commit 4dbd2b2

File tree

4 files changed

+130
-4
lines changed

4 files changed

+130
-4
lines changed

drivers/nvme/host/core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4020,6 +4020,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
40204020

40214021
if (!nvme_ns_head_multipath(ns->head))
40224022
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
4023+
4024+
nvme_mpath_remove_sysfs_link(ns);
4025+
40234026
del_gendisk(ns->disk);
40244027

40254028
mutex_lock(&ns->ctrl->namespaces_lock);

drivers/nvme/host/multipath.c

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
686686
kblockd_schedule_work(&head->partition_scan_work);
687687
}
688688

689+
nvme_mpath_add_sysfs_link(ns->head);
690+
689691
mutex_lock(&head->lock);
690692
if (nvme_path_is_optimized(ns)) {
691693
int node, srcu_idx;
@@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
768770
if (nvme_state_is_live(ns->ana_state) &&
769771
nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
770772
nvme_mpath_set_live(ns);
773+
else {
774+
/*
775+
* Add sysfs link from multipath head gendisk node to path
776+
* device gendisk node.
777+
* If path's ana state is live (i.e. state is either optimized
778+
* or non-optimized) while we alloc the ns then sysfs link would
779+
* be created from nvme_mpath_set_live(). In that case we would
780+
* not fallthrough this code path. However for the path's ana
781+
* state other than live, we call nvme_mpath_set_live() only
782+
* after ana state transitioned to the live state. But we still
783+
* want to create the sysfs link from head node to a path device
784+
* irrespective of the path's ana state.
785+
* If we reach through here then it means that path's ana state
786+
* is not live but still create the sysfs link to this path from
787+
* head node if head node of the path has already come alive.
788+
*/
789+
if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags))
790+
nvme_mpath_add_sysfs_link(ns->head);
791+
}
771792
}
772793

773794
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -967,6 +988,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
967988
return -ENXIO; /* just break out of the loop */
968989
}
969990

991+
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head)
992+
{
993+
struct device *target;
994+
int rc, srcu_idx;
995+
struct nvme_ns *ns;
996+
struct kobject *kobj;
997+
998+
/*
999+
* Ensure head disk node is already added otherwise we may get invalid
1000+
* kobj for head disk node
1001+
*/
1002+
if (!test_bit(GD_ADDED, &head->disk->state))
1003+
return;
1004+
1005+
kobj = &disk_to_dev(head->disk)->kobj;
1006+
1007+
/*
1008+
* loop through each ns chained through the head->list and create the
1009+
* sysfs link from head node to the ns path node
1010+
*/
1011+
srcu_idx = srcu_read_lock(&head->srcu);
1012+
1013+
list_for_each_entry_rcu(ns, &head->list, siblings) {
1014+
/*
1015+
* Avoid creating link if it already exists for the given path.
1016+
* When path ana state transitions from optimized to non-
1017+
* optimized or vice-versa, the nvme_mpath_set_live() is
1018+
* invoked which in truns call this function. Now if the sysfs
1019+
* link already exists for the given path and we attempt to re-
1020+
* create the link then sysfs code would warn about it loudly.
1021+
* So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure
1022+
* that we're not creating duplicate link.
1023+
* The test_and_set_bit() is used because it is protecting
1024+
* against multiple nvme paths being simultaneously added.
1025+
*/
1026+
if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
1027+
continue;
1028+
1029+
/*
1030+
* Ensure that ns path disk node is already added otherwise we
1031+
* may get invalid kobj name for target
1032+
*/
1033+
if (!test_bit(GD_ADDED, &ns->disk->state))
1034+
continue;
1035+
1036+
target = disk_to_dev(ns->disk);
1037+
/*
1038+
* Create sysfs link from head gendisk kobject @kobj to the
1039+
* ns path gendisk kobject @target->kobj.
1040+
*/
1041+
rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name,
1042+
&target->kobj, dev_name(target));
1043+
if (unlikely(rc)) {
1044+
dev_err(disk_to_dev(ns->head->disk),
1045+
"failed to create link to %s\n",
1046+
dev_name(target));
1047+
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
1048+
}
1049+
}
1050+
1051+
srcu_read_unlock(&head->srcu, srcu_idx);
1052+
}
1053+
1054+
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
1055+
{
1056+
struct device *target;
1057+
struct kobject *kobj;
1058+
1059+
if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags))
1060+
return;
1061+
1062+
target = disk_to_dev(ns->disk);
1063+
kobj = &disk_to_dev(ns->head->disk)->kobj;
1064+
sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name,
1065+
dev_name(target));
1066+
clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags);
1067+
}
1068+
9701069
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
9711070
{
9721071
if (nvme_ctrl_use_ana(ns->ctrl)) {

drivers/nvme/host/nvme.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -534,10 +534,11 @@ struct nvme_ns {
534534
struct nvme_ns_head *head;
535535

536536
unsigned long flags;
537-
#define NVME_NS_REMOVING 0
538-
#define NVME_NS_ANA_PENDING 2
539-
#define NVME_NS_FORCE_RO 3
540-
#define NVME_NS_READY 4
537+
#define NVME_NS_REMOVING 0
538+
#define NVME_NS_ANA_PENDING 2
539+
#define NVME_NS_FORCE_RO 3
540+
#define NVME_NS_READY 4
541+
#define NVME_NS_SYSFS_ATTR_LINK 5
541542

542543
struct cdev cdev;
543544
struct device cdev_device;
@@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
933934
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
934935

935936
extern const struct attribute_group *nvme_ns_attr_groups[];
937+
extern const struct attribute_group nvme_ns_mpath_attr_group;
936938
extern const struct pr_ops nvme_pr_ops;
937939
extern const struct block_device_operations nvme_ns_head_ops;
938940
extern const struct attribute_group nvme_dev_attrs_group;
@@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys);
955957
void nvme_failover_req(struct request *req);
956958
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
957959
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
960+
void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns);
961+
void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns);
958962
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid);
959963
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
960964
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
@@ -1009,6 +1013,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
10091013
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
10101014
{
10111015
}
1016+
static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns)
1017+
{
1018+
}
1019+
static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns)
1020+
{
1021+
}
10121022
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
10131023
{
10141024
return false;

drivers/nvme/host/sysfs.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,22 @@ static const struct attribute_group nvme_ns_attr_group = {
299299
.is_visible = nvme_ns_attrs_are_visible,
300300
};
301301

302+
#ifdef CONFIG_NVME_MULTIPATH
303+
static struct attribute *nvme_ns_mpath_attrs[] = {
304+
NULL,
305+
};
306+
307+
const struct attribute_group nvme_ns_mpath_attr_group = {
308+
.name = "multipath",
309+
.attrs = nvme_ns_mpath_attrs,
310+
};
311+
#endif
312+
302313
const struct attribute_group *nvme_ns_attr_groups[] = {
303314
&nvme_ns_attr_group,
315+
#ifdef CONFIG_NVME_MULTIPATH
316+
&nvme_ns_mpath_attr_group,
317+
#endif
304318
NULL,
305319
};
306320

0 commit comments

Comments
 (0)