4040#include <linux/netdevice.h>
4141#include <linux/security.h>
4242#include <linux/notifier.h>
43+ #include <linux/hashtable.h>
4344#include <rdma/rdma_netlink.h>
4445#include <rdma/ib_addr.h>
4546#include <rdma/ib_cache.h>
@@ -134,6 +135,10 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
134135 !xa_is_err(entry); \
135136 (index)++, entry = xan_find_marked(xa, &(index), filter))
136137
138+ /* RCU hash table mapping netdevice pointers to struct ib_port_data; insertions and removals are serialized by ndev_hash_lock, lookups walk it under rcu_read_lock() */
139+ static DEFINE_SPINLOCK (ndev_hash_lock );
140+ static DECLARE_HASHTABLE (ndev_hash , 5 ) ;
141+
137142static void free_netdevs (struct ib_device * ib_dev );
138143static int ib_security_change (struct notifier_block * nb , unsigned long event ,
139144 void * lsm_data );
@@ -144,6 +149,12 @@ static struct notifier_block ibdev_lsm_nb = {
144149 .notifier_call = ib_security_change ,
145150};
146151
152+ /* Allocation wrapper that places an rcu_head in front of the ib_port_data array so the whole allocation can be released with kfree_rcu() */
153+ struct ib_port_data_rcu {
154+ struct rcu_head rcu_head ;
155+ struct ib_port_data pdata [];
156+ };
157+
147158static int ib_device_check_mandatory (struct ib_device * device )
148159{
149160#define IB_MANDATORY_FUNC (x ) { offsetof(struct ib_device_ops, x), #x }
@@ -295,9 +306,12 @@ static void ib_device_release(struct device *device)
295306 WARN_ON (refcount_read (& dev -> refcount ));
296307 ib_cache_release_one (dev );
297308 ib_security_release_port_pkey_list (dev );
298- kfree (dev -> port_data );
299309 xa_destroy (& dev -> client_data );
300- kfree (dev );
310+ if (dev -> port_data )
311+ kfree_rcu (container_of (dev -> port_data , struct ib_port_data_rcu ,
312+ pdata [0 ]),
313+ rcu_head );
314+ kfree_rcu (dev , rcu_head );
301315}
302316
303317static int ib_device_uevent (struct device * device ,
@@ -468,6 +482,7 @@ static void remove_client_context(struct ib_device *device,
468482
469483static int alloc_port_data (struct ib_device * device )
470484{
485+ struct ib_port_data_rcu * pdata_rcu ;
471486 unsigned int port ;
472487
473488 if (device -> port_data )
@@ -484,17 +499,26 @@ static int alloc_port_data(struct ib_device *device)
484499 * Therefore port_data is declared as a 1 based array with potential
485500 * empty slots at the beginning.
486501 */
487- device -> port_data = kcalloc (rdma_end_port (device ) + 1 ,
488- sizeof (* device -> port_data ), GFP_KERNEL );
489- if (!device -> port_data )
502+ pdata_rcu = kzalloc (struct_size (pdata_rcu , pdata ,
503+ rdma_end_port (device ) + 1 ),
504+ GFP_KERNEL );
505+ if (!pdata_rcu )
490506 return - ENOMEM ;
507+ /*
508+ * The rcu_head is put in front of the port data array and the stored
509+ * pointer is adjusted since we never need to see that member until
510+ * kfree_rcu.
511+ */
512+ device -> port_data = pdata_rcu -> pdata ;
491513
492514 rdma_for_each_port (device , port ) {
493515 struct ib_port_data * pdata = & device -> port_data [port ];
494516
517+ pdata -> ib_dev = device ;
495518 spin_lock_init (& pdata -> pkey_list_lock );
496519 INIT_LIST_HEAD (& pdata -> pkey_list );
497520 spin_lock_init (& pdata -> netdev_lock );
521+ INIT_HLIST_NODE (& pdata -> ndev_hash_link );
498522 }
499523 return 0 ;
500524}
@@ -1042,6 +1066,29 @@ int ib_query_port(struct ib_device *device,
10421066}
10431067EXPORT_SYMBOL (ib_query_port );
10441068
1069+ static void add_ndev_hash (struct ib_port_data * pdata )
1070+ {
1071+ unsigned long flags ;
1072+
1073+ might_sleep ();
1074+
1075+ spin_lock_irqsave (& ndev_hash_lock , flags );
1076+ if (hash_hashed (& pdata -> ndev_hash_link )) {
1077+ hash_del_rcu (& pdata -> ndev_hash_link );
1078+ spin_unlock_irqrestore (& ndev_hash_lock , flags );
1079+ /*
1080+ * We cannot do hash_add_rcu after a hash_del_rcu until the
1081+ * grace period
1082+ */
1083+ synchronize_rcu ();
1084+ spin_lock_irqsave (& ndev_hash_lock , flags );
1085+ }
1086+ if (pdata -> netdev )
1087+ hash_add_rcu (ndev_hash , & pdata -> ndev_hash_link ,
1088+ (uintptr_t )pdata -> netdev );
1089+ spin_unlock_irqrestore (& ndev_hash_lock , flags );
1090+ }
1091+
10451092/**
10461093 * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
10471094 * @ib_dev: Device to modify
@@ -1078,17 +1125,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
10781125
10791126 pdata = & ib_dev -> port_data [port ];
10801127 spin_lock_irqsave (& pdata -> netdev_lock , flags );
1081- if (pdata -> netdev == ndev ) {
1128+ old_ndev = rcu_dereference_protected (
1129+ pdata -> netdev , lockdep_is_held (& pdata -> netdev_lock ));
1130+ if (old_ndev == ndev ) {
10821131 spin_unlock_irqrestore (& pdata -> netdev_lock , flags );
10831132 return 0 ;
10841133 }
1085- old_ndev = pdata -> netdev ;
10861134
10871135 if (ndev )
10881136 dev_hold (ndev );
1089- pdata -> netdev = ndev ;
1137+ rcu_assign_pointer ( pdata -> netdev , ndev ) ;
10901138 spin_unlock_irqrestore (& pdata -> netdev_lock , flags );
10911139
1140+ add_ndev_hash (pdata );
10921141 if (old_ndev )
10931142 dev_put (old_ndev );
10941143
@@ -1103,11 +1152,24 @@ static void free_netdevs(struct ib_device *ib_dev)
11031152
11041153 rdma_for_each_port (ib_dev , port ) {
11051154 struct ib_port_data * pdata = & ib_dev -> port_data [port ];
1155+ struct net_device * ndev ;
11061156
11071157 spin_lock_irqsave (& pdata -> netdev_lock , flags );
1108- if (pdata -> netdev ) {
1109- dev_put (pdata -> netdev );
1110- pdata -> netdev = NULL ;
1158+ ndev = rcu_dereference_protected (
1159+ pdata -> netdev , lockdep_is_held (& pdata -> netdev_lock ));
1160+ if (ndev ) {
1161+ spin_lock (& ndev_hash_lock );
1162+ hash_del_rcu (& pdata -> ndev_hash_link );
1163+ spin_unlock (& ndev_hash_lock );
1164+
1165+ /*
1166+ * If this is the last dev_put there is still a
1167+ * synchronize_rcu before the netdev is kfreed, so we
1168+ * can continue to rely on unlocked pointer
1169+ * comparisons after the put
1170+ */
1171+ rcu_assign_pointer (pdata -> netdev , NULL );
1172+ dev_put (ndev );
11111173 }
11121174 spin_unlock_irqrestore (& pdata -> netdev_lock , flags );
11131175 }
@@ -1132,7 +1194,8 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
11321194 res = ib_dev -> ops .get_netdev (ib_dev , port );
11331195 else {
11341196 spin_lock (& pdata -> netdev_lock );
1135- res = pdata -> netdev ;
1197+ res = rcu_dereference_protected (
1198+ pdata -> netdev , lockdep_is_held (& pdata -> netdev_lock ));
11361199 if (res )
11371200 dev_hold (res );
11381201 spin_unlock (& pdata -> netdev_lock );
@@ -1150,6 +1213,38 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
11501213 return res ;
11511214}
11521215
1216+ /**
1217+ * ib_device_get_by_netdev - Find an IB device associated with a netdev
1218+ * @ndev: netdev to locate
1219+ * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
1220+ *
1221+ * Find and hold an ib_device that is associated with a netdev via
1222+ * ib_device_set_netdev(). The caller must call ib_device_put() on the
1223+ * returned pointer.
1224+ */
1225+ struct ib_device * ib_device_get_by_netdev (struct net_device * ndev ,
1226+ enum rdma_driver_id driver_id )
1227+ {
1228+ struct ib_device * res = NULL ;
1229+ struct ib_port_data * cur ;
1230+
1231+ rcu_read_lock ();
1232+ hash_for_each_possible_rcu (ndev_hash , cur , ndev_hash_link ,
1233+ (uintptr_t )ndev ) {
1234+ if (rcu_access_pointer (cur -> netdev ) == ndev &&
1235+ (driver_id == RDMA_DRIVER_UNKNOWN ||
1236+ cur -> ib_dev -> driver_id == driver_id ) &&
1237+ ib_device_try_get (cur -> ib_dev )) {
1238+ res = cur -> ib_dev ;
1239+ break ;
1240+ }
1241+ }
1242+ rcu_read_unlock ();
1243+
1244+ return res ;
1245+ }
1246+ EXPORT_SYMBOL (ib_device_get_by_netdev );
1247+
11531248/**
11541249 * ib_enum_roce_netdev - enumerate all RoCE ports
11551250 * @ib_dev : IB device we want to query
0 commit comments