Skip to content

Commit 58ec3b4

Browse files
herbertxdavem330
authored andcommitted
net: Fix netdev_run_todo dead-lock
Benjamin Thery tracked down a bug that explains many instances of the error unregister_netdevice: waiting for %s to become free. Usage count = %d It turns out that netdev_run_todo can dead-lock with itself if a second instance of it is run in a thread that will then free a reference to the device waited on by the first instance. The problem is really quite silly. We were trying to create parallelism where none was required. As netdev_run_todo always follows a RTNL section, and that todo tasks can only be added with the RTNL held, by definition you should only need to wait for the very ones that you've added and be done with it. There is no need for a second mutex or spinlock. This is exactly what the following patch does. Signed-off-by: Herbert Xu <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 742201e commit 58ec3b4

File tree

2 files changed

+7
-22
lines changed

2 files changed

+7
-22
lines changed

net/core/dev.c

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3812,14 +3812,11 @@ static int dev_new_index(struct net *net)
38123812
}
38133813

38143814
/* Delayed registration/unregisteration */
3815-
static DEFINE_SPINLOCK(net_todo_list_lock);
38163815
static LIST_HEAD(net_todo_list);
38173816

38183817
static void net_set_todo(struct net_device *dev)
38193818
{
3820-
spin_lock(&net_todo_list_lock);
38213819
list_add_tail(&dev->todo_list, &net_todo_list);
3822-
spin_unlock(&net_todo_list_lock);
38233820
}
38243821

38253822
static void rollback_registered(struct net_device *dev)
@@ -4146,33 +4143,24 @@ static void netdev_wait_allrefs(struct net_device *dev)
41464143
* free_netdev(y1);
41474144
* free_netdev(y2);
41484145
*
4149-
* We are invoked by rtnl_unlock() after it drops the semaphore.
4146+
* We are invoked by rtnl_unlock().
41504147
* This allows us to deal with problems:
41514148
* 1) We can delete sysfs objects which invoke hotplug
41524149
* without deadlocking with linkwatch via keventd.
41534150
* 2) Since we run with the RTNL semaphore not held, we can sleep
41544151
* safely in order to wait for the netdev refcnt to drop to zero.
4152+
*
4153+
* We must not return until all unregister events added during
4154+
* the interval the lock was held have been completed.
41554155
*/
4156-
static DEFINE_MUTEX(net_todo_run_mutex);
41574156
void netdev_run_todo(void)
41584157
{
41594158
struct list_head list;
41604159

4161-
/* Need to guard against multiple cpu's getting out of order. */
4162-
mutex_lock(&net_todo_run_mutex);
4163-
4164-
/* Not safe to do outside the semaphore. We must not return
4165-
* until all unregister events invoked by the local processor
4166-
* have been completed (either by this todo run, or one on
4167-
* another cpu).
4168-
*/
4169-
if (list_empty(&net_todo_list))
4170-
goto out;
4171-
41724160
/* Snapshot list, allow later requests */
4173-
spin_lock(&net_todo_list_lock);
41744161
list_replace_init(&net_todo_list, &list);
4175-
spin_unlock(&net_todo_list_lock);
4162+
4163+
__rtnl_unlock();
41764164

41774165
while (!list_empty(&list)) {
41784166
struct net_device *dev
@@ -4204,9 +4192,6 @@ void netdev_run_todo(void)
42044192
/* Free network device */
42054193
kobject_put(&dev->dev.kobj);
42064194
}
4207-
4208-
out:
4209-
mutex_unlock(&net_todo_run_mutex);
42104195
}
42114196

42124197
static struct net_device_stats *internal_stats(struct net_device *dev)

net/core/rtnetlink.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void __rtnl_unlock(void)
7373

7474
void rtnl_unlock(void)
7575
{
76-
mutex_unlock(&rtnl_mutex);
76+
/* This fellow will unlock it for us. */
7777
netdev_run_todo();
7878
}
7979

0 commit comments

Comments
 (0)