@@ -326,8 +326,7 @@ struct workqueue_struct {
 
 static struct kmem_cache *pwq_cache;
 
-static cpumask_var_t *wq_numa_possible_cpumask;
-					/* possible CPUs of each node */
+static cpumask_var_t *wq_pod_cpus;	/* possible CPUs of each node */
 
 /*
  * Per-cpu work items which run for longer than the following threshold are
@@ -345,10 +344,10 @@ module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
 static bool wq_online;			/* can kworkers be created yet? */
 
-static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
+static bool wq_pod_enabled;		/* unbound CPU pod affinity enabled */
 
-/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
-static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
+/* buf for wq_update_pod_attrs(), protected by CPU hotplug exclusion */
+static struct workqueue_attrs *wq_update_pod_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
@@ -1762,20 +1761,20 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 EXPORT_SYMBOL(queue_work_on);
 
 /**
- * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * select_numa_node_cpu - Select a CPU based on NUMA node
  * @node: NUMA node ID that we want to select a CPU from
  *
 * This function will attempt to find a "random" cpu available on a given
 * node. If there are no CPUs available on the given node it will return
 * WORK_CPU_UNBOUND indicating that we should just schedule to any
 * available CPU if we need to schedule this work.
 */
-static int workqueue_select_cpu_near(int node)
+static int select_numa_node_cpu(int node)
 {
 	int cpu;
 
 	/* No point in doing this if NUMA isn't enabled for workqueues */
-	if (!wq_numa_enabled)
+	if (!wq_pod_enabled)
 		return WORK_CPU_UNBOUND;
 
 	/* Delay binding to CPU if node is not valid or online */
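
Editorial note: the kernel-doc above describes a simple policy — prefer a usable CPU in the requested node, otherwise return WORK_CPU_UNBOUND and let the scheduler place the work. A minimal user-space model of that policy, with cpumasks collapsed to one 64-bit word and the constant and helper names invented for illustration, might look like this:

#include <stdint.h>

#define ANY_CPU	(-1)	/* stands in for WORK_CPU_UNBOUND in this model */

/* node_usable_mask: bit N set means CPU N in the node is usable right now */
static int pick_cpu_in_node(uint64_t node_usable_mask, int pod_enabled)
{
	if (!pod_enabled || node_usable_mask == 0)
		return ANY_CPU;		/* defer: schedule on any available CPU */

	/* "random" is effectively the first usable CPU of the node */
	return __builtin_ctzll(node_usable_mask);
}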
@@ -1834,7 +1833,7 @@ bool queue_work_node(int node, struct workqueue_struct *wq,
 	local_irq_save(flags);
 
 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
-		int cpu = workqueue_select_cpu_near(node);
+		int cpu = select_numa_node_cpu(node);
 
 		__queue_work(cpu, wq, work);
 		ret = true;
@@ -3900,8 +3899,8 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 {
 	u32 hash = wqattrs_hash(attrs);
 	struct worker_pool *pool;
-	int node;
-	int target_node = NUMA_NO_NODE;
+	int pod;
+	int target_pod = NUMA_NO_NODE;
 
 	lockdep_assert_held(&wq_pool_mutex);
 
@@ -3913,24 +3912,23 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 		}
 	}
 
-	/* if cpumask is contained inside a NUMA node, we belong to that node */
-	if (wq_numa_enabled) {
-		for_each_node(node) {
-			if (cpumask_subset(attrs->cpumask,
-					   wq_numa_possible_cpumask[node])) {
-				target_node = node;
+	/* if cpumask is contained inside a pod, we belong to that pod */
+	if (wq_pod_enabled) {
+		for_each_node(pod) {
+			if (cpumask_subset(attrs->cpumask, wq_pod_cpus[pod])) {
+				target_pod = pod;
 				break;
 			}
 		}
 	}
 
 	/* nope, create a new one */
-	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
+	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_pod);
 	if (!pool || init_worker_pool(pool) < 0)
 		goto fail;
 
 	copy_workqueue_attrs(pool->attrs, attrs);
-	pool->node = target_node;
+	pool->node = target_pod;
 
 	/*
 	 * ordered isn't a worker_pool attribute, always clear it. See
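
Editorial note: the pool-selection rule is unchanged by the rename — an unbound pool belongs to a pod only when every CPU in attrs->cpumask lies inside that pod's possible-CPU mask, and the pool is then allocated on that pod. A sketch of the subset test with plain 64-bit masks and invented names (not the kernel's API):

#include <stdint.h>

#define NR_PODS	4

static int find_target_pod(uint64_t attrs_mask, const uint64_t pod_cpus[NR_PODS],
			   int pod_enabled)
{
	int pod;

	if (!pod_enabled)
		return -1;	/* stands in for NUMA_NO_NODE */

	for (pod = 0; pod < NR_PODS; pod++) {
		/* cpumask_subset(): no requested CPU falls outside the pod */
		if ((attrs_mask & ~pod_cpus[pod]) == 0)
			return pod;
	}
	return -1;
}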
@@ -4120,40 +4118,38 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
 }
 
 /**
- * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
+ * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
  * @attrs: the wq_attrs of the default pwq of the target workqueue
- * @node: the target NUMA node
+ * @pod: the target CPU pod
  * @cpu_going_down: if >= 0, the CPU to consider as offline
  * @cpumask: outarg, the resulting cpumask
  *
- * Calculate the cpumask a workqueue with @attrs should use on @node. If
- * @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation. The result is stored in @cpumask.
+ * Calculate the cpumask a workqueue with @attrs should use on @pod. If
+ * @cpu_going_down is >= 0, that cpu is considered offline during calculation.
+ * The result is stored in @cpumask.
 *
- * If NUMA affinity is not enabled, @attrs->cpumask is always used. If
- * enabled and @node has online CPUs requested by @attrs, the returned
- * cpumask is the intersection of the possible CPUs of @node and
- * @attrs->cpumask.
+ * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
+ * and @pod has online CPUs requested by @attrs, the returned cpumask is the
+ * intersection of the possible CPUs of @pod and @attrs->cpumask.
 *
- * The caller is responsible for ensuring that the cpumask of @node stays
- * stable.
+ * The caller is responsible for ensuring that the cpumask of @pod stays stable.
 */
-static void wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
+static void wq_calc_pod_cpumask(const struct workqueue_attrs *attrs, int pod,
 				 int cpu_going_down, cpumask_t *cpumask)
 {
-	if (!wq_numa_enabled || attrs->ordered)
+	if (!wq_pod_enabled || attrs->ordered)
 		goto use_dfl;
 
-	/* does @node have any online CPUs @attrs wants? */
-	cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
+	/* does @pod have any online CPUs @attrs wants? */
+	cpumask_and(cpumask, cpumask_of_node(pod), attrs->cpumask);
 	if (cpu_going_down >= 0)
 		cpumask_clear_cpu(cpu_going_down, cpumask);
 
 	if (cpumask_empty(cpumask))
 		goto use_dfl;
 
-	/* yeap, return possible CPUs in @node that @attrs wants */
-	cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
+	/* yeap, return possible CPUs in @pod that @attrs wants */
+	cpumask_and(cpumask, attrs->cpumask, wq_pod_cpus[pod]);
 
 	if (cpumask_empty(cpumask))
 		pr_warn_once("WARNING: workqueue cpumask: online intersect > "
@@ -4257,8 +4253,8 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
 			ctx->dfl_pwq->refcnt++;
 			ctx->pwq_tbl[cpu] = ctx->dfl_pwq;
 		} else {
-			wq_calc_node_cpumask(new_attrs, cpu_to_node(cpu), -1,
-					     tmp_attrs->cpumask);
+			wq_calc_pod_cpumask(new_attrs, cpu_to_node(cpu), -1,
+					    tmp_attrs->cpumask);
 			ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, tmp_attrs);
 			if (!ctx->pwq_tbl[cpu])
 				goto out_free;
@@ -4349,12 +4345,11 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
- * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
- * machines, this function maps a separate pwq to each NUMA node with
- * possibles CPUs in @attrs->cpumask so that work items are affine to the
- * NUMA node it was issued on. Older pwqs are released as in-flight work
- * items finish. Note that a work item which repeatedly requeues itself
- * back-to-back will stay on its current pwq.
+ * Apply @attrs to an unbound workqueue @wq. Unless disabled, this function maps
+ * a separate pwq to each CPU pod with possibles CPUs in @attrs->cpumask so that
+ * work items are affine to the pod it was issued on. Older pwqs are released as
+ * in-flight work items finish. Note that a work item which repeatedly requeues
+ * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
@@ -4377,40 +4372,39 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 }
 
 /**
- * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
+ * wq_update_pod - update pod affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU to update pool association for
 * @hotplug_cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
- * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update NUMA affinity of
+ * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update pod affinity of
 * @wq accordingly.
 *
- * If NUMA affinity can't be adjusted due to memory allocation failure, it
- * falls back to @wq->dfl_pwq which may not be optimal but is always
- * correct.
- *
- * Note that when the last allowed CPU of a NUMA node goes offline for a
- * workqueue with a cpumask spanning multiple nodes, the workers which were
- * already executing the work items for the workqueue will lose their CPU
- * affinity and may execute on any CPU. This is similar to how per-cpu
- * workqueues behave on CPU_DOWN. If a workqueue user wants strict
- * affinity, it's the user's responsibility to flush the work item from
- * CPU_DOWN_PREPARE.
+ *
+ * If pod affinity can't be adjusted due to memory allocation failure, it falls
+ * back to @wq->dfl_pwq which may not be optimal but is always correct.
+ *
+ * Note that when the last allowed CPU of a pod goes offline for a workqueue
+ * with a cpumask spanning multiple pods, the workers which were already
+ * executing the work items for the workqueue will lose their CPU affinity and
+ * may execute on any CPU. This is similar to how per-cpu workqueues behave on
+ * CPU_DOWN. If a workqueue user wants strict affinity, it's the user's
+ * responsibility to flush the work item from CPU_DOWN_PREPARE.
 */
-static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
-				   int hotplug_cpu, bool online)
+static void wq_update_pod(struct workqueue_struct *wq, int cpu,
+			  int hotplug_cpu, bool online)
 {
-	int node = cpu_to_node(cpu);
+	int pod = cpu_to_node(cpu);
 	int off_cpu = online ? -1 : hotplug_cpu;
 	struct pool_workqueue *old_pwq = NULL, *pwq;
 	struct workqueue_attrs *target_attrs;
 	cpumask_t *cpumask;
 
 	lockdep_assert_held(&wq_pool_mutex);
 
-	if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
+	if (!wq_pod_enabled || !(wq->flags & WQ_UNBOUND) ||
 	    wq->unbound_attrs->ordered)
 		return;
 
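
Editorial note: the update path the kernel-doc describes is: recompute the pod cpumask for the hotplugged CPU's pod, do nothing if it already matches the current pwq, otherwise install a new pwq, and fall back to the always-correct dfl_pwq if allocation fails. A compressed model of that decision flow, with invented types standing in for pool_workqueue:

#include <stdbool.h>
#include <stdint.h>

struct pwq_model { uint64_t cpumask; };		/* stand-in for pool_workqueue */

static void update_pod_model(struct pwq_model *cur, const struct pwq_model *dfl,
			     uint64_t wanted_mask, bool alloc_ok)
{
	/* nothing to do if the target cpumask matches the current pwq */
	if (cur->cpumask == wanted_mask)
		return;

	if (alloc_ok)
		cur->cpumask = wanted_mask;	/* models alloc_unbound_pwq() succeeding */
	else
		*cur = *dfl;			/* fall back to dfl_pwq: suboptimal but correct */
}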
@@ -4419,13 +4413,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 	 * Let's use a preallocated one. The following buf is protected by
 	 * CPU hotplug exclusion.
 	 */
-	target_attrs = wq_update_unbound_numa_attrs_buf;
+	target_attrs = wq_update_pod_attrs_buf;
 	cpumask = target_attrs->cpumask;
 
 	copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
 
 	/* nothing to do if the target cpumask matches the current pwq */
-	wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, off_cpu, cpumask);
+	wq_calc_pod_cpumask(wq->dfl_pwq->pool->attrs, pod, off_cpu, cpumask);
 	pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu),
 					lockdep_is_held(&wq_pool_mutex));
 	if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
@@ -4434,7 +4428,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 	/* create a new pwq */
 	pwq = alloc_unbound_pwq(wq, target_attrs);
 	if (!pwq) {
-		pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
+		pr_warn("workqueue: allocation failed while updating CPU pod affinity of \"%s\"\n",
 			wq->name);
 		goto use_dfl_pwq;
 	}
@@ -4565,11 +4559,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 	struct pool_workqueue *pwq;
 
 	/*
-	 * Unbound && max_active == 1 used to imply ordered, which is no
-	 * longer the case on NUMA machines due to per-node pools. While
+	 * Unbound && max_active == 1 used to imply ordered, which is no longer
+	 * the case on many machines due to per-pod pools. While
 	 * alloc_ordered_workqueue() is the right way to create an ordered
-	 * workqueue, keep the previous behavior to avoid subtle breakages
-	 * on NUMA.
+	 * workqueue, keep the previous behavior to avoid subtle breakages.
 	 */
 	if ((flags & WQ_UNBOUND) && max_active == 1)
 		flags |= __WQ_ORDERED;
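
Editorial note: as the updated comment says, alloc_ordered_workqueue() is the intended way to ask for ordering; WQ_UNBOUND with max_active == 1 is only kept implying __WQ_ORDERED for backward compatibility. A minimal kernel-style sketch of the explicit spelling — module boilerplate and names here are illustrative, not from the patch:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* name is made up */

static int __init example_init(void)
{
	/* ordered: at most one work item in flight, executed in queueing order */
	example_wq = alloc_ordered_workqueue("example_ordered", 0);
	if (!example_wq)
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");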
@@ -5450,13 +5443,13 @@ int workqueue_online_cpu(unsigned int cpu)
 		mutex_unlock(&wq_pool_attach_mutex);
 	}
 
-	/* update NUMA affinity of unbound workqueues */
+	/* update pod affinity of unbound workqueues */
 	list_for_each_entry(wq, &workqueues, list) {
 		int tcpu;
 
 		for_each_possible_cpu(tcpu) {
 			if (cpu_to_node(tcpu) == cpu_to_node(cpu)) {
-				wq_update_unbound_numa(wq, tcpu, cpu, true);
+				wq_update_pod(wq, tcpu, cpu, true);
 			}
 		}
 	}
@@ -5475,14 +5468,14 @@ int workqueue_offline_cpu(unsigned int cpu)
 
 	unbind_workers(cpu);
 
-	/* update NUMA affinity of unbound workqueues */
+	/* update pod affinity of unbound workqueues */
 	mutex_lock(&wq_pool_mutex);
 	list_for_each_entry(wq, &workqueues, list) {
 		int tcpu;
 
 		for_each_possible_cpu(tcpu) {
 			if (cpu_to_node(tcpu) == cpu_to_node(cpu)) {
-				wq_update_unbound_numa(wq, tcpu, cpu, false);
+				wq_update_pod(wq, tcpu, cpu, false);
 			}
 		}
 	}
@@ -6263,7 +6256,7 @@ static inline void wq_watchdog_init(void) { }
 
 #endif	/* CONFIG_WQ_WATCHDOG */
 
-static void __init wq_numa_init(void)
+static void __init wq_pod_init(void)
 {
 	cpumask_var_t *tbl;
 	int node, cpu;
@@ -6278,8 +6271,8 @@ static void __init wq_numa_init(void)
 		}
 	}
 
-	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
-	BUG_ON(!wq_update_unbound_numa_attrs_buf);
+	wq_update_pod_attrs_buf = alloc_workqueue_attrs();
+	BUG_ON(!wq_update_pod_attrs_buf);
 
 	/*
 	 * We want masks of possible CPUs of each node which isn't readily
@@ -6298,8 +6291,8 @@ static void __init wq_numa_init(void)
 		cpumask_set_cpu(cpu, tbl[node]);
 	}
 
-	wq_numa_possible_cpumask = tbl;
-	wq_numa_enabled = true;
+	wq_pod_cpus = tbl;
+	wq_pod_enabled = true;
 }
 
 /**
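
Editorial note: wq_pod_init() still does what wq_numa_init() did — build one possible-CPU mask per node and only then flip the enable flag. The table-building step, modelled in user-space C with invented names and fixed sizes:

#include <stdint.h>
#include <string.h>

#define NR_PODS	4
#define NR_CPUS	64

/* caller supplies the cpu->pod mapping; pod_cpus[] is the table being built */
static void build_pod_table(const int cpu_to_pod[NR_CPUS],
			    uint64_t pod_cpus[NR_PODS])
{
	int cpu;

	memset(pod_cpus, 0, NR_PODS * sizeof(*pod_cpus));
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		pod_cpus[cpu_to_pod[cpu]] |= 1ULL << cpu;	/* cpumask_set_cpu() */
}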
@@ -6440,15 +6433,14 @@ void __init workqueue_init(void)
 	wq_cpu_intensive_thresh_init();
 
 	/*
-	 * It'd be simpler to initialize NUMA in workqueue_init_early() but
-	 * CPU to node mapping may not be available that early on some
-	 * archs such as power and arm64. As per-cpu pools created
-	 * previously could be missing node hint and unbound pools NUMA
-	 * affinity, fix them up.
+	 * It'd be simpler to initialize pods in workqueue_init_early() but CPU
+	 * to node mapping may not be available that early on some archs such as
+	 * power and arm64. As per-cpu pools created previously could be missing
+	 * node hint and unbound pool pod affinity, fix them up.
 	 *
 	 * Also, while iterating workqueues, create rescuers if requested.
 	 */
-	wq_numa_init();
+	wq_pod_init();
 
 	mutex_lock(&wq_pool_mutex);
 
@@ -6459,8 +6451,7 @@ void __init workqueue_init(void)
 	}
 
 	list_for_each_entry(wq, &workqueues, list) {
-		wq_update_unbound_numa(wq, smp_processor_id(), smp_processor_id(),
-				       true);
+		wq_update_pod(wq, smp_processor_id(), smp_processor_id(), true);
 		WARN(init_rescuer(wq),
 		     "workqueue: failed to create early rescuer for %s",
 		     wq->name);