Commit 53b7d2d

sched/topology: improve topology_span_sane speed
JIRA: https://issues.redhat.com/browse/RHEL-110301

commit f55dac1
Author: Steve Wahl <[email protected]>
Date:   Tue Mar 4 10:08:43 2025 -0600

    sched/topology: improve topology_span_sane speed

    Use a different approach to topology_span_sane(), that checks for the
    same constraint of no partial overlaps for any two CPU sets for
    non-NUMA topology levels, but does so in a way that is O(N) rather
    than O(N^2).

    Instead of comparing with all other masks to detect collisions, keep
    one mask that includes all CPUs seen so far and detect collisions with
    a single cpumask_intersects test.

    If the current mask has no collisions with previously seen masks, it
    should be a new mask, which can be uniquely identified by the lowest
    bit set in this mask.  Keep a pointer to this mask for future
    reference (in an array indexed by the lowest bit set), and add the
    CPUs in this mask to the list of those seen.

    If the current mask does collide with previously seen masks, it
    should be exactly equal to a mask seen before, looked up in the same
    array indexed by the lowest bit set in the mask, a single comparison.

    Move the topology_span_sane() check out of the existing topology
    level loop, let it use its own loop so that the array allocation can
    be done only once, shared across levels.

    On a system with 1920 processors (16 sockets, 60 cores, 2 threads),
    the average time to take one processor offline is reduced from 2.18
    seconds to 1.01 seconds.  (Off-lining 959 of 1920 processors took
    34m49.765s without this change, 16m10.038s with this change in
    place.)

    Signed-off-by: Steve Wahl <[email protected]>
    Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
    Reviewed-by: Valentin Schneider <[email protected]>
    Reviewed-by: Madadi Vineeth Reddy <[email protected]>
    Tested-by: K Prateek Nayak <[email protected]>
    Tested-by: Valentin Schneider <[email protected]>
    Tested-by: Madadi Vineeth Reddy <[email protected]>
    Link: https://lore.kernel.org/r/[email protected]

Signed-off-by: Phil Auld <[email protected]>
1 parent 3335d6a commit 53b7d2d
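The core trick described above is small enough to demo outside the kernel. Below is a minimal userspace sketch of the same O(N) check, not the kernel code itself: plain uint64_t bitmasks stand in for struct cpumask, __builtin_ctzll() stands in for cpumask_first(), and span_sane() plus the sample masks are illustrative names invented for this example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 64

/*
 * O(N) partial-overlap check over per-CPU topology masks, mirroring the
 * patched topology_span_sane(): "covered" accumulates every CPU seen so
 * far, and seen[id] remembers the one mask whose lowest set bit is id.
 */
static bool span_sane(const uint64_t mask[NR_CPUS], int nr_cpus)
{
	const uint64_t *seen[NR_CPUS] = { 0 };
	uint64_t covered = 0;

	for (int cpu = 0; cpu < nr_cpus; cpu++) {
		uint64_t m = mask[cpu];

		if (!m)		/* empty masks cannot collide */
			continue;

		/* lowest set bit serves as the mask's unique id */
		int id = __builtin_ctzll(m);

		if (!(m & covered)) {
			/* first time this span appears: record and accumulate */
			seen[id] = &mask[cpu];
			covered |= m;
		} else if (!seen[id] || *seen[id] != m) {
			/* overlaps previous spans without exactly matching one */
			return false;
		}
	}
	return true;
}

int main(void)
{
	/* CPUs 0-1 share a span, CPUs 2-3 share a span: sane */
	uint64_t good[NR_CPUS] = { 0x3, 0x3, 0xc, 0xc };
	/* CPU 2's span {1,2} partially overlaps CPU 0's {0,1}: not sane */
	uint64_t bad[NR_CPUS]  = { 0x3, 0x3, 0x6, 0x6 };

	printf("good: %s\n", span_sane(good, 4) ? "sane" : "not sane");
	printf("bad:  %s\n", span_sane(bad, 4) ? "sane" : "not sane");
	return 0;
}

Each CPU's mask is visited once and tested against the running "covered" set with a single AND, which is what turns the old all-pairs comparison into a linear pass; the kernel version additionally repeats this per topology level and reuses one allocated pointer array across levels.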

File tree

1 file changed

+58 -25 lines changed
kernel/sched/topology.c

Lines changed: 58 additions & 25 deletions

@@ -2395,36 +2395,69 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 
 /*
  * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
- * any two given CPUs at this (non-NUMA) topology level.
+ * any two given CPUs on non-NUMA topology levels.
  */
-static bool topology_span_sane(struct sched_domain_topology_level *tl,
-			       const struct cpumask *cpu_map, int cpu)
+static bool topology_span_sane(const struct cpumask *cpu_map)
 {
-	int i = cpu + 1;
+	struct sched_domain_topology_level *tl;
+	const struct cpumask **masks;
+	struct cpumask *covered;
+	int cpu, id;
+	bool ret = false;
 
-	/* NUMA levels are allowed to overlap */
-	if (tl->flags & SDTL_OVERLAP)
-		return true;
+	lockdep_assert_held(&sched_domains_mutex);
+	covered = sched_domains_tmpmask;
+
+	masks = kmalloc_array(nr_cpu_ids, sizeof(struct cpumask *), GFP_KERNEL);
+	if (!masks)
+		return ret;
+
+	for_each_sd_topology(tl) {
+
+		/* NUMA levels are allowed to overlap */
+		if (tl->flags & SDTL_OVERLAP)
+			continue;
+
+		cpumask_clear(covered);
+		memset(masks, 0, nr_cpu_ids * sizeof(struct cpumask *));
 
-	/*
-	 * Non-NUMA levels cannot partially overlap - they must be either
-	 * completely equal or completely disjoint. Otherwise we can end up
-	 * breaking the sched_group lists - i.e. a later get_group() pass
-	 * breaks the linking done for an earlier span.
-	 */
-	for_each_cpu_from(i, cpu_map) {
 		/*
-		 * We should 'and' all those masks with 'cpu_map' to exactly
-		 * match the topology we're about to build, but that can only
-		 * remove CPUs, which only lessens our ability to detect
-		 * overlaps
+		 * Non-NUMA levels cannot partially overlap - they must be either
+		 * completely equal or completely disjoint. Otherwise we can end up
+		 * breaking the sched_group lists - i.e. a later get_group() pass
+		 * breaks the linking done for an earlier span.
 		 */
-		if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
-		    cpumask_intersects(tl->mask(cpu), tl->mask(i)))
-			return false;
+		for_each_cpu(cpu, cpu_map) {
+			/* lowest bit set in this mask is used as a unique id */
+			id = cpumask_first(tl->mask(cpu));
+
+			/* zeroed masks cannot possibly collide */
+			if (id >= nr_cpu_ids)
+				continue;
+
+			/* if this mask doesn't collide with what we've already seen */
+			if (!cpumask_intersects(tl->mask(cpu), covered)) {
+				/* this failing would be an error in this algorithm */
+				if (WARN_ON(masks[id]))
+					goto notsane;
+
+				/* record the mask we saw for this id */
+				masks[id] = tl->mask(cpu);
+				cpumask_or(covered, tl->mask(cpu), covered);
+			} else if ((!masks[id]) || !cpumask_equal(masks[id], tl->mask(cpu))) {
+				/*
+				 * a collision with covered should have exactly matched
+				 * a previously seen mask with the same id
+				 */
+				goto notsane;
+			}
+		}
 	}
+	ret = true;
 
-	return true;
+notsane:
+	kfree(masks);
+	return ret;
 }
 
 /*
@@ -2456,9 +2489,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 		sd = NULL;
 		for_each_sd_topology(tl) {
 
-			if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
-				goto error;
-
 			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
 
 			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
@@ -2472,6 +2502,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
 		}
 	}
 
+	if (WARN_ON(!topology_span_sane(cpu_map)))
+		goto error;
+
 	/* Build the groups for the domains */
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {