
Commit 69a0831

Thomas Gleixner (KAGA-KOKO) authored and Sebastian Andrzej Siewior committed
sched: Distangle worker accounting from rqlock
The worker accounting for CPU-bound workers is plugged into the core
scheduler code and the wakeup code. This is not a hard requirement and
can be avoided by keeping track of the state in the workqueue code
itself.

Keep track of the sleeping state in the worker itself and call the
notifier before entering the core scheduler. There might be false
positives when the task is woken between that call and actually
scheduling, but that's not really different from being scheduled out
and woken immediately after switching away. There is also no harm from
updating nr_running when the task returns from scheduling instead of
accounting it in the wakeup code.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
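Condensed, the new scheme looks like this (taken from the kernel/sched/core.c hunks below, with unrelated details elided): wq_worker_sleeping() is called on the way into the scheduler, without rq->lock held, and wq_worker_running() is called once schedule() returns.

/* kernel/sched/core.c after this commit (condensed; elisions marked) */
static inline void sched_submit_work(struct task_struct *tsk)
{
	if (!tsk->state || tsk_is_pi_blocked(tsk))
		return;

	/*
	 * If a worker went to sleep, notify and ask workqueue whether
	 * it wants to wake up a task to maintain concurrency.
	 */
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_sleeping(tsk);

	/* ... plugged-IO flush handling elided ... */
}

static void sched_update_worker(struct task_struct *tsk)
{
	if (tsk->flags & PF_WQ_WORKER)
		wq_worker_running(tsk);
}

asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		preempt_disable();
		__schedule(false);
		sched_preempt_enable_no_resched();
	} while (need_resched());
	sched_update_worker(tsk);	/* worker is back on a CPU */
}

Because neither hook runs under rq->lock anymore, wq_worker_sleeping() is free to take pool->lock and wake an idle worker with a plain wake_up_process(), which is what allows try_to_wake_up_local() to be removed.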
1 parent 205b60d commit 69a0831

File tree: 3 files changed, +41 −97 lines

kernel/sched/core.c

Lines changed: 15 additions & 66 deletions
@@ -1711,10 +1711,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
 {
 	activate_task(rq, p, en_flags);
 	p->on_rq = TASK_ON_RQ_QUEUED;
-
-	/* if a worker is waking up, notify workqueue */
-	if (p->flags & PF_WQ_WORKER)
-		wq_worker_waking_up(p, cpu_of(rq));
 }
 
 /*
@@ -2151,53 +2147,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	return success;
 }
 
-/**
- * try_to_wake_up_local - try to wake up a local task with rq lock held
- * @p: the thread to be awakened
- * @cookie: context's cookie for pinning
- *
- * Put @p on the run-queue if it's not already there. The caller must
- * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.
- */
-static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
-{
-	struct rq *rq = task_rq(p);
-
-	if (WARN_ON_ONCE(rq != this_rq()) ||
-	    WARN_ON_ONCE(p == current))
-		return;
-
-	lockdep_assert_held(&rq->lock);
-
-	if (!raw_spin_trylock(&p->pi_lock)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we've
-		 * not yet picked a replacement task.
-		 */
-		lockdep_unpin_lock(&rq->lock, cookie);
-		raw_spin_unlock(&rq->lock);
-		raw_spin_lock(&p->pi_lock);
-		raw_spin_lock(&rq->lock);
-		lockdep_repin_lock(&rq->lock, cookie);
-	}
-
-	if (!(p->state & TASK_NORMAL))
-		goto out;
-
-	trace_sched_waking(p);
-
-	if (!task_on_rq_queued(p))
-		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
-
-	ttwu_do_wakeup(rq, p, 0, cookie);
-	ttwu_stat(p, smp_processor_id(), 0);
-out:
-	raw_spin_unlock(&p->pi_lock);
-}
-
 /**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
@@ -3494,21 +3443,6 @@ static void __sched notrace __schedule(bool preempt)
 		} else {
 			deactivate_task(rq, prev, DEQUEUE_SLEEP);
 			prev->on_rq = 0;
-
-			/*
-			 * If a worker went to sleep, notify and ask workqueue
-			 * whether it wants to wake up a task to maintain
-			 * concurrency.
-			 * Only call wake up if prev isn't blocked on a sleeping
-			 * spin lock.
-			 */
-			if (prev->flags & PF_WQ_WORKER && !prev->saved_state) {
-				struct task_struct *to_wakeup;
-
-				to_wakeup = wq_worker_sleeping(prev);
-				if (to_wakeup)
-					try_to_wake_up_local(to_wakeup, cookie);
-			}
 		}
 		switch_count = &prev->nvcsw;
 	}
@@ -3567,6 +3501,14 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
 	if (!tsk->state || tsk_is_pi_blocked(tsk))
 		return;
+
+	/*
+	 * If a worker went to sleep, notify and ask workqueue whether
+	 * it wants to wake up a task to maintain concurrency.
+	 */
+	if (tsk->flags & PF_WQ_WORKER)
+		wq_worker_sleeping(tsk);
+
 	/*
 	 * If we are going to sleep and we have plugged IO queued,
 	 * make sure to submit it to avoid deadlocks.
@@ -3575,6 +3517,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	blk_schedule_flush_plug(tsk);
 }
 
+static void sched_update_worker(struct task_struct *tsk)
+{
+	if (tsk->flags & PF_WQ_WORKER)
+		wq_worker_running(tsk);
+}
+
 asmlinkage __visible void __sched schedule(void)
 {
 	struct task_struct *tsk = current;
@@ -3585,6 +3533,7 @@ asmlinkage __visible void __sched schedule(void)
 		__schedule(false);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
+	sched_update_worker(tsk);
 }
 EXPORT_SYMBOL(schedule);

kernel/workqueue.c

Lines changed: 23 additions & 29 deletions
@@ -841,43 +841,32 @@ static void wake_up_worker(struct worker_pool *pool)
 }
 
 /**
- * wq_worker_waking_up - a worker is waking up
+ * wq_worker_running - a worker is running again
  * @task: task waking up
- * @cpu: CPU @task is waking up to
  *
- * This function is called during try_to_wake_up() when a worker is
- * being awoken.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
+ * This function is called when a worker returns from schedule()
 */
-void wq_worker_waking_up(struct task_struct *task, int cpu)
+void wq_worker_running(struct task_struct *task)
 {
 	struct worker *worker = kthread_data(task);
 
-	if (!(worker->flags & WORKER_NOT_RUNNING)) {
-		WARN_ON_ONCE(worker->pool->cpu != cpu);
+	if (!worker->sleeping)
+		return;
+	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
-	}
+	worker->sleeping = 0;
 }
 
 /**
  * wq_worker_sleeping - a worker is going to sleep
  * @task: task going to sleep
  *
- * This function is called during schedule() when a busy worker is
- * going to sleep. Worker on the same cpu can be woken up by
- * returning pointer to its task.
- *
- * CONTEXT:
- * spin_lock_irq(rq->lock)
- *
- * Return:
- * Worker task on @cpu to wake up, %NULL if none.
+ * This function is called from schedule() when a busy worker is
+ * going to sleep.
 */
-struct task_struct *wq_worker_sleeping(struct task_struct *task)
+void wq_worker_sleeping(struct task_struct *task)
 {
-	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+	struct worker *next, *worker = kthread_data(task);
 	struct worker_pool *pool;
 
 	/*
@@ -886,13 +875,15 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 	 * checking NOT_RUNNING.
 	 */
 	if (worker->flags & WORKER_NOT_RUNNING)
-		return NULL;
+		return;
 
 	pool = worker->pool;
 
-	/* this can only happen on the local cpu */
-	if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
-		return NULL;
+	if (WARN_ON_ONCE(worker->sleeping))
+		return;
+
+	worker->sleeping = 1;
+	spin_lock_irq(&pool->lock);
 
 	/*
@@ -906,9 +897,12 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 	 * lock is safe.
 	 */
 	if (atomic_dec_and_test(&pool->nr_running) &&
-	    !list_empty(&pool->worklist))
-		to_wakeup = first_idle_worker(pool);
-	return to_wakeup ? to_wakeup->task : NULL;
+	    !list_empty(&pool->worklist)) {
+		next = first_idle_worker(pool);
+		if (next)
+			wake_up_process(next->task);
+	}
+	spin_unlock_irq(&pool->lock);
 }
 
 /**
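
To illustrate the nr_running bookkeeping that the new worker->sleeping flag enables, here is a minimal single-threaded userspace model. This is a sketch only: the pool lock, memory barriers, and the idle-worker wakeup are reduced to plain fields, and the model_* names are invented for this example.

#include <stdio.h>
#include <stdbool.h>

struct pool   { int nr_running; };
struct worker { struct pool *pool; bool not_running; int sleeping; };

/* models wq_worker_sleeping(): runs before the worker blocks */
static void model_sleeping(struct worker *w)
{
	if (w->not_running)
		return;		/* excluded from concurrency management */
	if (w->sleeping)
		return;		/* already accounted; avoid double decrement */
	w->sleeping = 1;
	w->pool->nr_running--;	/* kernel: atomic_dec_and_test() + possible wakeup */
}

/* models wq_worker_running(): runs after schedule() returns */
static void model_running(struct worker *w)
{
	if (!w->sleeping)
		return;		/* wq_worker_sleeping() never accounted this worker */
	if (!w->not_running)
		w->pool->nr_running++;
	w->sleeping = 0;
}

int main(void)
{
	struct pool p = { .nr_running = 1 };
	struct worker w = { .pool = &p };

	model_sleeping(&w);	/* worker blocks:  nr_running 1 -> 0 */
	model_running(&w);	/* worker resumes: nr_running 0 -> 1 */
	printf("nr_running = %d\n", p.nr_running);	/* prints 1: net neutral */
	return 0;
}

A sleep/resume cycle is net neutral on nr_running, and wq_worker_running() only re-increments when wq_worker_sleeping() actually decremented, which is exactly the pairing the sleeping flag enforces.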

kernel/workqueue_internal.h

Lines changed: 3 additions & 2 deletions
@@ -43,6 +43,7 @@ struct worker {
 	unsigned long		last_active;	/* L: last active timestamp */
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
+	int			sleeping;	/* None */
 
 	/*
 	 * Opaque string set with work_set_desc(). Printed out with task
@@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void)
  * Scheduler hooks for concurrency managed workqueue. Only to be used from
  * sched/core.c and workqueue.c.
  */
-void wq_worker_waking_up(struct task_struct *task, int cpu);
-struct task_struct *wq_worker_sleeping(struct task_struct *task);
+void wq_worker_running(struct task_struct *task);
+void wq_worker_sleeping(struct task_struct *task);
 
 #endif	/* _KERNEL_WORKQUEUE_INTERNAL_H */
