
Commit 3645402

sched_ext: Track tasks that are subjects of the in-flight SCX operation
When some SCX operations are in flight, it is known that the subject task's rq lock is held throughout, which makes it safe to access certain fields of the task - e.g. its current task_group. We want to add SCX kfunc helpers that can make use of this guarantee - e.g. to help determine the currently associated CPU cgroup from the task's current task_group.

As it'd be dangerous to call such a helper on a task which isn't rq lock protected, the helper should be able to verify the input task and reject accordingly. This patch adds sched_ext_entity.kf_tasks[], which tracks the tasks that are currently being operated on by a terminal SCX operation. The new SCX_CALL_OP_[2]TASK[_RET]() variants can be used when invoking SCX operations which take tasks as arguments, and scx_kf_allowed_on_arg_tasks() can be used by kfunc helpers to verify the input task status.

Note that as sched_ext_entity.kf_tasks[] can't handle nesting, the tracking is currently limited to terminal SCX operations. If needed in the future, this restriction can be removed by moving the tracking to the task side with a couple of per-task counters.

v2: Updated to reflect the addition of SCX_KF_SELECT_CPU.

Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: David Vernet <[email protected]>
1 parent 22a9202 commit 3645402
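
As an illustration of the intended use, a kfunc helper could gate itself with scx_kf_allowed_on_arg_tasks() before touching rq-lock-protected task state. The sketch below is hypothetical - the helper name scx_bpf_task_cgroup and the task_group/cgroup access are assumptions for this example, not part of this commit - and only shows the calling pattern:

/*
 * Hypothetical kfunc helper sketch: return the CPU-controller cgroup of @p.
 * Safe only because scx_kf_allowed_on_arg_tasks() verifies that @p is a
 * subject of the in-flight terminal SCX operation, i.e. its rq lock is held.
 */
__bpf_kfunc struct cgroup *scx_bpf_task_cgroup(struct task_struct *p)
{
	if (!scx_kf_allowed_on_arg_tasks(__SCX_KF_RQ_LOCKED, p))
		return NULL;

	/* @p's rq lock is held, so its current task_group can't change here. */
	return task_group(p)->css.cgroup;
}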

File tree

2 files changed: +76 -17 lines changed


include/linux/sched/ext.h

Lines changed: 2 additions & 0 deletions
@@ -106,6 +106,7 @@ enum scx_kf_mask {
 
 	__SCX_KF_RQ_LOCKED	= SCX_KF_DISPATCH |
 				  SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
+	__SCX_KF_TERMINAL	= SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
 };
 
 /*
@@ -120,6 +121,7 @@ struct sched_ext_entity {
 	s32			sticky_cpu;
 	s32			holding_cpu;
 	u32			kf_mask;	/* see scx_kf_mask above */
+	struct task_struct	*kf_tasks[2];	/* see SCX_CALL_OP_TASK() */
 	atomic_long_t		ops_state;
 
 	struct list_head	runnable_node;	/* rq->scx.runnable_list */

kernel/sched/ext.c

Lines changed: 74 additions & 17 deletions
@@ -817,6 +817,47 @@ do { \
 	__ret;								\
 })
 
+/*
+ * Some kfuncs are allowed only on the tasks that are subjects of the
+ * in-progress scx_ops operation for, e.g., locking guarantees. To enforce such
+ * restrictions, the following SCX_CALL_OP_*() variants should be used when
+ * invoking scx_ops operations that take task arguments. These can only be used
+ * for non-nesting operations due to the way the tasks are tracked.
+ *
+ * kfuncs which can only operate on such tasks can in turn use
+ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
+ * the specific task.
+ */
+#define SCX_CALL_OP_TASK(mask, op, task, args...)			\
+do {									\
+	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task;				\
+	SCX_CALL_OP(mask, op, task, ##args);				\
+	current->scx.kf_tasks[0] = NULL;				\
+} while (0)
+
+#define SCX_CALL_OP_TASK_RET(mask, op, task, args...)			\
+({									\
+	__typeof__(scx_ops.op(task, ##args)) __ret;			\
+	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task;				\
+	__ret = SCX_CALL_OP_RET(mask, op, task, ##args);		\
+	current->scx.kf_tasks[0] = NULL;				\
+	__ret;								\
+})
+
+#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...)	\
+({									\
+	__typeof__(scx_ops.op(task0, task1, ##args)) __ret;		\
+	BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL);			\
+	current->scx.kf_tasks[0] = task0;				\
+	current->scx.kf_tasks[1] = task1;				\
+	__ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args);	\
+	current->scx.kf_tasks[0] = NULL;				\
+	current->scx.kf_tasks[1] = NULL;				\
+	__ret;								\
+})
+
 /* @mask is constant, always inline to cull unnecessary branches */
 static __always_inline bool scx_kf_allowed(u32 mask)
 {
@@ -846,6 +887,22 @@ static __always_inline bool scx_kf_allowed(u32 mask)
 	return true;
 }
 
+/* see SCX_CALL_OP_TASK() */
+static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
+							struct task_struct *p)
+{
+	if (!scx_kf_allowed(mask))
+		return false;
+
+	if (unlikely((p != current->scx.kf_tasks[0] &&
+		      p != current->scx.kf_tasks[1]))) {
+		scx_ops_error("called on a task not being operated on");
+		return false;
+	}
+
+	return true;
+}
+
 
 /*
  * SCX task iterator.
@@ -1342,7 +1399,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	WARN_ON_ONCE(*ddsp_taskp);
 	*ddsp_taskp = p;
 
-	SCX_CALL_OP(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
+	SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
 
 	*ddsp_taskp = NULL;
 	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -1427,7 +1484,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
 	add_nr_running(rq, 1);
 
 	if (SCX_HAS_OP(runnable))
-		SCX_CALL_OP(SCX_KF_REST, runnable, p, enq_flags);
+		SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
 
 	do_enqueue_task(rq, p, enq_flags, sticky_cpu);
 }
@@ -1453,7 +1510,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
 		BUG();
 	case SCX_OPSS_QUEUED:
 		if (SCX_HAS_OP(dequeue))
-			SCX_CALL_OP(SCX_KF_REST, dequeue, p, deq_flags);
+			SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
 
 		if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
 					    SCX_OPSS_NONE))
@@ -1502,11 +1559,11 @@ static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
 	 */
 	if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
 		update_curr_scx(rq);
-		SCX_CALL_OP(SCX_KF_REST, stopping, p, false);
+		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
 	}
 
 	if (SCX_HAS_OP(quiescent))
-		SCX_CALL_OP(SCX_KF_REST, quiescent, p, deq_flags);
+		SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
 
 	if (deq_flags & SCX_DEQ_SLEEP)
 		p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -1525,7 +1582,7 @@ static void yield_task_scx(struct rq *rq)
 	struct task_struct *p = rq->curr;
 
 	if (SCX_HAS_OP(yield))
-		SCX_CALL_OP_RET(SCX_KF_REST, yield, p, NULL);
+		SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
 	else
 		p->scx.slice = 0;
 }
@@ -1535,7 +1592,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
 	struct task_struct *from = rq->curr;
 
 	if (SCX_HAS_OP(yield))
-		return SCX_CALL_OP_RET(SCX_KF_REST, yield, from, to);
+		return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
 	else
 		return false;
 }
@@ -2091,7 +2148,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP(SCX_KF_REST, running, p);
+		SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
 
 	clr_task_runnable(p, true);
 
@@ -2155,7 +2212,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p)
 
 	/* see dequeue_task_scx() on why we skip when !QUEUED */
 	if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
-		SCX_CALL_OP(SCX_KF_REST, stopping, p, true);
+		SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
 
 	/*
 	 * If we're being called from put_prev_task_balance(), balance_scx() may
@@ -2377,8 +2434,8 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 	WARN_ON_ONCE(*ddsp_taskp);
 	*ddsp_taskp = p;
 
-	cpu = SCX_CALL_OP_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
-			      select_cpu, p, prev_cpu, wake_flags);
+	cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
+				   select_cpu, p, prev_cpu, wake_flags);
 	*ddsp_taskp = NULL;
 	if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
 		return cpu;
@@ -2411,8 +2468,8 @@ static void set_cpus_allowed_scx(struct task_struct *p,
 	 * designation pointless. Cast it away when calling the operation.
 	 */
 	if (SCX_HAS_OP(set_cpumask))
-		SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
-			    (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+				 (struct cpumask *)p->cpus_ptr);
 }
 
 static void reset_idle_masks(void)
@@ -2647,7 +2704,7 @@ static void scx_ops_enable_task(struct task_struct *p)
 	 */
 	set_task_scx_weight(p);
 	if (SCX_HAS_OP(enable))
-		SCX_CALL_OP(SCX_KF_REST, enable, p);
+		SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
 	scx_set_task_state(p, SCX_TASK_ENABLED);
 
 	if (SCX_HAS_OP(set_weight))
@@ -2801,7 +2858,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p, int newprio)
 
 	set_task_scx_weight(p);
 	if (SCX_HAS_OP(set_weight))
-		SCX_CALL_OP(SCX_KF_REST, set_weight, p, p->scx.weight);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
 }
 
 static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
@@ -2817,8 +2874,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
 	 * different scheduler class. Keep the BPF scheduler up-to-date.
 	 */
 	if (SCX_HAS_OP(set_cpumask))
-		SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
-			    (struct cpumask *)p->cpus_ptr);
+		SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+				 (struct cpumask *)p->cpus_ptr);
 }
 
 static void switched_from_scx(struct rq *rq, struct task_struct *p)