|
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <[email protected]>
+ * Copyright (c) 2022 David Vernet <[email protected]>
+ */
 #ifndef _LINUX_SCHED_EXT_H
 #define _LINUX_SCHED_EXT_H

 #ifdef CONFIG_SCHED_CLASS_EXT
-#error "NOT IMPLEMENTED YET"
+
+#include <linux/llist.h>
+#include <linux/rhashtable-types.h>
+
+enum scx_public_consts {
+	SCX_OPS_NAME_LEN	= 128,
+
+	SCX_SLICE_DFL		= 20 * 1000000,	/* 20ms */
+};
+
+/*
+ * DSQ (dispatch queue) IDs are 64bit of the format:
+ *
+ *   Bits: [63] [62 ..  0]
+ *         [ B] [   ID   ]
+ *
+ *    B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs
+ *   ID: 63 bit ID
+ *
+ * Built-in IDs:
+ *
+ *   Bits: [63] [62] [61..32] [31 ..  0]
+ *         [ 1] [ L] [   R  ] [    V   ]
+ *
+ *    1: 1 for built-in DSQs.
+ *    L: 1 for LOCAL_ON DSQ IDs, 0 for others
+ *    V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value.
+ */
+enum scx_dsq_id_flags {
+	SCX_DSQ_FLAG_BUILTIN	= 1LLU << 63,
+	SCX_DSQ_FLAG_LOCAL_ON	= 1LLU << 62,
+
+	SCX_DSQ_INVALID		= SCX_DSQ_FLAG_BUILTIN | 0,
+	SCX_DSQ_GLOBAL		= SCX_DSQ_FLAG_BUILTIN | 1,
+	SCX_DSQ_LOCAL		= SCX_DSQ_FLAG_BUILTIN | 2,
+	SCX_DSQ_LOCAL_ON	= SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON,
+	SCX_DSQ_LOCAL_CPU_MASK	= 0xffffffffLLU,
+};
+
+/*
+ * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the
+ * scheduler core and the BPF scheduler. See the documentation for more details.
+ */
+struct scx_dispatch_q {
+	raw_spinlock_t		lock;
+	struct list_head	list;	/* tasks in dispatch order */
+	u32			nr;
+	u64			id;
+	struct rhash_head	hash_node;
+	struct llist_node	free_node;
+	struct rcu_head		rcu;
+};
+
+/* scx_entity.flags */
+enum scx_ent_flags {
+	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
+	SCX_TASK_BAL_KEEP	= 1 << 1, /* balance decided to keep current */
+	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
+	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
+
+	SCX_TASK_STATE_SHIFT	= 8,	  /* bits 8 and 9 are used to carry scx_task_state */
+	SCX_TASK_STATE_BITS	= 2,
+	SCX_TASK_STATE_MASK	= ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT,
+
+	SCX_TASK_CURSOR		= 1 << 31, /* iteration cursor, not a task */
+};
+
+/* scx_entity.flags & SCX_TASK_STATE_MASK */
+enum scx_task_state {
+	SCX_TASK_NONE,		/* ops.init_task() not called yet */
+	SCX_TASK_INIT,		/* ops.init_task() succeeded, but task can be cancelled */
+	SCX_TASK_READY,		/* fully initialized, but not in sched_ext */
+	SCX_TASK_ENABLED,	/* fully initialized and in sched_ext */
+
+	SCX_TASK_NR_STATES,
+};
+
+/*
+ * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from
+ * everywhere and the following bits track which kfunc sets are currently
+ * allowed for %current. This simple per-task tracking works because SCX ops
+ * nest in a limited way. BPF will likely implement a way to allow and disallow
+ * kfuncs depending on the calling context which will replace this manual
+ * mechanism. See scx_kf_allow().
+ */
+enum scx_kf_mask {
+	SCX_KF_UNLOCKED		= 0,	  /* not sleepable, not rq locked */
+	/* all non-sleepables may be nested inside SLEEPABLE */
+	SCX_KF_SLEEPABLE	= 1 << 0, /* sleepable init operations */
+	/* ops.dequeue (in REST) may be nested inside DISPATCH */
+	SCX_KF_DISPATCH		= 1 << 2, /* ops.dispatch() */
+	SCX_KF_ENQUEUE		= 1 << 3, /* ops.enqueue() and ops.select_cpu() */
+	SCX_KF_SELECT_CPU	= 1 << 4, /* ops.select_cpu() */
+	SCX_KF_REST		= 1 << 5, /* other rq-locked operations */
+
+	__SCX_KF_RQ_LOCKED	= SCX_KF_DISPATCH |
+				  SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
+};
+
+/*
+ * The following is embedded in task_struct and contains all fields necessary
+ * for a task to be scheduled by SCX.
+ */
+struct sched_ext_entity {
+	struct scx_dispatch_q	*dsq;
+	struct list_head	dsq_node;
+	u32			flags;		/* protected by rq lock */
+	u32			weight;
+	s32			sticky_cpu;
+	s32			holding_cpu;
+	u32			kf_mask;	/* see scx_kf_mask above */
+	atomic_long_t		ops_state;
+
+	struct list_head	runnable_node;	/* rq->scx.runnable_list */
+
+	u64			ddsp_dsq_id;
+	u64			ddsp_enq_flags;
+
+	/* BPF scheduler modifiable fields */
+
+	/*
+	 * Runtime budget in nsecs. This is usually set through
+	 * scx_bpf_dispatch() but can also be modified directly by the BPF
+	 * scheduler. Automatically decreased by SCX as the task executes. On
+	 * depletion, a scheduling event is triggered.
+	 */
+	u64			slice;
+
+	/* cold fields */
+	/* must be the last field, see init_scx_entity() */
+	struct list_head	tasks_node;
+};
+
+void sched_ext_free(struct task_struct *p);
+
 #else	/* !CONFIG_SCHED_CLASS_EXT */

 static inline void sched_ext_free(struct task_struct *p) {}
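
A note on the DSQ ID encoding documented in the hunk above: SCX_DSQ_LOCAL_ON already carries the built-in bit (63) and the LOCAL_ON bit (62), so a per-CPU built-in ID is simply that flag word with the CPU number in the low 32 bits (the V field). The helpers below are an illustrative sketch only and are not part of this commit; they use nothing beyond the enum values defined above.

static inline u64 example_local_on_dsq_id(s32 cpu)
{
	/* bits 63 (builtin) and 62 (LOCAL_ON) come from SCX_DSQ_LOCAL_ON,
	 * the CPU number fills the V field in the low 32 bits
	 */
	return SCX_DSQ_LOCAL_ON | (u32)cpu;
}

static inline s32 example_dsq_id_to_cpu(u64 dsq_id)
{
	/* recover the CPU number from a LOCAL_ON DSQ ID */
	return dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
}

static inline bool example_dsq_id_is_builtin(u64 dsq_id)
{
	/* bit 63 distinguishes built-in IDs from ops-created user DSQs */
	return dsq_id & SCX_DSQ_FLAG_BUILTIN;
}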
|
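Likewise, SCX_TASK_STATE_SHIFT/BITS/MASK reserve bits 8 and 9 of scx_entity.flags for an scx_task_state value. A minimal sketch of reading and writing that packed field, assuming the entity is embedded in task_struct as p->scx; the helper names are hypothetical, not from this commit.

static inline enum scx_task_state example_get_task_state(const struct task_struct *p)
{
	/* extract the two state bits and shift them down to a scx_task_state value */
	return (enum scx_task_state)((p->scx.flags & SCX_TASK_STATE_MASK) >> SCX_TASK_STATE_SHIFT);
}

static inline void example_set_task_state(struct task_struct *p, enum scx_task_state state)
{
	/* clear the old state bits, then store the new state without touching other flags */
	p->scx.flags &= ~SCX_TASK_STATE_MASK;
	p->scx.flags |= state << SCX_TASK_STATE_SHIFT;
}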
|
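Finally, the comment on sched_ext_entity::slice refers to scx_bpf_dispatch(). As a hedged illustration of how a BPF scheduler typically sets the slice, here is a minimal ops.enqueue() fragment; it assumes the usual scx BPF conventions (BPF_STRUCT_OPS from the scx common headers) and is not part of this commit.

void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
{
	/* queue every task on the shared global DSQ with the default 20ms slice */
	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}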