
Commit 030dcdd

anna-marialx authored and KAGA-KOKO committed
timers: Prepare support for PREEMPT_RT
When PREEMPT_RT is enabled, the soft interrupt thread can be preempted. If the soft interrupt thread is preempted in the middle of a timer callback, then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU, it has to spin wait for the timer handler to complete. This can result in unbounded priority inversion.

  - If the caller originates from the task which preempted the timer handler on the same CPU, then spin waiting for the timer handler to complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held around the execution of the timer callbacks. If del_timer_sync() detects that the timer callback is currently running, it blocks on the expiry lock. When the callback is finished, the expiry lock is dropped by the softirq thread, which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the live lock issues.

This mechanism is not used for timers which are marked IRQSAFE, as preemption is disabled across their callbacks and therefore this situation cannot happen. The callbacks of such timers need to be individually audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a timer callback is scheduled out. If a second vCPU of the same guest calls del_timer_sync(), it will spin wait for the other vCPU to be scheduled back in. The expiry lock mechanism would avoid that. It'd be trivial to enable this when paravirt spinlocks are enabled in a guest, but it's not clear whether this is an actual problem in the wild, so for now it's an RT only mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent f61eff8 commit 030dcdd
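
The mechanism described above can be modeled outside the kernel. The following is a minimal user-space sketch (not part of the commit) of the expiry-lock handshake, using a pthread mutex: the "softirq" thread holds the lock across the callback and briefly drops it when a waiter has registered itself, while the "deleter" blocks on the lock instead of spin waiting. The thread layout, the usleep() timing and the callback_done flag are invented for the demo; only the increment/block/decrement and drop-and-reacquire patterns mirror the kernel helpers.

/* expiry_lock_model.c - build with: cc -pthread expiry_lock_model.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;
static atomic_int callback_done;

/* Stand-in for the softirq thread expiring a timer. */
static void *softirq_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);	/* timer_base_lock_expiry() */
	usleep(100 * 1000);			/* long-running timer callback */
	atomic_store(&callback_done, 1);

	/* timer_sync_wait_running(): hand the lock to a blocked deleter. */
	if (atomic_load(&timer_waiters)) {
		pthread_mutex_unlock(&expiry_lock);
		pthread_mutex_lock(&expiry_lock);
	}
	pthread_mutex_unlock(&expiry_lock);	/* timer_base_unlock_expiry() */
	return NULL;
}

/* Stand-in for del_timer_sync() on PREEMPT_RT. */
static void *deleter_thread(void *arg)
{
	(void)arg;
	while (!atomic_load(&callback_done)) {	/* try_to_del_timer_sync() < 0 */
		/* del_timer_wait_running(): block instead of spinning. */
		atomic_fetch_add(&timer_waiters, 1);
		pthread_mutex_lock(&expiry_lock);
		atomic_fetch_sub(&timer_waiters, 1);
		pthread_mutex_unlock(&expiry_lock);
	}
	printf("timer deleted after the callback finished\n");
	return NULL;
}

int main(void)
{
	pthread_t soft, del;

	pthread_create(&soft, NULL, softirq_thread, NULL);
	pthread_create(&del, NULL, deleter_thread, NULL);
	pthread_join(soft, NULL);
	pthread_join(del, NULL);
	return 0;
}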

2 files changed: +96 -9 lines changed

include/linux/timer.h

Lines changed: 1 addition & 1 deletion

@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)	del_timer(t)
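
For context, here is a hedged sketch of the caller pattern this header change affects: a driver arming a timer and cancelling it synchronously on teardown. The mydev structure and function names are hypothetical; timer_setup(), mod_timer(), from_timer() and del_timer_sync() are the real API. The point of the change is that with PREEMPT_RT=y, del_timer_sync() now resolves to the synchronizing implementation even on UP builds, because the softirq thread running the callback can be preempted.

#include <linux/timer.h>
#include <linux/jiffies.h>

struct mydev {
	struct timer_list timeout;	/* hypothetical device state */
};

static void mydev_timeout(struct timer_list *t)
{
	struct mydev *dev = from_timer(dev, t, timeout);

	/* Handle the timeout; on PREEMPT_RT this runs in a preemptible
	 * softirq thread, which is what makes the expiry lock necessary. */
	(void)dev;
}

static void mydev_start(struct mydev *dev)
{
	timer_setup(&dev->timeout, mydev_timeout, 0);
	mod_timer(&dev->timeout, jiffies + HZ);
}

static void mydev_stop(struct mydev *dev)
{
	/*
	 * Must not return while mydev_timeout() can still be running.
	 * After this change this is the real synchronizing variant on
	 * UP kernels with PREEMPT_RT=y too, not the del_timer() alias.
	 */
	del_timer_sync(&dev->timeout);
}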

kernel/time/timer.c

Lines changed: 95 additions & 8 deletions

@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reacquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion if the softirq thread on a remote CPU
+ * got preempted, and it prevents a live lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;

@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
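
Note that the TIMER_IRQSAFE branch above deliberately skips timer_sync_wait_running(): IRQSAFE callbacks run with interrupts disabled, so they cannot be preempted mid-callback and the expiry-lock handshake is unnecessary; per the changelog, their callbacks must instead be audited individually for RT compliance. A minimal sketch of declaring such a timer (the callback and variable names are made up; TIMER_IRQSAFE, timer_setup() and mod_timer() are real):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list watchdog_timer;	/* hypothetical */

static void watchdog_fire(struct timer_list *t)
{
	/*
	 * Runs with interrupts disabled even on PREEMPT_RT, so keep it
	 * short and never sleep or take sleeping locks here - this is
	 * the per-callback RT audit the changelog refers to.
	 */
}

static void watchdog_init(void)
{
	timer_setup(&watchdog_timer, watchdog_fire, TIMER_IRQSAFE);
	mod_timer(&watchdog_timer, jiffies + HZ);
}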
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base)
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
