Skip to content

Commit db3a34e

Browse files
paulmckrcu authored and KAGA-KOKO committed
clocksource: Retry clock read if long delays detected
When the clocksource watchdog marks a clock as unstable, this might be due to that clock being unstable or it might be due to delays that happen to occur between the reads of the two clocks. Yes, interrupts are disabled across those two reads, but there is no shortage of things that can delay interrupts-disabled regions of code, ranging from SMI handlers to vCPU preemption. It would be good to have some indication as to why the clock was marked unstable. Therefore, re-read the watchdog clock on either side of the read from the clock under test. If the watchdog clock shows an excessive time delta between its pair of reads, the reads are retried. The maximum number of retries is specified by a new kernel boot parameter clocksource.max_cswd_read_retries, which defaults to three, that is, up to four reads, one initial and up to three retries. If more than one retry was required, a message is printed on the console (the occasional single retry is expected behavior, especially in guest OSes). If the maximum number of retries is exceeded, the clock under test will be marked unstable. However, the probability of this happening due to various sorts of delays is quite small. In addition, the reason (clock-read delays) for the unstable marking will be apparent. Reported-by: Chris Mason <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Acked-by: Feng Tang <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 64ab707 commit db3a34e

File tree

2 files changed

+53
-6
lines changed

2 files changed

+53
-6
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,12 @@
581581
loops can be debugged more effectively on production
582582
systems.
583583

584+
clocksource.max_cswd_read_retries= [KNL]
585+
Number of clocksource_watchdog() retries due to
586+
external delays before the clock will be marked
587+
unstable. Defaults to three retries, that is,
588+
four attempts to read the clock under test.
589+
584590
clearcpuid=BITNUM[,BITNUM...] [X86]
585591
Disable CPUID feature X for the kernel. See
586592
arch/x86/include/asm/cpufeatures.h for the valid bit

kernel/time/clocksource.c

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
124124
#define WATCHDOG_INTERVAL (HZ >> 1)
125125
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
126126

127+
/*
128+
* Maximum permissible delay between two readouts of the watchdog
129+
* clocksource surrounding a read of the clocksource being validated.
130+
* This delay could be due to SMIs, NMIs, or to VCPU preemptions.
131+
*/
132+
#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
133+
127134
static void clocksource_watchdog_work(struct work_struct *work)
128135
{
129136
/*
@@ -184,12 +191,45 @@ void clocksource_mark_unstable(struct clocksource *cs)
184191
spin_unlock_irqrestore(&watchdog_lock, flags);
185192
}
186193

194+
/*
 * Upper bound on the number of retries cs_watchdog_read() performs when
 * the two watchdog reads bracketing a clocksource read are too far apart.
 * Defaults to 3 retries, i.e. up to four read attempts in total.
 * Registered as a module parameter with mode 0644; documented as
 * clocksource.max_cswd_read_retries= in kernel-parameters.txt.
 */
static ulong max_cswd_read_retries = 3;
195+
module_param(max_cswd_read_retries, ulong, 0644);
196+
197+
/*
 * cs_watchdog_read - sample the watchdog and a clocksource under test,
 * retrying when the sample was disturbed by a long delay.
 *
 * @cs:    clocksource being validated
 * @csnow: out: value returned by cs->read() on the last attempt
 * @wdnow: out: value of the first watchdog read on the last attempt
 *
 * Each attempt reads the watchdog, then @cs, then the watchdog again, all
 * with local interrupts disabled. The time elapsed between the two watchdog
 * reads is compared against WATCHDOG_MAX_SKEW; if it is within the limit
 * the sample is accepted. Otherwise the whole triple read is repeated, for
 * at most max_cswd_read_retries retries after the initial attempt.
 *
 * Returns true if an attempt stayed within WATCHDOG_MAX_SKEW, false if
 * every attempt exceeded it (a warning is logged in that case). On a false
 * return *csnow and *wdnow still hold the values of the last attempt.
 *
 * NOTE(review): nretries is an unsigned int while max_cswd_read_retries is
 * a ulong, and nretries is printed with %d in both messages below; confirm
 * that very large parameter values are not a concern.
 */
static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
198+
{
199+
unsigned int nretries;
200+
u64 wd_end, wd_delta;
201+
int64_t wd_delay;
202+
203+
/* nretries == 0 is the initial attempt, so the body runs at least once. */
for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
204+
local_irq_disable();
205+
*wdnow = watchdog->read(watchdog);
206+
*csnow = cs->read(cs);
207+
/* Second watchdog read brackets the cs->read() above. */
wd_end = watchdog->read(watchdog);
208+
local_irq_enable();
209+
210+
/*
 * Watchdog time that elapsed across the bracketed read. Presumably
 * clocksource_delta() yields the masked cycle difference and
 * clocksource_cyc2ns() converts it to nanoseconds - helpers are not
 * visible here; the failure message below reports wd_delay in ns.
 */
wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
211+
wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
212+
watchdog->shift);
213+
if (wd_delay <= WATCHDOG_MAX_SKEW) {
214+
/*
 * Stay quiet for zero or one retry; warn when more than one retry
 * was needed, or when success came only at (or beyond) the
 * configured retry limit. With a limit of 0 this condition is
 * true on every successful read.
 */
if (nretries > 1 || nretries >= max_cswd_read_retries) {
215+
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
216+
smp_processor_id(), watchdog->name, nretries);
217+
}
218+
return true;
219+
}
220+
}
221+
222+
/* All attempts exceeded WATCHDOG_MAX_SKEW; report the last measured delay. */
pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
223+
smp_processor_id(), watchdog->name, wd_delay, nretries);
224+
return false;
225+
}
226+
187227
static void clocksource_watchdog(struct timer_list *unused)
188228
{
189-
struct clocksource *cs;
190229
u64 csnow, wdnow, cslast, wdlast, delta;
191-
int64_t wd_nsec, cs_nsec;
192230
int next_cpu, reset_pending;
231+
int64_t wd_nsec, cs_nsec;
232+
struct clocksource *cs;
193233

194234
spin_lock(&watchdog_lock);
195235
if (!watchdog_running)
@@ -206,10 +246,11 @@ static void clocksource_watchdog(struct timer_list *unused)
206246
continue;
207247
}
208248

209-
local_irq_disable();
210-
csnow = cs->read(cs);
211-
wdnow = watchdog->read(watchdog);
212-
local_irq_enable();
249+
if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
250+
/* Clock readout unreliable, so give it up. */
251+
__clocksource_unstable(cs);
252+
continue;
253+
}
213254

214255
/* Clocksource initialized ? */
215256
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||

0 commit comments

Comments
 (0)