Skip to content

Commit cd9c57c

Browse files
suryasaimadhu authored and KAGA-KOKO committed
x86/MCE: Dump MCE to dmesg if no consumers
When there are no error record consumers registered with the kernel, the only thing that appears in dmesg is something like:

  [ 300.000326] mce: [Hardware Error]: Machine check events logged

and the error records are gone. Which is seriously counterproductive. So let's dump them to dmesg instead, in such a case.

Requested-by: Eric Morton <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
1 parent 8c203db commit cd9c57c

File tree

1 file changed

+46
-6
lines changed
  • arch/x86/kernel/cpu/mcheck

1 file changed

+46
-6
lines changed

arch/x86/kernel/cpu/mcheck/mce.c

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
207207

208208
static struct notifier_block mce_srao_nb;
209209

210+
/*
 * Count of notifier blocks added via mce_register_decode_chain();
 * decremented in mce_unregister_decode_chain() and read by
 * mce_default_notifier() to decide whether to dump the record itself.
 */
static atomic_t num_notifiers;
211+
210212
void mce_register_decode_chain(struct notifier_block *nb)
211213
{
214+
atomic_inc(&num_notifiers);
215+
212216
/* Ensure SRAO notifier has the highest priority in the decode chain. */
213217
if (nb != &mce_srao_nb && nb->priority == INT_MAX)
214218
nb->priority -= 1;
@@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
219223

220224
/*
 * Remove @nb from the MCE decoder chain.
 *
 * The num_notifiers count is dropped first, then @nb is unregistered
 * from x86_mce_decoder_chain.
 */
void mce_unregister_decode_chain(struct notifier_block *nb)
221225
{
226+
atomic_dec(&num_notifiers);
227+
222228
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
223229
}
224230
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = {
270276
.misc = misc_reg
271277
};
272278

273-
static void print_mce(struct mce *m)
279+
static void __print_mce(struct mce *m)
274280
{
275-
int ret = 0;
276-
277-
pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
278-
m->extcpu, m->mcgstatus, m->bank, m->status);
281+
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
282+
m->extcpu,
283+
(m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
284+
m->mcgstatus, m->bank, m->status);
279285

280286
if (m->ip) {
281287
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
282288
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
283-
m->cs, m->ip);
289+
m->cs, m->ip);
284290

285291
if (m->cs == __KERNEL_CS)
286292
print_symbol("{%s}", m->ip);
@@ -308,6 +314,13 @@ static void print_mce(struct mce *m)
308314
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
309315
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
310316
cpu_data(m->extcpu).microcode);
317+
}
318+
319+
static void print_mce(struct mce *m)
320+
{
321+
int ret = 0;
322+
323+
__print_mce(m);
311324

312325
/*
313326
* Print out human-readable details about the MCE error,
@@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = {
569582
.priority = INT_MAX,
570583
};
571584

585+
/*
 * Fallback decode-chain callback: dump the error record with
 * __print_mce() when no other consumers are registered.
 *
 * @nb:   notifier block this callback belongs to (not used here)
 * @val:  notifier event value (not used here)
 * @data: pointer to the struct mce record; may be NULL
 *
 * Always returns NOTIFY_DONE, whether or not the record was printed.
 */
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
586+
void *data)
587+
{
588+
struct mce *m = (struct mce *)data;
589+
590+
/* Nothing to print without a record. */
if (!m)
591+
return NOTIFY_DONE;
592+
593+
/*
594+
* Run the default notifier if we have only the SRAO
595+
* notifier and us registered.
596+
*/
597+
/* More than two registered notifiers: skip the dump here. */
if (atomic_read(&num_notifiers) > 2)
598+
return NOTIFY_DONE;
599+
600+
__print_mce(m);
601+
602+
return NOTIFY_DONE;
603+
}
604+
605+
/*
 * Notifier block that hooks mce_default_notifier() into the decode
 * chain; registered from mcheck_init() via mce_register_decode_chain().
 */
static struct notifier_block mce_default_nb = {
606+
.notifier_call = mce_default_notifier,
607+
/* lowest prio, we want it to run last. */
608+
.priority = 0,
609+
};
610+
572611
/*
573612
* Read ADDR and MISC registers.
574613
*/
@@ -2138,6 +2177,7 @@ int __init mcheck_init(void)
21382177
{
21392178
mcheck_intel_therm_init();
21402179
mce_register_decode_chain(&mce_srao_nb);
2180+
mce_register_decode_chain(&mce_default_nb);
21412181
mcheck_vendor_init_severity();
21422182

21432183
INIT_WORK(&mce_work, mce_process_work);

0 commit comments

Comments
 (0)