3535#include <linux/poll.h>
3636#include <linux/nmi.h>
3737#include <linux/cpu.h>
38+ #include <linux/ras.h>
3839#include <linux/smp.h>
3940#include <linux/fs.h>
4041#include <linux/mm.h>
@@ -160,47 +161,8 @@ static struct mce_log_buffer mcelog_buf = {
160161
/*
 * mce_log() as changed by this hunk.
 *
 * The lines without a '-' marker are what remains: @m is handed to
 * mce_gen_pool_add() and, when that call returns zero, mce_irq_work is
 * queued with irq_work_queue().
 *
 * The lines marked '-' (emitting the trace record and inserting the record
 * into mcelog_buf) are dropped from this function; the same statements show
 * up as added lines in mce_first_notifier().
 */
161162void mce_log (struct mce * m )
162163{
163- unsigned next , entry ;
164-
165- /* Emit the trace record: */
166- trace_mce_record (m );
167-
168164 if (!mce_gen_pool_add (m ))
169165 irq_work_queue (& mce_irq_work );
170-
171- wmb ();
172- for (;;) {
173- entry = mce_log_get_idx_check (mcelog_buf .next );
174- for (;;) {
175-
176- /*
177- * When the buffer fills up discard new entries.
178- * Assume that the earlier errors are the more
179- * interesting ones:
180- */
181- if (entry >= MCE_LOG_LEN ) {
182- set_bit (MCE_OVERFLOW ,
183- (unsigned long * )& mcelog_buf .flags );
184- return ;
185- }
186- /* Old left over entry. Skip: */
187- if (mcelog_buf .entry [entry ].finished ) {
188- entry ++ ;
189- continue ;
190- }
191- break ;
192- }
193- smp_rmb ();
194- next = entry + 1 ;
195- if (cmpxchg (& mcelog_buf .next , entry , next ) == entry )
196- break ;
197- }
198- memcpy (mcelog_buf .entry + entry , m , sizeof (struct mce ));
199- wmb ();
200- mcelog_buf .entry [entry ].finished = 1 ;
201- wmb ();
202-
203- set_bit (0 , & mce_need_notify );
204166}
205167
206168void mce_inject_log (struct mce * m )
@@ -213,6 +175,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
213175
214176static struct notifier_block mce_srao_nb ;
215177
178+ /*
179+ * We run the default notifier only if nothing beyond the mandatory
180+ * NUM_DEFAULT_NOTIFIERS notifiers is registered on the chain, i.e. just
181+ * the SRAO, the first and the default notifier.
182+ */
183+ #define NUM_DEFAULT_NOTIFIERS 3
216184static atomic_t num_notifiers ;
217185
218186void mce_register_decode_chain (struct notifier_block * nb )
@@ -522,7 +490,6 @@ static void mce_schedule_work(void)
522490
/*
 * irq_work callback for mce_irq_work: after this hunk it only calls
 * mce_schedule_work(). The mce_notify_irq() call on the '-' line is removed
 * here; mce_first_notifier() calls mce_notify_irq() in its added lines.
 */
523491static void mce_irq_work_cb (struct irq_work * entry )
524492{
525- mce_notify_irq ();
526493 mce_schedule_work ();
527495}
528495
@@ -565,6 +532,111 @@ static int mce_usable_address(struct mce *m)
565532 return 1 ;
566533}
567534
/*
 * memory_error - test whether the status word of @m matches one of the
 * vendor-specific patterns checked below.
 *
 * The vendor is taken from boot_cpu_data. For X86_VENDOR_AMD the 5-bit field
 * at status bits [20:16] must be 0x0 or 0x8. For X86_VENDOR_INTEL the low
 * status bits are compared against three masked patterns. Any other vendor
 * yields false.
 */
535+ static bool memory_error (struct mce * m )
536+ {
537+ struct cpuinfo_x86 * c = & boot_cpu_data ;
538+
539+ if (c -> x86_vendor == X86_VENDOR_AMD ) {
540+ /* ErrCodeExt[20:16] */
541+ u8 xec = (m -> status >> 16 ) & 0x1f ;
542+
543+ return (xec == 0x0 || xec == 0x8 );
544+ } else if (c -> x86_vendor == X86_VENDOR_INTEL ) {
545+ /*
546+ * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
547+ *
548+ * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
549+ * indicating a memory error. Bit 8 is used for indicating a
550+ * cache hierarchy error. The combination of bit 2 and bit 3
551+ * is used for indicating a `generic' cache hierarchy error.
552+ * But we can't just blindly check the above bits, because if
553+ * bit 11 is set, then it is a bus/interconnect error - and
554+ * either way the above bits just give more detail on what
555+ * bus/interconnect error happened. Note that bit 12 can be
556+ * ignored, as it's the "filter" bit.
557+ */
/*
 * Each mask below leaves out bit 12 (0x1000) and covers every other
 * bit from the tested position up to bit 15, so a pattern matches
 * only when those higher bits (bit 11 included) are all clear.
 */
558+ return (m -> status & 0xef80 ) == BIT (7 ) ||
559+ (m -> status & 0xef00 ) == BIT (8 ) ||
560+ (m -> status & 0xeffc ) == 0xc ;
561+ }
562+
563+ return false;
564+ }
565+
/*
 * cec_add_mce - pass @m to cec_add_elem() if it gets through the filter.
 *
 * Returns true only when @m is non-NULL, memory_error() and
 * mce_usable_address() both accept it, MCI_STATUS_UC is clear in its status,
 * and cec_add_elem() returns zero for the page frame number
 * (addr >> PAGE_SHIFT). Returns false in every other case.
 */
566+ static bool cec_add_mce (struct mce * m )
567+ {
568+ if (!m )
569+ return false;
570+
571+ /* We eat only correctable DRAM errors with usable addresses. */
572+ if (memory_error (m ) &&
573+ !(m -> status & MCI_STATUS_UC ) &&
574+ mce_usable_address (m ))
/* A zero return from cec_add_elem() is the only outcome reported as true. */
575+ if (!cec_add_elem (m -> addr >> PAGE_SHIFT ))
576+ return true;
577+
578+ return false;
579+ }
580+
/*
 * mce_first_notifier - notifier callback hooked up through first_nb.
 *
 * @nb and @val are not used. @data is treated as a struct mce pointer.
 *
 * Returns NOTIFY_DONE for a NULL record. Returns NOTIFY_STOP when
 * cec_add_mce() reports true for the record. Otherwise it emits the trace
 * record, claims a slot in mcelog_buf, copies the record into it, marks the
 * slot finished, sets bit 0 of mce_need_notify, calls mce_notify_irq() and
 * returns NOTIFY_DONE.
 *
 * If no slot below MCE_LOG_LEN is free, MCE_OVERFLOW is set in
 * mcelog_buf.flags and the function returns NOTIFY_DONE without copying the
 * record or notifying.
 */
581+ static int mce_first_notifier (struct notifier_block * nb , unsigned long val ,
582+ void * data )
583+ {
584+ struct mce * m = (struct mce * )data ;
585+ unsigned int next , entry ;
586+
587+ if (!m )
588+ return NOTIFY_DONE ;
589+
590+ if (cec_add_mce (m ))
591+ return NOTIFY_STOP ;
592+
593+ /* Emit the trace record: */
594+ trace_mce_record (m );
595+
596+ wmb ();
597+ for (;;) {
598+ entry = mce_log_get_idx_check (mcelog_buf .next );
599+ for (;;) {
600+
601+ /*
602+ * When the buffer fills up discard new entries.
603+ * Assume that the earlier errors are the more
604+ * interesting ones:
605+ */
606+ if (entry >= MCE_LOG_LEN ) {
607+ set_bit (MCE_OVERFLOW ,
608+ (unsigned long * )& mcelog_buf .flags );
609+ return NOTIFY_DONE ;
610+ }
611+ /* Old left over entry. Skip: */
612+ if (mcelog_buf .entry [entry ].finished ) {
613+ entry ++ ;
614+ continue ;
615+ }
616+ break ;
617+ }
618+ smp_rmb ();
619+ next = entry + 1 ;
/*
 * Claim slot 'entry' by moving mcelog_buf.next past it. If
 * mcelog_buf.next no longer holds 'entry' (presumably because
 * a concurrent writer advanced it - TODO confirm), the cmpxchg
 * does not match and the scan starts over.
 */
620+ if (cmpxchg (& mcelog_buf .next , entry , next ) == entry )
621+ break ;
622+ }
/*
 * Copy the record first and set .finished only afterwards, with
 * wmb() in between; NOTE(review): presumably so that a reader which
 * sees .finished also sees the complete record - confirm against
 * the reader side.
 */
623+ memcpy (mcelog_buf .entry + entry , m , sizeof (struct mce ));
624+ wmb ();
625+ mcelog_buf .entry [entry ].finished = 1 ;
626+ wmb ();
627+
628+ set_bit (0 , & mce_need_notify );
629+
630+ mce_notify_irq ();
631+
632+ return NOTIFY_DONE ;
633+ }
634+
/*
 * Notifier block that runs mce_first_notifier() at priority MCE_PRIO_FIRST.
 * mcheck_init() registers it with mce_register_decode_chain().
 */
635+ static struct notifier_block first_nb = {
636+ .notifier_call = mce_first_notifier ,
637+ .priority = MCE_PRIO_FIRST ,
638+ };
639+
568640static int srao_decode_notifier (struct notifier_block * nb , unsigned long val ,
569641 void * data )
570642{
@@ -594,11 +666,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
594666 if (!m )
595667 return NOTIFY_DONE ;
596668
597- /*
598- * Run the default notifier if we have only the SRAO
599- * notifier and us registered.
600- */
601- if (atomic_read (& num_notifiers ) > 2 )
669+ if (atomic_read (& num_notifiers ) > NUM_DEFAULT_NOTIFIERS )
602670 return NOTIFY_DONE ;
603671
604672 /* Don't print when mcelog is running */
@@ -655,37 +723,6 @@ static void mce_read_aux(struct mce *m, int i)
655723 }
656724}
657725
658- static bool memory_error (struct mce * m )
659- {
660- struct cpuinfo_x86 * c = & boot_cpu_data ;
661-
662- if (c -> x86_vendor == X86_VENDOR_AMD ) {
663- /* ErrCodeExt[20:16] */
664- u8 xec = (m -> status >> 16 ) & 0x1f ;
665-
666- return (xec == 0x0 || xec == 0x8 );
667- } else if (c -> x86_vendor == X86_VENDOR_INTEL ) {
668- /*
669- * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
670- *
671- * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
672- * indicating a memory error. Bit 8 is used for indicating a
673- * cache hierarchy error. The combination of bit 2 and bit 3
674- * is used for indicating a `generic' cache hierarchy error
675- * But we can't just blindly check the above bits, because if
676- * bit 11 is set, then it is a bus/interconnect error - and
677- * either way the above bits just gives more detail on what
678- * bus/interconnect error happened. Note that bit 12 can be
679- * ignored, as it's the "filter" bit.
680- */
681- return (m -> status & 0xef80 ) == BIT (7 ) ||
682- (m -> status & 0xef00 ) == BIT (8 ) ||
683- (m -> status & 0xeffc ) == 0xc ;
684- }
685-
686- return false;
687- }
688-
689726DEFINE_PER_CPU (unsigned , mce_poll_count );
690727
691728/*
@@ -2167,6 +2204,7 @@ __setup("mce", mcheck_enable);
21672204int __init mcheck_init (void )
21682205{
21692206 mcheck_intel_therm_init ();
2207+ mce_register_decode_chain (& first_nb );
21702208 mce_register_decode_chain (& mce_srao_nb );
21712209 mce_register_decode_chain (& mce_default_nb );
21722210 mcheck_vendor_init_severity ();
@@ -2716,6 +2754,7 @@ static int __init mcheck_late_init(void)
27162754 static_branch_inc (& mcsafe_key );
27172755
27182756 mcheck_debugfs_init ();
2757+ cec_init ();
27192758
27202759 /*
27212760 * Flush out everything that has been logged during early boot, now that
0 commit comments