Skip to content

Commit 9eb07a7

Browse files
author
Mauro Carvalho Chehab
committed
edac: edac_mc_handle_error(): add an error_count parameter
In order to avoid losing error events, it is desirable to group error events together and generate a single trace for several identical errors. The trace API already allows reporting multiple errors. Change the handle_error function to also allow that. The changes in the drivers were made by this small script: $file .=$_ while (<>); $file =~ s/(edac_mc_handle_error)\s*\(([^\,]+)\,([^\,]+)\,/$1($2,$3, 1,/g; print $file; Signed-off-by: Mauro Carvalho Chehab <[email protected]>
1 parent 03f7eae commit 9eb07a7

28 files changed

+104
-95
lines changed

drivers/edac/amd64_edac.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
10461046
if (!src_mci) {
10471047
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
10481048
(unsigned long)sys_addr);
1049-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1049+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
10501050
page, offset, syndrome,
10511051
-1, -1, -1,
10521052
"failed to map error addr to a node",
@@ -1057,7 +1057,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
10571057
/* Now map the sys_addr to a CSROW */
10581058
csrow = sys_addr_to_csrow(src_mci, sys_addr);
10591059
if (csrow < 0) {
1060-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1060+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
10611061
page, offset, syndrome,
10621062
-1, -1, -1,
10631063
"failed to map error addr to a csrow",
@@ -1077,7 +1077,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
10771077
amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
10781078
"possible error reporting race\n",
10791079
syndrome);
1080-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1080+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
10811081
page, offset, syndrome,
10821082
csrow, -1, -1,
10831083
"unknown syndrome - possible error reporting race",
@@ -1096,7 +1096,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
10961096
channel = ((sys_addr & BIT(3)) != 0);
10971097
}
10981098

1099-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci,
1099+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
11001100
page, offset, syndrome,
11011101
csrow, channel, -1,
11021102
"", "");
@@ -1608,7 +1608,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
16081608
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
16091609

16101610
if (csrow < 0) {
1611-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1611+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
16121612
page, offset, syndrome,
16131613
-1, -1, -1,
16141614
"failed to map error addr to a csrow",
@@ -1624,7 +1624,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
16241624
if (dct_ganging_enabled(pvt))
16251625
chan = get_channel_from_ecc_syndrome(mci, syndrome);
16261626

1627-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1627+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
16281628
page, offset, syndrome,
16291629
csrow, chan, -1,
16301630
"", "");
@@ -1909,7 +1909,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
19091909
/* Ensure that the Error Address is VALID */
19101910
if (!(m->status & MCI_STATUS_ADDRV)) {
19111911
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1912-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
1912+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
19131913
0, 0, 0,
19141914
-1, -1, -1,
19151915
"HW has no ERROR_ADDRESS available",
@@ -1937,7 +1937,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
19371937

19381938
if (!(m->status & MCI_STATUS_ADDRV)) {
19391939
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
1940-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
1940+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
19411941
0, 0, 0,
19421942
-1, -1, -1,
19431943
"HW has no ERROR_ADDRESS available",
@@ -1956,7 +1956,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
19561956
if (!src_mci) {
19571957
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
19581958
(unsigned long)sys_addr);
1959-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
1959+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
19601960
page, offset, 0,
19611961
-1, -1, -1,
19621962
"ERROR ADDRESS NOT mapped to a MC",
@@ -1970,13 +1970,13 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
19701970
if (csrow < 0) {
19711971
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
19721972
(unsigned long)sys_addr);
1973-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
1973+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
19741974
page, offset, 0,
19751975
-1, -1, -1,
19761976
"ERROR ADDRESS NOT mapped to CS",
19771977
"");
19781978
} else {
1979-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
1979+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
19801980
page, offset, 0,
19811981
csrow, -1, -1,
19821982
"", "");

drivers/edac/amd76x_edac.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
145145

146146
if (handle_errors) {
147147
row = (info->ecc_mode_status >> 4) & 0xf;
148-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
148+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
149149
mci->csrows[row]->first_page, 0, 0,
150150
row, 0, -1,
151151
mci->ctl_name, "");
@@ -160,7 +160,7 @@ static int amd76x_process_error_info(struct mem_ctl_info *mci,
160160

161161
if (handle_errors) {
162162
row = info->ecc_mode_status & 0xf;
163-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
163+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
164164
mci->csrows[row]->first_page, 0, 0,
165165
row, 0, -1,
166166
mci->ctl_name, "");

drivers/edac/cell_edac.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
4848
syndrome = (ar & 0x000000001fe00000ul) >> 21;
4949

5050
/* TODO: Decoding of the error address */
51-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
51+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
5252
csrow->first_page + pfn, offset, syndrome,
5353
0, chan, -1, "", "");
5454
}
@@ -70,7 +70,7 @@ static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
7070
offset = address & ~PAGE_MASK;
7171

7272
/* TODO: Decoding of the error address */
73-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
73+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
7474
csrow->first_page + pfn, offset, 0,
7575
0, chan, -1, "", "");
7676
}

drivers/edac/cpc925_edac.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,15 +554,15 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
554554
if (apiexcp & CECC_EXCP_DETECTED) {
555555
cpc925_mc_printk(mci, KERN_INFO, "DRAM CECC Fault\n");
556556
channel = cpc925_mc_find_channel(mci, syndrome);
557-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
557+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
558558
pfn, offset, syndrome,
559559
csrow, channel, -1,
560560
mci->ctl_name, "");
561561
}
562562

563563
if (apiexcp & UECC_EXCP_DETECTED) {
564564
cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
565-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
565+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
566566
pfn, offset, 0,
567567
csrow, -1, -1,
568568
mci->ctl_name, "");

drivers/edac/e752x_edac.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ static void do_process_ce(struct mem_ctl_info *mci, u16 error_one,
371371
channel = !(error_one & 1);
372372

373373
/* e752x mc reads 34:6 of the DRAM linear address */
374-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
374+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
375375
page, offset_in_page(sec1_add << 4), sec1_syndrome,
376376
row, channel, -1,
377377
"e752x CE", "");
@@ -408,7 +408,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
408408
edac_mc_find_csrow_by_page(mci, block_page);
409409

410410
/* e752x mc reads 34:6 of the DRAM linear address */
411-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
411+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
412412
block_page,
413413
offset_in_page(error_2b << 4), 0,
414414
row, -1, -1,
@@ -427,7 +427,7 @@ static void do_process_ue(struct mem_ctl_info *mci, u16 error_one,
427427
edac_mc_find_csrow_by_page(mci, block_page);
428428

429429
/* e752x mc reads 34:6 of the DRAM linear address */
430-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
430+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
431431
block_page,
432432
offset_in_page(error_2b << 4), 0,
433433
row, -1, -1,
@@ -454,7 +454,7 @@ static inline void process_ue_no_info_wr(struct mem_ctl_info *mci,
454454
return;
455455

456456
edac_dbg(3, "\n");
457-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0,
457+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
458458
-1, -1, -1,
459459
"e752x UE log memory write", "");
460460
}

drivers/edac/e7xxx_edac.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,14 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
219219
row = edac_mc_find_csrow_by_page(mci, page);
220220
/* convert syndrome to channel */
221221
channel = e7xxx_find_channel(syndrome);
222-
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, page, 0, syndrome,
222+
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, page, 0, syndrome,
223223
row, channel, -1, "e7xxx CE", "");
224224
}
225225

226226
static void process_ce_no_info(struct mem_ctl_info *mci)
227227
{
228228
edac_dbg(3, "\n");
229-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
229+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
230230
"e7xxx CE log register overflow", "");
231231
}
232232

@@ -242,15 +242,15 @@ static void process_ue(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
242242
block_page = error_2b >> 6; /* convert to 4k address */
243243
row = edac_mc_find_csrow_by_page(mci, block_page);
244244

245-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, block_page, 0, 0,
245+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, block_page, 0, 0,
246246
row, -1, -1, "e7xxx UE", "");
247247
}
248248

249249
static void process_ue_no_info(struct mem_ctl_info *mci)
250250
{
251251
edac_dbg(3, "\n");
252252

253-
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 0, 0, 0, -1, -1, -1,
253+
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
254254
"e7xxx UE log register overflow", "");
255255
}
256256

drivers/edac/edac_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
455455
unsigned long page);
456456
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
457457
struct mem_ctl_info *mci,
458+
const u16 error_count,
458459
const unsigned long page_frame_number,
459460
const unsigned long offset_in_page,
460461
const unsigned long syndrome,

0 commit comments

Comments
 (0)