Skip to content

Commit b4af279

Browse files
vipinparasharmpe
authored and committed
powerpc/pseries: Limit EPOW reset event warnings
The kernel prints warnings about various EPOW events for user information/action after parsing EPOW interrupts. At times, the EPOW reset event warning below is seen flooding the kernel log over a period of time:

May 25 03:46:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:46:52 alp kernel: Non critical power or cooling issue cleared
May 25 03:53:48 alp kernel: Non critical power or cooling issue cleared
May 25 03:55:46 alp kernel: Non critical power or cooling issue cleared
May 25 03:56:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:59:04 alp kernel: Non critical power or cooling issue cleared
May 25 04:02:01 alp kernel: Non critical power or cooling issue cleared

These EPOW reset events are spurious in nature and are triggered by firmware without an actual EPOW event being reset.

This patch avoids these multiple EPOW reset warnings by using a counter variable. This variable is incremented every time an EPOW event is reported. Upon receiving an EPOW reset event, the same variable is checked to filter out spurious events and is decremented accordingly.

This patch also improves the log messages to better describe the EPOW event being reported. Adjacent log messages are merged into a single one to reduce the number of lines printed per event.

Signed-off-by: Kamalesh Babulal <[email protected]>
Signed-off-by: Vipin K Parashar <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
1 parent a26f415 commit b4af279

File tree

1 file changed

+31
-24
lines changed
  • arch/powerpc/platforms/pseries

1 file changed

+31
-24
lines changed

arch/powerpc/platforms/pseries/ras.c

Lines changed: 31 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,9 @@ static int ras_check_exception_token;
4040
#define EPOW_SENSOR_TOKEN 9
4141
#define EPOW_SENSOR_INDEX 0
4242

43+
/* EPOW events counter variable */
44+
static int num_epow_events;
45+
4346
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
4447
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
4548

@@ -82,32 +85,30 @@ static void handle_system_shutdown(char event_modifier)
8285
{
8386
switch (event_modifier) {
8487
case EPOW_SHUTDOWN_NORMAL:
85-
pr_emerg("Firmware initiated power off");
88+
pr_emerg("Power off requested\n");
8689
orderly_poweroff(true);
8790
break;
8891

8992
case EPOW_SHUTDOWN_ON_UPS:
90-
pr_emerg("Loss of power reported by firmware, system is "
91-
"running on UPS/battery");
92-
pr_emerg("Check RTAS error log for details");
93+
pr_emerg("Loss of system power detected. System is running on"
94+
" UPS/battery. Check RTAS error log for details\n");
9395
orderly_poweroff(true);
9496
break;
9597

9698
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
97-
pr_emerg("Loss of system critical functions reported by "
98-
"firmware");
99-
pr_emerg("Check RTAS error log for details");
99+
pr_emerg("Loss of system critical functions detected. Check"
100+
" RTAS error log for details\n");
100101
orderly_poweroff(true);
101102
break;
102103

103104
case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
104-
pr_emerg("Ambient temperature too high reported by firmware");
105-
pr_emerg("Check RTAS error log for details");
105+
pr_emerg("High ambient temperature detected. Check RTAS"
106+
" error log for details\n");
106107
orderly_poweroff(true);
107108
break;
108109

109110
default:
110-
pr_err("Unknown power/cooling shutdown event (modifier %d)",
111+
pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
111112
event_modifier);
112113
}
113114
}
@@ -145,41 +146,48 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
145146

146147
switch (action_code) {
147148
case EPOW_RESET:
148-
pr_err("Non critical power or cooling issue cleared");
149+
if (num_epow_events) {
150+
pr_info("Non critical power/cooling issue cleared\n");
151+
num_epow_events--;
152+
}
149153
break;
150154

151155
case EPOW_WARN_COOLING:
152-
pr_err("Non critical cooling issue reported by firmware");
153-
pr_err("Check RTAS error log for details");
156+
pr_info("Non-critical cooling issue detected. Check RTAS error"
157+
" log for details\n");
154158
break;
155159

156160
case EPOW_WARN_POWER:
157-
pr_err("Non critical power issue reported by firmware");
158-
pr_err("Check RTAS error log for details");
161+
pr_info("Non-critical power issue detected. Check RTAS error"
162+
" log for details\n");
159163
break;
160164

161165
case EPOW_SYSTEM_SHUTDOWN:
162166
handle_system_shutdown(epow_log->event_modifier);
163167
break;
164168

165169
case EPOW_SYSTEM_HALT:
166-
pr_emerg("Firmware initiated power off");
170+
pr_emerg("Critical power/cooling issue detected. Check RTAS"
171+
" error log for details. Powering off.\n");
167172
orderly_poweroff(true);
168173
break;
169174

170175
case EPOW_MAIN_ENCLOSURE:
171176
case EPOW_POWER_OFF:
172-
pr_emerg("Critical power/cooling issue reported by firmware");
173-
pr_emerg("Check RTAS error log for details");
174-
pr_emerg("Immediate power off");
177+
pr_emerg("System about to lose power. Check RTAS error log "
178+
" for details. Powering off immediately.\n");
175179
emergency_sync();
176180
kernel_power_off();
177181
break;
178182

179183
default:
180-
pr_err("Unknown power/cooling event (action code %d)",
184+
pr_err("Unknown power/cooling event (action code = %d)\n",
181185
action_code);
182186
}
187+
188+
/* Increment epow events counter variable */
189+
if (action_code != EPOW_RESET)
190+
num_epow_events++;
183191
}
184192

185193
/* Handle environmental and power warning (EPOW) interrupts. */
@@ -249,13 +257,12 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
249257
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
250258

251259
if (fatal) {
252-
pr_emerg("Fatal hardware error reported by firmware");
253-
pr_emerg("Check RTAS error log for details");
254-
pr_emerg("Immediate power off");
260+
pr_emerg("Fatal hardware error detected. Check RTAS error"
261+
" log for details. Powering off immediately\n");
255262
emergency_sync();
256263
kernel_power_off();
257264
} else {
258-
pr_err("Recoverable hardware error reported by firmware");
265+
pr_err("Recoverable hardware error detected\n");
259266
}
260267

261268
spin_unlock(&ras_log_buf_lock);

0 commit comments

Comments
 (0)