@@ -71,43 +71,110 @@ static int bnxt_hwrm_remote_dev_reset_set(struct bnxt *bp, bool remote_reset)
71
71
return hwrm_req_send (bp , req );
72
72
}
73
73
74
+ static char * bnxt_health_severity_str (enum bnxt_health_severity severity )
75
+ {
76
+ switch (severity ) {
77
+ case SEVERITY_NORMAL : return "normal" ;
78
+ case SEVERITY_WARNING : return "warning" ;
79
+ case SEVERITY_RECOVERABLE : return "recoverable" ;
80
+ case SEVERITY_FATAL : return "fatal" ;
81
+ default : return "unknown" ;
82
+ }
83
+ }
84
+
85
+ static char * bnxt_health_remedy_str (enum bnxt_health_remedy remedy )
86
+ {
87
+ switch (remedy ) {
88
+ case REMEDY_DEVLINK_RECOVER : return "devlink recover" ;
89
+ case REMEDY_POWER_CYCLE_DEVICE : return "device power cycle" ;
90
+ case REMEDY_POWER_CYCLE_HOST : return "host power cycle" ;
91
+ case REMEDY_FW_UPDATE : return "update firmware" ;
92
+ case REMEDY_HW_REPLACE : return "replace hardware" ;
93
+ default : return "unknown" ;
94
+ }
95
+ }
96
+
74
97
static int bnxt_fw_diagnose (struct devlink_health_reporter * reporter ,
75
98
struct devlink_fmsg * fmsg ,
76
99
struct netlink_ext_ack * extack )
77
100
{
78
101
struct bnxt * bp = devlink_health_reporter_priv (reporter );
79
- u32 val ;
102
+ struct bnxt_fw_health * h = bp -> fw_health ;
103
+ u32 fw_status , fw_resets ;
80
104
int rc ;
81
105
82
106
if (test_bit (BNXT_STATE_IN_FW_RESET , & bp -> state ))
83
- return 0 ;
107
+ return devlink_fmsg_string_pair_put ( fmsg , "Status" , "recovering" ) ;
84
108
85
- val = bnxt_fw_health_readl (bp , BNXT_FW_HEALTH_REG );
109
+ if (!h -> status_reliable )
110
+ return devlink_fmsg_string_pair_put (fmsg , "Status" , "unknown" );
86
111
87
- if (BNXT_FW_IS_BOOTING (val )) {
88
- rc = devlink_fmsg_string_pair_put (fmsg , "Description" ,
89
- "Not yet completed initialization" );
112
+ mutex_lock (& h -> lock );
113
+ fw_status = bnxt_fw_health_readl (bp , BNXT_FW_HEALTH_REG );
114
+ if (BNXT_FW_IS_BOOTING (fw_status )) {
115
+ rc = devlink_fmsg_string_pair_put (fmsg , "Status" , "initializing" );
90
116
if (rc )
91
- return rc ;
92
- } else if (BNXT_FW_IS_ERR (val )) {
93
- rc = devlink_fmsg_string_pair_put (fmsg , "Description" ,
94
- "Encountered fatal error and cannot recover" );
117
+ goto unlock ;
118
+ } else if (h -> severity || fw_status != BNXT_FW_STATUS_HEALTHY ) {
119
+ if (!h -> severity ) {
120
+ h -> severity = SEVERITY_FATAL ;
121
+ h -> remedy = REMEDY_POWER_CYCLE_DEVICE ;
122
+ h -> diagnoses ++ ;
123
+ devlink_health_report (h -> fw_reporter ,
124
+ "FW error diagnosed" , h );
125
+ }
126
+ rc = devlink_fmsg_string_pair_put (fmsg , "Status" , "error" );
95
127
if (rc )
96
- return rc ;
128
+ goto unlock ;
129
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Syndrome" , fw_status );
130
+ if (rc )
131
+ goto unlock ;
132
+ } else {
133
+ rc = devlink_fmsg_string_pair_put (fmsg , "Status" , "healthy" );
134
+ if (rc )
135
+ goto unlock ;
97
136
}
98
137
99
- if (val >> 16 ) {
100
- rc = devlink_fmsg_u32_pair_put (fmsg , "Error code" , val >> 16 );
138
+ rc = devlink_fmsg_string_pair_put (fmsg , "Severity" ,
139
+ bnxt_health_severity_str (h -> severity ));
140
+ if (rc )
141
+ goto unlock ;
142
+
143
+ if (h -> severity ) {
144
+ rc = devlink_fmsg_string_pair_put (fmsg , "Remedy" ,
145
+ bnxt_health_remedy_str (h -> remedy ));
101
146
if (rc )
102
- return rc ;
147
+ goto unlock ;
148
+ if (h -> remedy == REMEDY_DEVLINK_RECOVER ) {
149
+ rc = devlink_fmsg_string_pair_put (fmsg , "Impact" ,
150
+ "traffic+ntuple_cfg" );
151
+ if (rc )
152
+ goto unlock ;
153
+ }
103
154
}
104
155
105
- val = bnxt_fw_health_readl ( bp , BNXT_FW_RESET_CNT_REG );
106
- rc = devlink_fmsg_u32_pair_put ( fmsg , "Reset count" , val );
107
- if (rc )
156
+ unlock :
157
+ mutex_unlock ( & h -> lock );
158
+ if (rc || ! h -> resets_reliable )
108
159
return rc ;
109
160
110
- return 0 ;
161
+ fw_resets = bnxt_fw_health_readl (bp , BNXT_FW_RESET_CNT_REG );
162
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Resets" , fw_resets );
163
+ if (rc )
164
+ return rc ;
165
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Arrests" , h -> arrests );
166
+ if (rc )
167
+ return rc ;
168
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Survivals" , h -> survivals );
169
+ if (rc )
170
+ return rc ;
171
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Discoveries" , h -> discoveries );
172
+ if (rc )
173
+ return rc ;
174
+ rc = devlink_fmsg_u32_pair_put (fmsg , "Fatalities" , h -> fatalities );
175
+ if (rc )
176
+ return rc ;
177
+ return devlink_fmsg_u32_pair_put (fmsg , "Diagnoses" , h -> diagnoses );
111
178
}
112
179
113
180
static int bnxt_fw_recover (struct devlink_health_reporter * reporter ,
@@ -116,6 +183,9 @@ static int bnxt_fw_recover(struct devlink_health_reporter *reporter,
116
183
{
117
184
struct bnxt * bp = devlink_health_reporter_priv (reporter );
118
185
186
+ if (bp -> fw_health -> severity == SEVERITY_FATAL )
187
+ return - ENODEV ;
188
+
119
189
set_bit (BNXT_STATE_RECOVER , & bp -> state );
120
190
__bnxt_fw_recover (bp );
121
191
@@ -165,6 +235,7 @@ void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
165
235
void bnxt_devlink_health_fw_report (struct bnxt * bp )
166
236
{
167
237
struct bnxt_fw_health * fw_health = bp -> fw_health ;
238
+ int rc ;
168
239
169
240
if (!fw_health )
170
241
return ;
@@ -174,20 +245,32 @@ void bnxt_devlink_health_fw_report(struct bnxt *bp)
174
245
return ;
175
246
}
176
247
177
- devlink_health_report (fw_health -> fw_reporter , "FW error reported" , NULL );
248
+ mutex_lock (& fw_health -> lock );
249
+ fw_health -> severity = SEVERITY_RECOVERABLE ;
250
+ fw_health -> remedy = REMEDY_DEVLINK_RECOVER ;
251
+ mutex_unlock (& fw_health -> lock );
252
+ rc = devlink_health_report (fw_health -> fw_reporter , "FW error reported" ,
253
+ fw_health );
254
+ if (rc == - ECANCELED )
255
+ __bnxt_fw_recover (bp );
178
256
}
179
257
180
258
/*
 * Record a firmware health verdict and push it to the devlink reporter.
 *
 * @healthy true:  severity becomes SEVERITY_NORMAL and the reporter is
 *                 moved to the HEALTHY state (the stored remedy is left
 *                 as it was).
 * @healthy false: severity becomes SEVERITY_FATAL with the
 *                 power-cycle-device remedy and the reporter is moved to
 *                 the ERROR state.
 *
 * The severity/remedy fields are written under the health lock; the
 * reporter state update is issued after the lock is released.
 */
void bnxt_dl_health_fw_status_update(struct bnxt *bp, bool healthy)
{
	struct bnxt_fw_health *health = bp->fw_health;
	u8 new_state = healthy ? DEVLINK_HEALTH_REPORTER_STATE_HEALTHY :
				 DEVLINK_HEALTH_REPORTER_STATE_ERROR;

	mutex_lock(&health->lock);
	if (!healthy) {
		health->severity = SEVERITY_FATAL;
		health->remedy = REMEDY_POWER_CYCLE_DEVICE;
	} else {
		health->severity = SEVERITY_NORMAL;
	}
	mutex_unlock(&health->lock);

	devlink_health_reporter_state_update(health->fw_reporter, new_state);
}
192
275
193
276
void bnxt_dl_health_fw_recovery_done (struct bnxt * bp )
0 commit comments