Skip to content

Commit c2dbb09

Browse files
emusln authored and davem330 committed
pds_core: health timer and workqueue
Add in the periodic health check and the related workqueue, as well as the handlers for when a FW reset is seen.

The firmware is polled every 5 seconds to be sure that it is still alive and that the FW generation didn't change.

The alive check looks to see that the PCI bus is still readable and the fw_status still has the RUNNING bit on. If not alive, the driver stops activity and tears things down. When the FW recovers and the alive check again succeeds, the driver sets back up for activity.

The generation check looks at the fw_generation to see if it has changed, which can happen if the FW crashed and recovered or was updated in between health checks. If changed, the driver counts that as though the alive test failed and forces the fw_down/fw_up cycle.

Signed-off-by: Shannon Nelson <[email protected]>
Acked-by: Jakub Kicinski <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 523847d commit c2dbb09

File tree

4 files changed

+103
-0
lines changed

4 files changed

+103
-0
lines changed

drivers/net/ethernet/amd/pds_core/core.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,64 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
3434

3535
set_bit(PDSC_S_FW_DEAD, &pdsc->state);
3636
}
37+
38+
static void pdsc_fw_down(struct pdsc *pdsc)
39+
{
40+
if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
41+
dev_err(pdsc->dev, "%s: already happening\n", __func__);
42+
return;
43+
}
44+
45+
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
46+
}
47+
48+
static void pdsc_fw_up(struct pdsc *pdsc)
49+
{
50+
int err;
51+
52+
if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
53+
dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
54+
return;
55+
}
56+
57+
err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
58+
if (err)
59+
goto err_out;
60+
61+
return;
62+
63+
err_out:
64+
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
65+
}
66+
67+
void pdsc_health_thread(struct work_struct *work)
68+
{
69+
struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
70+
unsigned long mask;
71+
bool healthy;
72+
73+
mutex_lock(&pdsc->config_lock);
74+
75+
/* Don't do a check when in a transition state */
76+
mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
77+
BIT_ULL(PDSC_S_STOPPING_DRIVER);
78+
if (pdsc->state & mask)
79+
goto out_unlock;
80+
81+
healthy = pdsc_is_fw_good(pdsc);
82+
dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
83+
__func__, healthy, pdsc->fw_status, pdsc->last_hb);
84+
85+
if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
86+
if (healthy)
87+
pdsc_fw_up(pdsc);
88+
} else {
89+
if (!healthy)
90+
pdsc_fw_down(pdsc);
91+
}
92+
93+
pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
94+
95+
out_unlock:
96+
mutex_unlock(&pdsc->config_lock);
97+
}

drivers/net/ethernet/amd/pds_core/core.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include <linux/pds/pds_intr.h>
1313

1414
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
15+
16+
#define PDSC_WATCHDOG_SECS 5
1517
#define PDSC_TEARDOWN_RECOVERY false
1618
#define PDSC_TEARDOWN_REMOVING true
1719
#define PDSC_SETUP_RECOVERY false
@@ -63,12 +65,17 @@ struct pdsc {
6365
u8 fw_generation;
6466
unsigned long last_fw_time;
6567
u32 last_hb;
68+
struct timer_list wdtimer;
69+
unsigned int wdtimer_period;
70+
struct work_struct health_work;
6671

6772
struct pdsc_devinfo dev_info;
6873
struct pds_core_dev_identity dev_ident;
6974
unsigned int nintrs;
7075
struct pdsc_intr_info *intr_info; /* array of nintrs elements */
7176

77+
struct workqueue_struct *wq;
78+
7279
unsigned int devcmd_timeout;
7380
struct mutex devcmd_lock; /* lock for dev_cmd operations */
7481
struct mutex config_lock; /* lock for configuration operations */
@@ -102,5 +109,6 @@ int pdsc_dev_init(struct pdsc *pdsc);
102109

103110
int pdsc_setup(struct pdsc *pdsc, bool init);
104111
void pdsc_teardown(struct pdsc *pdsc, bool removing);
112+
void pdsc_health_thread(struct work_struct *work);
105113

106114
#endif /* _PDSC_H_ */

drivers/net/ethernet/amd/pds_core/dev.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
177177
err = pdsc_devcmd_wait(pdsc, max_seconds);
178178
memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
179179

180+
if (err == -ENXIO || err == -ETIMEDOUT)
181+
queue_work(pdsc->wq, &pdsc->health_work);
182+
180183
return err;
181184
}
182185

drivers/net/ethernet/amd/pds_core/main.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,17 @@ static const struct pci_device_id pdsc_id_table[] = {
2020
};
2121
MODULE_DEVICE_TABLE(pci, pdsc_id_table);
2222

23+
static void pdsc_wdtimer_cb(struct timer_list *t)
24+
{
25+
struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
26+
27+
dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
28+
mod_timer(&pdsc->wdtimer,
29+
round_jiffies(jiffies + pdsc->wdtimer_period));
30+
31+
queue_work(pdsc->wq, &pdsc->health_work);
32+
}
33+
2334
static void pdsc_unmap_bars(struct pdsc *pdsc)
2435
{
2536
struct pdsc_dev_bar *bars = pdsc->bars;
@@ -119,8 +130,11 @@ static int pdsc_init_vf(struct pdsc *vf)
119130
return -1;
120131
}
121132

133+
#define PDSC_WQ_NAME_LEN 24
134+
122135
static int pdsc_init_pf(struct pdsc *pdsc)
123136
{
137+
char wq_name[PDSC_WQ_NAME_LEN];
124138
struct devlink *dl;
125139
int err;
126140

@@ -137,6 +151,13 @@ static int pdsc_init_pf(struct pdsc *pdsc)
137151
if (err)
138152
goto err_out_release_regions;
139153

154+
/* General workqueue and timer, but don't start timer yet */
155+
snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
156+
pdsc->wq = create_singlethread_workqueue(wq_name);
157+
INIT_WORK(&pdsc->health_work, pdsc_health_thread);
158+
timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
159+
pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
160+
140161
mutex_init(&pdsc->devcmd_lock);
141162
mutex_init(&pdsc->config_lock);
142163

@@ -154,10 +175,16 @@ static int pdsc_init_pf(struct pdsc *pdsc)
154175
devl_register(dl);
155176
devl_unlock(dl);
156177

178+
/* Lastly, start the health check timer */
179+
mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
180+
157181
return 0;
158182

159183
err_out_unmap_bars:
160184
mutex_unlock(&pdsc->config_lock);
185+
del_timer_sync(&pdsc->wdtimer);
186+
if (pdsc->wq)
187+
destroy_workqueue(pdsc->wq);
161188
mutex_destroy(&pdsc->config_lock);
162189
mutex_destroy(&pdsc->devcmd_lock);
163190
pci_free_irq_vectors(pdsc->pdev);
@@ -259,6 +286,10 @@ static void pdsc_remove(struct pci_dev *pdev)
259286
devl_unlock(dl);
260287

261288
if (!pdev->is_virtfn) {
289+
del_timer_sync(&pdsc->wdtimer);
290+
if (pdsc->wq)
291+
destroy_workqueue(pdsc->wq);
292+
262293
mutex_lock(&pdsc->config_lock);
263294
set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
264295

0 commit comments

Comments
 (0)