Skip to content

Commit bf23ffc

Browse files
ThinhTrTran and kuba-moo
authored and committed
bnx2x: new flag to track HW resource allocation
While injecting PCIe errors to the upstream PCIe switch of a BCM57810 NIC, system hangs/crashes were observed. After several calls to bnx2x_tx_timeout() complete, bnx2x_nic_unload() is called to free up HW resources and bnx2x_napi_disable() is called to release NAPI objects. Later, when the EEH driver calls bnx2x_io_slot_reset() to complete the recovery process, bnx2x attempts to disable NAPI again by calling bnx2x_napi_disable() and freeing resources which have already been freed, resulting in a hang or crash. Introduce a new flag to track the HW resource and NAPI allocation state, refactor duplicated code into a single function, check page pool allocation status before freeing, and reduce debug output when a TX timeout event occurs. Reviewed-by: Manish Chopra <[email protected]> Tested-by: Abdul Haleem <[email protected]> Tested-by: David Christensen <[email protected]> Reviewed-by: Simon Horman <[email protected]> Tested-by: Venkata Sai Duggi <[email protected]> Signed-off-by: Thinh Tran <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 6dc5774 commit bf23ffc

File tree

4 files changed

+44
-28
lines changed

4 files changed

+44
-28
lines changed

drivers/net/ethernet/broadcom/bnx2x/bnx2x.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1508,6 +1508,8 @@ struct bnx2x {
15081508
bool cnic_loaded;
15091509
struct cnic_eth_dev *(*cnic_probe)(struct net_device *);
15101510

1511+
bool nic_stopped;
1512+
15111513
/* Flag that indicates that we can start looking for FCoE L2 queue
15121514
* completions in the default status block.
15131515
*/

drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
27152715
bnx2x_add_all_napi(bp);
27162716
DP(NETIF_MSG_IFUP, "napi added\n");
27172717
bnx2x_napi_enable(bp);
2718+
bp->nic_stopped = false;
27182719

27192720
if (IS_PF(bp)) {
27202721
/* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
29602961
load_error1:
29612962
bnx2x_napi_disable(bp);
29622963
bnx2x_del_all_napi(bp);
2964+
bp->nic_stopped = true;
29632965

29642966
/* clear pf_load status, as it was already set */
29652967
if (IS_PF(bp))
@@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
30953097
if (!CHIP_IS_E1x(bp))
30963098
bnx2x_pf_disable(bp);
30973099

3098-
/* Disable HW interrupts, NAPI */
3099-
bnx2x_netif_stop(bp, 1);
3100-
/* Delete all NAPI objects */
3101-
bnx2x_del_all_napi(bp);
3102-
if (CNIC_LOADED(bp))
3103-
bnx2x_del_all_napi_cnic(bp);
3104-
/* Release IRQs */
3105-
bnx2x_free_irq(bp);
3100+
if (!bp->nic_stopped) {
3101+
/* Disable HW interrupts, NAPI */
3102+
bnx2x_netif_stop(bp, 1);
3103+
/* Delete all NAPI objects */
3104+
bnx2x_del_all_napi(bp);
3105+
if (CNIC_LOADED(bp))
3106+
bnx2x_del_all_napi_cnic(bp);
3107+
/* Release IRQs */
3108+
bnx2x_free_irq(bp);
3109+
bp->nic_stopped = true;
3110+
}
31063111

31073112
/* Report UNLOAD_DONE to MCP */
31083113
bnx2x_send_unload_done(bp, false);

drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9474,15 +9474,18 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
94749474
}
94759475
}
94769476

9477-
/* Disable HW interrupts, NAPI */
9478-
bnx2x_netif_stop(bp, 1);
9479-
/* Delete all NAPI objects */
9480-
bnx2x_del_all_napi(bp);
9481-
if (CNIC_LOADED(bp))
9482-
bnx2x_del_all_napi_cnic(bp);
9477+
if (!bp->nic_stopped) {
9478+
/* Disable HW interrupts, NAPI */
9479+
bnx2x_netif_stop(bp, 1);
9480+
/* Delete all NAPI objects */
9481+
bnx2x_del_all_napi(bp);
9482+
if (CNIC_LOADED(bp))
9483+
bnx2x_del_all_napi_cnic(bp);
94839484

9484-
/* Release IRQs */
9485-
bnx2x_free_irq(bp);
9485+
/* Release IRQs */
9486+
bnx2x_free_irq(bp);
9487+
bp->nic_stopped = true;
9488+
}
94869489

94879490
/* Reset the chip, unless PCI function is offline. If we reach this
94889491
* point following a PCI error handling, it means device is really
@@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
1423814241
}
1423914242
bnx2x_drain_tx_queues(bp);
1424014243
bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
14241-
bnx2x_netif_stop(bp, 1);
14242-
bnx2x_del_all_napi(bp);
14244+
if (!bp->nic_stopped) {
14245+
bnx2x_netif_stop(bp, 1);
14246+
bnx2x_del_all_napi(bp);
1424314247

14244-
if (CNIC_LOADED(bp))
14245-
bnx2x_del_all_napi_cnic(bp);
14248+
if (CNIC_LOADED(bp))
14249+
bnx2x_del_all_napi_cnic(bp);
1424614250

14247-
bnx2x_free_irq(bp);
14251+
bnx2x_free_irq(bp);
14252+
bp->nic_stopped = true;
14253+
}
1424814254

1424914255
/* Report UNLOAD_DONE to MCP */
1425014256
bnx2x_send_unload_done(bp, true);

drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
529529
bnx2x_vfpf_finalize(bp, &req->first_tlv);
530530

531531
free_irq:
532-
/* Disable HW interrupts, NAPI */
533-
bnx2x_netif_stop(bp, 0);
534-
/* Delete all NAPI objects */
535-
bnx2x_del_all_napi(bp);
536-
537-
/* Release IRQs */
538-
bnx2x_free_irq(bp);
532+
if (!bp->nic_stopped) {
533+
/* Disable HW interrupts, NAPI */
534+
bnx2x_netif_stop(bp, 0);
535+
/* Delete all NAPI objects */
536+
bnx2x_del_all_napi(bp);
537+
538+
/* Release IRQs */
539+
bnx2x_free_irq(bp);
540+
bp->nic_stopped = true;
541+
}
539542
}
540543

541544
static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,

0 commit comments

Comments
 (0)