Skip to content

Commit fbe346c

Browse files
haiyangz and kuba-moo
authored and committed
net: mana: Handle Reset Request from MANA NIC
Upon receiving the Reset Request, pause the connection and clean up the queues, wait for the specified period, then resume the NIC. In the cleanup phase, the HWC is no longer responding, so set hwc_timeout to zero to skip waiting on the response.

Signed-off-by: Haiyang Zhang <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent f461c7a commit fbe346c

File tree

4 files changed

+143
-35
lines changed

4 files changed

+143
-35
lines changed

drivers/net/ethernet/microsoft/mana/gdma_main.c

Lines changed: 103 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <linux/irqdomain.h>
1111

1212
#include <net/mana/mana.h>
13+
#include <net/mana/hw_channel.h>
1314

1415
struct dentry *mana_debugfs_root;
1516

@@ -68,6 +69,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
6869
mana_gd_init_vf_regs(pdev);
6970
}
7071

72+
/* Suppress logging when we set timeout to zero */
73+
bool mana_need_log(struct gdma_context *gc, int err)
74+
{
75+
struct hw_channel_context *hwc;
76+
77+
if (err != -ETIMEDOUT)
78+
return true;
79+
80+
if (!gc)
81+
return true;
82+
83+
hwc = gc->hwc.driver_data;
84+
if (hwc && hwc->hwc_timeout == 0)
85+
return false;
86+
87+
return true;
88+
}
89+
7190
static int mana_gd_query_max_resources(struct pci_dev *pdev)
7291
{
7392
struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -278,8 +297,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
278297

279298
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
280299
if (err || resp.hdr.status) {
281-
dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
282-
resp.hdr.status);
300+
if (mana_need_log(gc, err))
301+
dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
302+
resp.hdr.status);
283303
return err ? err : -EPROTO;
284304
}
285305

@@ -366,25 +386,12 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA");
366386

367387
#define MANA_SERVICE_PERIOD 10
368388

369-
struct mana_serv_work {
370-
struct work_struct serv_work;
371-
struct pci_dev *pdev;
372-
};
373-
374-
static void mana_serv_func(struct work_struct *w)
389+
static void mana_serv_fpga(struct pci_dev *pdev)
375390
{
376-
struct mana_serv_work *mns_wk;
377391
struct pci_bus *bus, *parent;
378-
struct pci_dev *pdev;
379-
380-
mns_wk = container_of(w, struct mana_serv_work, serv_work);
381-
pdev = mns_wk->pdev;
382392

383393
pci_lock_rescan_remove();
384394

385-
if (!pdev)
386-
goto out;
387-
388395
bus = pdev->bus;
389396
if (!bus) {
390397
dev_err(&pdev->dev, "MANA service: no bus\n");
@@ -405,7 +412,74 @@ static void mana_serv_func(struct work_struct *w)
405412

406413
out:
407414
pci_unlock_rescan_remove();
415+
}
416+
417+
static void mana_serv_reset(struct pci_dev *pdev)
418+
{
419+
struct gdma_context *gc = pci_get_drvdata(pdev);
420+
struct hw_channel_context *hwc;
421+
422+
if (!gc) {
423+
dev_err(&pdev->dev, "MANA service: no GC\n");
424+
return;
425+
}
426+
427+
hwc = gc->hwc.driver_data;
428+
if (!hwc) {
429+
dev_err(&pdev->dev, "MANA service: no HWC\n");
430+
goto out;
431+
}
432+
433+
/* HWC is not responding in this case, so don't wait */
434+
hwc->hwc_timeout = 0;
435+
436+
dev_info(&pdev->dev, "MANA reset cycle start\n");
408437

438+
mana_gd_suspend(pdev, PMSG_SUSPEND);
439+
440+
msleep(MANA_SERVICE_PERIOD * 1000);
441+
442+
mana_gd_resume(pdev);
443+
444+
dev_info(&pdev->dev, "MANA reset cycle completed\n");
445+
446+
out:
447+
gc->in_service = false;
448+
}
449+
450+
struct mana_serv_work {
451+
struct work_struct serv_work;
452+
struct pci_dev *pdev;
453+
enum gdma_eqe_type type;
454+
};
455+
456+
static void mana_serv_func(struct work_struct *w)
457+
{
458+
struct mana_serv_work *mns_wk;
459+
struct pci_dev *pdev;
460+
461+
mns_wk = container_of(w, struct mana_serv_work, serv_work);
462+
pdev = mns_wk->pdev;
463+
464+
if (!pdev)
465+
goto out;
466+
467+
switch (mns_wk->type) {
468+
case GDMA_EQE_HWC_FPGA_RECONFIG:
469+
mana_serv_fpga(pdev);
470+
break;
471+
472+
case GDMA_EQE_HWC_RESET_REQUEST:
473+
mana_serv_reset(pdev);
474+
break;
475+
476+
default:
477+
dev_err(&pdev->dev, "MANA service: unknown type %d\n",
478+
mns_wk->type);
479+
break;
480+
}
481+
482+
out:
409483
pci_dev_put(pdev);
410484
kfree(mns_wk);
411485
module_put(THIS_MODULE);
@@ -462,6 +536,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
462536
break;
463537

464538
case GDMA_EQE_HWC_FPGA_RECONFIG:
539+
case GDMA_EQE_HWC_RESET_REQUEST:
465540
dev_info(gc->dev, "Recv MANA service type:%d\n", type);
466541

467542
if (gc->in_service) {
@@ -483,6 +558,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
483558
dev_info(gc->dev, "Start MANA service type:%d\n", type);
484559
gc->in_service = true;
485560
mns_wk->pdev = to_pci_dev(gc->dev);
561+
mns_wk->type = type;
486562
pci_dev_get(mns_wk->pdev);
487563
INIT_WORK(&mns_wk->serv_work, mana_serv_func);
488564
schedule_work(&mns_wk->serv_work);
@@ -634,7 +710,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
634710

635711
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
636712
if (err) {
637-
dev_err(dev, "test_eq failed: %d\n", err);
713+
if (mana_need_log(gc, err))
714+
dev_err(dev, "test_eq failed: %d\n", err);
638715
goto out;
639716
}
640717

@@ -669,7 +746,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
669746

670747
if (flush_evenets) {
671748
err = mana_gd_test_eq(gc, queue);
672-
if (err)
749+
if (err && mana_need_log(gc, err))
673750
dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
674751
}
675752

@@ -815,8 +892,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
815892

816893
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
817894
if (err || resp.hdr.status) {
818-
dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
819-
err, resp.hdr.status);
895+
if (mana_need_log(gc, err))
896+
dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
897+
err, resp.hdr.status);
820898
return -EPROTO;
821899
}
822900

@@ -1116,8 +1194,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
11161194

11171195
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
11181196
if (err || resp.hdr.status) {
1119-
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
1120-
err, resp.hdr.status);
1197+
if (mana_need_log(gc, err))
1198+
dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
1199+
err, resp.hdr.status);
11211200
if (!err)
11221201
err = -EPROTO;
11231202
}
@@ -1915,7 +1994,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
19151994
}
19161995

19171996
/* The 'state' parameter is not used. */
1918-
static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
1997+
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
19191998
{
19201999
struct gdma_context *gc = pci_get_drvdata(pdev);
19212000

@@ -1931,7 +2010,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
19312010
* fail -- if this happens, it's safer to just report an error than try to undo
19322011
* what has been done.
19332012
*/
1934-
static int mana_gd_resume(struct pci_dev *pdev)
2013+
int mana_gd_resume(struct pci_dev *pdev)
19352014
{
19362015
struct gdma_context *gc = pci_get_drvdata(pdev);
19372016
int err;

drivers/net/ethernet/microsoft/mana/hw_channel.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
880880

881881
if (!wait_for_completion_timeout(&ctx->comp_event,
882882
(msecs_to_jiffies(hwc->hwc_timeout)))) {
883-
dev_err(hwc->dev, "HWC: Request timed out!\n");
883+
if (hwc->hwc_timeout != 0)
884+
dev_err(hwc->dev, "HWC: Request timed out!\n");
885+
884886
err = -ETIMEDOUT;
885887
goto out;
886888
}

drivers/net/ethernet/microsoft/mana/mana_en.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ static const struct file_operations mana_dbg_q_fops = {
4747
.read = mana_dbg_q_read,
4848
};
4949

50+
static bool mana_en_need_log(struct mana_port_context *apc, int err)
51+
{
52+
if (apc && apc->ac && apc->ac->gdma_dev &&
53+
apc->ac->gdma_dev->gdma_context)
54+
return mana_need_log(apc->ac->gdma_dev->gdma_context, err);
55+
else
56+
return true;
57+
}
58+
5059
/* Microsoft Azure Network Adapter (MANA) functions */
5160

5261
static int mana_open(struct net_device *ndev)
@@ -854,7 +863,8 @@ static int mana_send_request(struct mana_context *ac, void *in_buf,
854863
if (err == -EOPNOTSUPP)
855864
return err;
856865

857-
if (req->req.msg_type != MANA_QUERY_PHY_STAT)
866+
if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
867+
mana_need_log(gc, err))
858868
dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
859869
err, resp->status);
860870
return err ? err : -EPROTO;
@@ -931,8 +941,10 @@ static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
931941
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
932942
sizeof(resp));
933943
if (err) {
934-
netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
935-
err);
944+
if (mana_en_need_log(apc, err))
945+
netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
946+
err);
947+
936948
return;
937949
}
938950

@@ -987,8 +999,10 @@ static void mana_pf_deregister_filter(struct mana_port_context *apc)
987999
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
9881000
sizeof(resp));
9891001
if (err) {
990-
netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
991-
err);
1002+
if (mana_en_need_log(apc, err))
1003+
netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
1004+
err);
1005+
9921006
return;
9931007
}
9941008

@@ -1218,7 +1232,9 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
12181232
err = mana_send_request(apc->ac, req, req_buf_size, &resp,
12191233
sizeof(resp));
12201234
if (err) {
1221-
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
1235+
if (mana_en_need_log(apc, err))
1236+
netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
1237+
12221238
goto out;
12231239
}
12241240

@@ -1402,7 +1418,9 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
14021418
err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
14031419
sizeof(resp));
14041420
if (err) {
1405-
netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
1421+
if (mana_en_need_log(apc, err))
1422+
netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
1423+
14061424
return;
14071425
}
14081426

@@ -3067,11 +3085,10 @@ static int mana_dealloc_queues(struct net_device *ndev)
30673085

30683086
apc->rss_state = TRI_STATE_FALSE;
30693087
err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
3070-
if (err) {
3088+
if (err && mana_en_need_log(apc, err))
30713089
netdev_err(ndev, "Failed to disable vPort: %d\n", err);
3072-
return err;
3073-
}
30743090

3091+
/* Even on error, the vPort still needs to be cleaned up */
30753092
mana_destroy_vport(apc);
30763093

30773094
return 0;

include/net/mana/gdma.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ enum gdma_eqe_type {
6262
GDMA_EQE_HWC_FPGA_RECONFIG = 132,
6363
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
6464
GDMA_EQE_HWC_SOC_SERVICE = 134,
65+
GDMA_EQE_HWC_RESET_REQUEST = 135,
6566
GDMA_EQE_RNIC_QP_FATAL = 176,
6667
};
6768

@@ -584,6 +585,9 @@ enum {
584585
/* Driver supports dynamic MSI-X vector allocation */
585586
#define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13)
586587

588+
/* Driver can self reset on EQE notification */
589+
#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
590+
587591
/* Driver can self reset on FPGA Reconfig EQE notification */
588592
#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
589593

@@ -594,6 +598,7 @@ enum {
594598
GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
595599
GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
596600
GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
601+
GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
597602
GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
598603

599604
#define GDMA_DRV_CAP_FLAGS2 0
@@ -921,4 +926,9 @@ void mana_unregister_debugfs(void);
921926

922927
int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
923928

929+
int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
930+
int mana_gd_resume(struct pci_dev *pdev);
931+
932+
bool mana_need_log(struct gdma_context *gc, int err);
933+
924934
#endif /* _GDMA_H */

0 commit comments

Comments
 (0)