
Commit 8285c64

net: mana: Handle Reset Request from MANA NIC
jira LE-4365
Rebuild_History Non-Buildable kernel-6.12.0-55.38.1.el10_0
commit-author Haiyang Zhang <[email protected]>
commit fbe346c
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-6.12.0-55.38.1.el10_0/fbe346ce.failed

Upon receiving the Reset Request, pause the connection and clean up
queues, wait for the specified period, then resume the NIC.
In the cleanup phase, the HWC is no longer responding, so set hwc_timeout
to zero to skip waiting on the response.

Signed-off-by: Haiyang Zhang <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
(cherry picked from commit fbe346c)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	drivers/net/ethernet/microsoft/mana/mana_en.c
#	include/net/mana/gdma.h
1 parent 10dec71 commit 8285c64

File tree

1 file changed: 348 additions, 0 deletions

ciq/ciq_backports/kernel-6.12.0-55.38.1.el10_0/fbe346ce.failed

@@ -0,0 +1,348 @@
net: mana: Handle Reset Request from MANA NIC

jira LE-4365
Rebuild_History Non-Buildable kernel-6.12.0-55.38.1.el10_0
commit-author Haiyang Zhang <[email protected]>
commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-6.12.0-55.38.1.el10_0/fbe346ce.failed

Upon receiving the Reset Request, pause the connection and clean up
queues, wait for the specified period, then resume the NIC.
In the cleanup phase, the HWC is no longer responding, so set hwc_timeout
to zero to skip waiting on the response.

Signed-off-by: Haiyang Zhang <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
(cherry picked from commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	drivers/net/ethernet/microsoft/mana/mana_en.c
#	include/net/mana/gdma.h
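
In outline, the reset path this patch adds (mana_serv_reset() in the
gdma_main.c hunk below) pauses the device, sleeps for MANA_SERVICE_PERIOD
seconds, and resumes it. A condensed sketch of that flow with the NULL
checks trimmed (the function name here is hypothetical; the real one is
mana_serv_reset()):

	static void mana_serv_reset_sketch(struct pci_dev *pdev)
	{
		struct gdma_context *gc = pci_get_drvdata(pdev);
		struct hw_channel_context *hwc = gc->hwc.driver_data;

		/* HWC is no longer responding: a zero timeout makes HWC
		 * requests return -ETIMEDOUT immediately instead of blocking,
		 * and mana_need_log() suppresses the resulting error spew.
		 */
		hwc->hwc_timeout = 0;

		mana_gd_suspend(pdev, PMSG_SUSPEND);	/* pause, clean up queues */
		msleep(MANA_SERVICE_PERIOD * 1000);	/* wait the service period */
		mana_gd_resume(pdev);			/* bring the NIC back up */

		gc->in_service = false;
	}
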
diff --cc drivers/net/ethernet/microsoft/mana/mana_en.c
index b41b32b37dbc,a7973651ae51..000000000000
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@@ -777,7 -860,11 +786,15 @@@ static int mana_send_request(struct man
  	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
  				   out_buf);
  	if (err || resp->status) {
++<<<<<<< HEAD
 +		if (req->req.msg_type != MANA_QUERY_PHY_STAT)
++=======
+ 		if (err == -EOPNOTSUPP)
+ 			return err;
+ 
+ 		if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
+ 		    mana_need_log(gc, err))
++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
  		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
  			err, resp->status);
  		return err ? err : -EPROTO;
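
The upstream side of this conflict is a superset of the HEAD side, so a
plausible resolution keeps the upstream branch as-is (a sketch only; the
rebuild recorded this cherry-pick as failed rather than resolving it):

	if (err || resp->status) {
		if (err == -EOPNOTSUPP)
			return err;

		if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
		    mana_need_log(gc, err))
			dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
				err, resp->status);
		return err ? err : -EPROTO;
	}
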
diff --cc include/net/mana/gdma.h
index a1661ec549f4,57df78cfbf82..000000000000
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@@ -60,6 -61,8 +60,11 @@@ enum gdma_eqe_type
  	GDMA_EQE_HWC_INIT_DONE		= 131,
  	GDMA_EQE_HWC_FPGA_RECONFIG	= 132,
  	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
++<<<<<<< HEAD
++=======
+ 	GDMA_EQE_HWC_SOC_SERVICE	= 134,
+ 	GDMA_EQE_HWC_RESET_REQUEST	= 135,
++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
  	GDMA_EQE_RNIC_QP_FATAL		= 176,
  };
  
@@@ -560,6 -582,12 +565,15 @@@ enum
  /* Driver can handle holes (zeros) in the device list */
  #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
  
++<<<<<<< HEAD
++=======
+ /* Driver supports dynamic MSI-X vector allocation */
+ #define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13)
+ 
+ /* Driver can self reset on EQE notification */
+ #define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
+ 
++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
  /* Driver can self reset on FPGA Reconfig EQE notification */
  #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
  
@@@ -569,6 -597,8 +583,11 @@@
  	GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
  	GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
  	GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
++<<<<<<< HEAD
++=======
+ 	GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
+ 	GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
  	GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
  
  #define GDMA_DRV_CAP_FLAGS2 0
@@@ -893,4 -924,11 +912,14 @@@ int mana_gd_destroy_dma_region(struct g
  void mana_register_debugfs(void);
  void mana_unregister_debugfs(void);
  
++<<<<<<< HEAD
++=======
+ int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
+ 
+ int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
+ int mana_gd_resume(struct pci_dev *pdev);
+ 
+ bool mana_need_log(struct gdma_context *gc, int err);
+ 
++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
  #endif /* _GDMA_H */
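
All three gdma.h conflicts are pure additions on the upstream side (new EQE
types, new capability flags, new prototypes), so a resolution would keep the
"+" lines unchanged; the EQE enum, for instance, would end up reading
(sketch of the hypothetical merged result):

	enum gdma_eqe_type {
		/* ... */
		GDMA_EQE_HWC_INIT_DONE		= 131,
		GDMA_EQE_HWC_FPGA_RECONFIG	= 132,
		GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
		GDMA_EQE_HWC_SOC_SERVICE	= 134,
		GDMA_EQE_HWC_RESET_REQUEST	= 135,	/* triggers the reset cycle */
		GDMA_EQE_RNIC_QP_FATAL		= 176,
	};
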
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 5f0228dfbf70..8cd814babcb2 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -8,6 +8,7 @@
 #include <linux/version.h>
 
 #include <net/mana/mana.h>
+#include <net/mana/hw_channel.h>
 
 struct dentry *mana_debugfs_root;
 
@@ -66,6 +67,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
 	mana_gd_init_vf_regs(pdev);
 }
 
+/* Suppress logging when we set timeout to zero */
+bool mana_need_log(struct gdma_context *gc, int err)
+{
+	struct hw_channel_context *hwc;
+
+	if (err != -ETIMEDOUT)
+		return true;
+
+	if (!gc)
+		return true;
+
+	hwc = gc->hwc.driver_data;
+	if (hwc && hwc->hwc_timeout == 0)
+		return false;
+
+	return true;
+}
+
 static int mana_gd_query_max_resources(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
@@ -269,8 +288,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
-			resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
+				resp.hdr.status);
 		return err ? err : -EPROTO;
 	}
 
@@ -355,25 +375,12 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 
 #define MANA_SERVICE_PERIOD 10
 
-struct mana_serv_work {
-	struct work_struct serv_work;
-	struct pci_dev *pdev;
-};
-
-static void mana_serv_func(struct work_struct *w)
+static void mana_serv_fpga(struct pci_dev *pdev)
 {
-	struct mana_serv_work *mns_wk;
 	struct pci_bus *bus, *parent;
-	struct pci_dev *pdev;
-
-	mns_wk = container_of(w, struct mana_serv_work, serv_work);
-	pdev = mns_wk->pdev;
 
 	pci_lock_rescan_remove();
 
-	if (!pdev)
-		goto out;
-
 	bus = pdev->bus;
 	if (!bus) {
 		dev_err(&pdev->dev, "MANA service: no bus\n");
@@ -394,7 +401,74 @@ static void mana_serv_func(struct work_struct *w)
 
 out:
 	pci_unlock_rescan_remove();
+}
+
+static void mana_serv_reset(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct hw_channel_context *hwc;
+
+	if (!gc) {
+		dev_err(&pdev->dev, "MANA service: no GC\n");
+		return;
+	}
+
+	hwc = gc->hwc.driver_data;
+	if (!hwc) {
+		dev_err(&pdev->dev, "MANA service: no HWC\n");
+		goto out;
+	}
+
+	/* HWC is not responding in this case, so don't wait */
+	hwc->hwc_timeout = 0;
+
+	dev_info(&pdev->dev, "MANA reset cycle start\n");
+
+	mana_gd_suspend(pdev, PMSG_SUSPEND);
+
+	msleep(MANA_SERVICE_PERIOD * 1000);
+
+	mana_gd_resume(pdev);
+
+	dev_info(&pdev->dev, "MANA reset cycle completed\n");
+
+out:
+	gc->in_service = false;
+}
+
+struct mana_serv_work {
+	struct work_struct serv_work;
+	struct pci_dev *pdev;
+	enum gdma_eqe_type type;
+};
+
+static void mana_serv_func(struct work_struct *w)
+{
+	struct mana_serv_work *mns_wk;
+	struct pci_dev *pdev;
+
+	mns_wk = container_of(w, struct mana_serv_work, serv_work);
+	pdev = mns_wk->pdev;
+
+	if (!pdev)
+		goto out;
+
+	switch (mns_wk->type) {
+	case GDMA_EQE_HWC_FPGA_RECONFIG:
+		mana_serv_fpga(pdev);
+		break;
+
+	case GDMA_EQE_HWC_RESET_REQUEST:
+		mana_serv_reset(pdev);
+		break;
+
+	default:
+		dev_err(&pdev->dev, "MANA service: unknown type %d\n",
+			mns_wk->type);
+		break;
+	}
+
+out:
 	pci_dev_put(pdev);
 	kfree(mns_wk);
 	module_put(THIS_MODULE);
@@ -450,6 +524,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 		break;
 
 	case GDMA_EQE_HWC_FPGA_RECONFIG:
+	case GDMA_EQE_HWC_RESET_REQUEST:
 		dev_info(gc->dev, "Recv MANA service type:%d\n", type);
 
 		if (gc->in_service) {
@@ -471,6 +546,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
 		dev_info(gc->dev, "Start MANA service type:%d\n", type);
 		gc->in_service = true;
 		mns_wk->pdev = to_pci_dev(gc->dev);
+		mns_wk->type = type;
 		pci_dev_get(mns_wk->pdev);
 		INIT_WORK(&mns_wk->serv_work, mana_serv_func);
 		schedule_work(&mns_wk->serv_work);
@@ -617,7 +693,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err) {
-		dev_err(dev, "test_eq failed: %d\n", err);
+		if (mana_need_log(gc, err))
+			dev_err(dev, "test_eq failed: %d\n", err);
 		goto out;
 	}
 
@@ -652,7 +729,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 
 	if (flush_evenets) {
 		err = mana_gd_test_eq(gc, queue);
-		if (err)
+		if (err && mana_need_log(gc, err))
 			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
 	}
 
@@ -798,8 +875,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
-			err, resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
+				err, resp.hdr.status);
 		return -EPROTO;
 	}
 
@@ -1098,8 +1176,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
 
 	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
 	if (err || resp.hdr.status) {
-		dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
-			err, resp.hdr.status);
+		if (mana_need_log(gc, err))
+			dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
+				err, resp.hdr.status);
 		if (!err)
 			err = -EPROTO;
 	}
@@ -1699,7 +1778,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
 }
 
 /* The 'state' parameter is not used. */
-static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
+int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 
@@ -1714,7 +1793,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
 * fail -- if this happens, it's safer to just report an error than try to undo
 * what has been done.
 */
-static int mana_gd_resume(struct pci_dev *pdev)
+int mana_gd_resume(struct pci_dev *pdev)
 {
 	struct gdma_context *gc = pci_get_drvdata(pdev);
 	int err;
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index feb3b74700ed..58da1fb1baaf 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -861,7 +861,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
 
 	if (!wait_for_completion_timeout(&ctx->comp_event,
 					 (msecs_to_jiffies(hwc->hwc_timeout)))) {
-		dev_err(hwc->dev, "HWC: Request timed out!\n");
+		if (hwc->hwc_timeout != 0)
+			dev_err(hwc->dev, "HWC: Request timed out!\n");
+
 		err = -ETIMEDOUT;
 		goto out;
 	}
* Unmerged path drivers/net/ethernet/microsoft/mana/mana_en.c
* Unmerged path include/net/mana/gdma.h
