+net: mana: Handle Reset Request from MANA NIC
+
+jira LE-4365
+Rebuild_History Non-Buildable kernel-6.12.0-55.38.1.el10_0
+commit-author Haiyang Zhang <[email protected]>
+commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad
+Empty-Commit: Cherry-Pick Conflicts during history rebuild.
+Will be included in final tarball splat. Ref for failed cherry-pick at:
+ciq/ciq_backports/kernel-6.12.0-55.38.1.el10_0/fbe346ce.failed
+
+Upon receiving the Reset Request, pause the connection and clean up
+queues, wait for the specified period, then resume the NIC.
+In the cleanup phase, the HWC is no longer responding, so set hwc_timeout
+to zero to skip waiting on the response.
+
+ Signed-off-by: Haiyang Zhang <[email protected]>
+Link: https://patch.msgid.link/[email protected]
+ Signed-off-by: Jakub Kicinski <[email protected]>
+(cherry picked from commit fbe346ce9d626680a4dd0f079e17c7b5dd32ffad)
+ Signed-off-by: Jonathan Maple <[email protected]>
+
+# Conflicts:
+# drivers/net/ethernet/microsoft/mana/mana_en.c
+# include/net/mana/gdma.h
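
For orientation before the conflicted hunks: the change handles a GDMA_EQE_HWC_RESET_REQUEST event by queueing service work that zeroes hwc_timeout (the HWC is no longer responding, so pending requests must not wait or log timeouts), suspends the device to tear down the queues, sleeps MANA_SERVICE_PERIOD seconds, then resumes. Below is a minimal, self-contained C sketch of that sequencing only; serv_reset_sketch(), suspend_stub(), resume_stub() and the trimmed-down structs are illustrative stand-ins, not the driver's real definitions (the driver uses mana_serv_reset(), mana_gd_suspend() and mana_gd_resume(), as shown in the hunks that follow).

/*
 * Illustrative sketch only -- stub types and helpers stand in for the
 * real driver structures so the sequencing compiles on its own.
 */
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MANA_SERVICE_PERIOD 10	/* seconds to wait before resuming */

struct hw_channel_context { unsigned int hwc_timeout; };

struct gdma_context {
	struct hw_channel_context *hwc;
	bool in_service;
};

/* Stand-ins for the real suspend/resume paths (queue teardown/re-create). */
static void suspend_stub(struct gdma_context *gc) { (void)gc; }
static void resume_stub(struct gdma_context *gc)  { (void)gc; }

/* Mirrors the shape of mana_serv_reset(): the HWC is dead, so stop
 * waiting on it, pause the device, sit out the service period, resume.
 */
static void serv_reset_sketch(struct gdma_context *gc)
{
	if (!gc || !gc->hwc)
		return;

	gc->hwc->hwc_timeout = 0;	/* HWC won't answer; skip the wait */

	printf("MANA reset cycle start\n");
	suspend_stub(gc);		/* pause connection, clean up queues */
	sleep(MANA_SERVICE_PERIOD);	/* give the NIC time to reset */
	resume_stub(gc);		/* bring the queues and NIC back up */
	printf("MANA reset cycle completed\n");

	gc->in_service = false;		/* ready for the next service event */
}

int main(void)
{
	struct hw_channel_context hwc = { .hwc_timeout = 30000 };
	struct gdma_context gc = { .hwc = &hwc, .in_service = true };

	serv_reset_sketch(&gc);
	return 0;
}

With hwc_timeout set to 0, subsequent HWC requests fail fast with -ETIMEDOUT, and the mana_need_log() helper added below suppresses error logging for that expected case.
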
+diff --cc drivers/net/ethernet/microsoft/mana/mana_en.c
+index b41b32b37dbc,a7973651ae51..000000000000
+--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
+@@@ -777,7 -860,11 +786,15 @@@ static int mana_send_request(struct man
+ err = mana_gd_send_request(gc, in_len, in_buf, out_len,
+ out_buf);
+ if (err || resp->status) {
+++<<<<<<< HEAD
+ + if (req->req.msg_type != MANA_QUERY_PHY_STAT)
+++=======
++ if (err == -EOPNOTSUPP)
++ return err;
++
++ if (req->req.msg_type != MANA_QUERY_PHY_STAT &&
++ mana_need_log(gc, err))
+++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
+ dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
+ err, resp->status);
+ return err ? err : -EPROTO;
+diff --cc include/net/mana/gdma.h
+index a1661ec549f4,57df78cfbf82..000000000000
+--- a/include/net/mana/gdma.h
++++ b/include/net/mana/gdma.h
+@@@ -60,6 -61,8 +60,11 @@@ enum gdma_eqe_type
+ GDMA_EQE_HWC_INIT_DONE = 131,
+ GDMA_EQE_HWC_FPGA_RECONFIG = 132,
+ GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+++<<<<<<< HEAD
+++=======
++ GDMA_EQE_HWC_SOC_SERVICE = 134,
++ GDMA_EQE_HWC_RESET_REQUEST = 135,
+++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
+ GDMA_EQE_RNIC_QP_FATAL = 176,
+ };
+
+@@@ -560,6 -582,12 +565,15 @@@ enum
+ /* Driver can handle holes (zeros) in the device list */
+ #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11)
+
+++<<<<<<< HEAD
+++=======
++ /* Driver supports dynamic MSI-X vector allocation */
++ #define GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT BIT(13)
++
++ /* Driver can self reset on EQE notification */
++ #define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14)
++
+++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
+ /* Driver can self reset on FPGA Reconfig EQE notification */
+ #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+
+@@@ -569,6 -597,8 +583,11 @@@
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \
+ GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
+++<<<<<<< HEAD
+++=======
++ GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
++ GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
+++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+
+ #define GDMA_DRV_CAP_FLAGS2 0
+@@@ -893,4 -924,11 +912,14 @@@ int mana_gd_destroy_dma_region(struct g
+ void mana_register_debugfs(void);
+ void mana_unregister_debugfs(void);
+
+++<<<<<<< HEAD
+++=======
++ int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event);
++
++ int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state);
++ int mana_gd_resume(struct pci_dev *pdev);
++
++ bool mana_need_log(struct gdma_context *gc, int err);
++
+++>>>>>>> fbe346ce9d62 (net: mana: Handle Reset Request from MANA NIC)
+ #endif /* _GDMA_H */
+diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
+index 5f0228dfbf70..8cd814babcb2 100644
+--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
+@@ -8,6 +8,7 @@
+ #include <linux/version.h>
+
+ #include <net/mana/mana.h>
++#include <net/mana/hw_channel.h>
+
+ struct dentry *mana_debugfs_root;
+
+@@ -66,6 +67,24 @@ static void mana_gd_init_registers(struct pci_dev *pdev)
+ mana_gd_init_vf_regs(pdev);
+ }
+
++/* Suppress logging when we set timeout to zero */
++bool mana_need_log(struct gdma_context *gc, int err)
++{
++ struct hw_channel_context *hwc;
++
++ if (err != -ETIMEDOUT)
++ return true;
++
++ if (!gc)
++ return true;
++
++ hwc = gc->hwc.driver_data;
++ if (hwc && hwc->hwc_timeout == 0)
++ return false;
++
++ return true;
++}
++
+ static int mana_gd_query_max_resources(struct pci_dev *pdev)
+ {
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+@@ -269,8 +288,9 @@ static int mana_gd_disable_queue(struct gdma_queue *queue)
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+- dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
+- resp.hdr.status);
++ if (mana_need_log(gc, err))
++ dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
++ resp.hdr.status);
+ return err ? err : -EPROTO;
+ }
+
+@@ -355,25 +375,12 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
+
+ #define MANA_SERVICE_PERIOD 10
+
+-struct mana_serv_work {
+- struct work_struct serv_work;
+- struct pci_dev *pdev;
+-};
+-
+-static void mana_serv_func(struct work_struct *w)
++static void mana_serv_fpga(struct pci_dev *pdev)
+ {
+- struct mana_serv_work *mns_wk;
+ struct pci_bus *bus, *parent;
+- struct pci_dev *pdev;
+-
+- mns_wk = container_of(w, struct mana_serv_work, serv_work);
+- pdev = mns_wk->pdev;
+
+ pci_lock_rescan_remove();
+
+- if (!pdev)
+- goto out;
+-
+ bus = pdev->bus;
+ if (!bus) {
+ dev_err(&pdev->dev, "MANA service: no bus\n");
+@@ -394,7 +401,74 @@ static void mana_serv_func(struct work_struct *w)
+
+ out:
+ pci_unlock_rescan_remove();
++}
++
++static void mana_serv_reset(struct pci_dev *pdev)
++{
++ struct gdma_context *gc = pci_get_drvdata(pdev);
++ struct hw_channel_context *hwc;
++
++ if (!gc) {
++ dev_err(&pdev->dev, "MANA service: no GC\n");
++ return;
++ }
++
++ hwc = gc->hwc.driver_data;
++ if (!hwc) {
++ dev_err(&pdev->dev, "MANA service: no HWC\n");
++ goto out;
++ }
++
++ /* HWC is not responding in this case, so don't wait */
++ hwc->hwc_timeout = 0;
++
++ dev_info(&pdev->dev, "MANA reset cycle start\n");
+
++ mana_gd_suspend(pdev, PMSG_SUSPEND);
++
++ msleep(MANA_SERVICE_PERIOD * 1000);
++
++ mana_gd_resume(pdev);
++
++ dev_info(&pdev->dev, "MANA reset cycle completed\n");
++
++out:
++ gc->in_service = false;
++}
++
++struct mana_serv_work {
++ struct work_struct serv_work;
++ struct pci_dev *pdev;
++ enum gdma_eqe_type type;
++};
++
++static void mana_serv_func(struct work_struct *w)
++{
++ struct mana_serv_work *mns_wk;
++ struct pci_dev *pdev;
++
++ mns_wk = container_of(w, struct mana_serv_work, serv_work);
++ pdev = mns_wk->pdev;
++
++ if (!pdev)
++ goto out;
++
++ switch (mns_wk->type) {
++ case GDMA_EQE_HWC_FPGA_RECONFIG:
++ mana_serv_fpga(pdev);
++ break;
++
++ case GDMA_EQE_HWC_RESET_REQUEST:
++ mana_serv_reset(pdev);
++ break;
++
++ default:
++ dev_err(&pdev->dev, "MANA service: unknown type %d\n",
++ mns_wk->type);
++ break;
++ }
++
++out:
+ pci_dev_put(pdev);
+ kfree(mns_wk);
+ module_put(THIS_MODULE);
+@@ -450,6 +524,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
+ break;
+
+ case GDMA_EQE_HWC_FPGA_RECONFIG:
++ case GDMA_EQE_HWC_RESET_REQUEST:
+ dev_info(gc->dev, "Recv MANA service type:%d\n", type);
+
+ if (gc->in_service) {
+@@ -471,6 +546,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
+ dev_info(gc->dev, "Start MANA service type:%d\n", type);
+ gc->in_service = true;
+ mns_wk->pdev = to_pci_dev(gc->dev);
++ mns_wk->type = type;
+ pci_dev_get(mns_wk->pdev);
+ INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+ schedule_work(&mns_wk->serv_work);
+@@ -617,7 +693,8 @@ int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+- dev_err(dev, "test_eq failed: %d\n", err);
++ if (mana_need_log(gc, err))
++ dev_err(dev, "test_eq failed: %d\n", err);
+ goto out;
+ }
+
+@@ -652,7 +729,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
+
+ if (flush_evenets) {
+ err = mana_gd_test_eq(gc, queue);
+- if (err)
++ if (err && mana_need_log(gc, err))
+ dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
+ }
+
+@@ -798,8 +875,9 @@ int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+- dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
+- err, resp.hdr.status);
++ if (mana_need_log(gc, err))
++ dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
++ err, resp.hdr.status);
+ return -EPROTO;
+ }
+
+@@ -1098,8 +1176,9 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+- dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
+- err, resp.hdr.status);
++ if (mana_need_log(gc, err))
++ dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
++ err, resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+ }
+@@ -1699,7 +1778,7 @@ static void mana_gd_remove(struct pci_dev *pdev)
+ }
+
+ /* The 'state' parameter is not used. */
+-static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
++int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
+ {
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+
+@@ -1714,7 +1793,7 @@ static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
+ * fail -- if this happens, it's safer to just report an error than try to undo
+ * what has been done.
+ */
+-static int mana_gd_resume(struct pci_dev *pdev)
++int mana_gd_resume(struct pci_dev *pdev)
+ {
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+ int err;
+diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
+index feb3b74700ed..58da1fb1baaf 100644
+--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
+@@ -861,7 +861,9 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
+
+ if (!wait_for_completion_timeout(&ctx->comp_event,
+ (msecs_to_jiffies(hwc->hwc_timeout)))) {
+- dev_err(hwc->dev, "HWC: Request timed out!\n");
++ if (hwc->hwc_timeout != 0)
++ dev_err(hwc->dev, "HWC: Request timed out!\n");
++
+ err = -ETIMEDOUT;
+ goto out;
+ }
+* Unmerged path drivers/net/ethernet/microsoft/mana/mana_en.c
+* Unmerged path include/net/mana/gdma.h