Skip to content

Commit 5e162f8

Browse files
trusinowicz authored and jlawryno committed
accel/ivpu: Add FW state dump on TDR
Send JSM state dump message at the beginning of TDR handler. This allows FW to collect debug info in the FW log before the state of the NPU is lost, making it possible to analyze the cause of a TDR.

Wait a predefined timeout (10 ms) so the FW has a chance to write debug logs. We cannot wait for JSM response at this point because IRQs are already disabled before TDR handler is invoked.

Signed-off-by: Tomasz Rusinowicz <[email protected]>
Reviewed-by: Jacek Lawrynowicz <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Signed-off-by: Jacek Lawrynowicz <[email protected]>
1 parent bade034 commit 5e162f8

File tree

7 files changed

+43
-0
lines changed

7 files changed

+43
-0
lines changed

drivers/accel/ivpu/ivpu_drv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct ivpu_device {
152152
int tdr;
153153
int autosuspend;
154154
int d0i3_entry_msg;
155+
int state_dump_msg;
155156
} timeout;
156157
};
157158

drivers/accel/ivpu/ivpu_hw.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev)
8989
vdev->timeout.tdr = 2000000;
9090
vdev->timeout.autosuspend = -1;
9191
vdev->timeout.d0i3_entry_msg = 500;
92+
vdev->timeout.state_dump_msg = 10;
9293
} else if (ivpu_is_simics(vdev)) {
9394
vdev->timeout.boot = 50;
9495
vdev->timeout.jsm = 500;
9596
vdev->timeout.tdr = 10000;
9697
vdev->timeout.autosuspend = -1;
9798
vdev->timeout.d0i3_entry_msg = 100;
99+
vdev->timeout.state_dump_msg = 10;
98100
} else {
99101
vdev->timeout.boot = 1000;
100102
vdev->timeout.jsm = 500;
@@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev)
104106
else
105107
vdev->timeout.autosuspend = 100;
106108
vdev->timeout.d0i3_entry_msg = 5;
109+
vdev->timeout.state_dump_msg = 10;
107110
}
108111
}
109112

drivers/accel/ivpu/ivpu_ipc.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,32 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
364364
return ret;
365365
}
366366

367+
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
368+
u32 channel, unsigned long timeout_ms)
369+
{
370+
struct ivpu_ipc_consumer cons;
371+
int ret;
372+
373+
ret = ivpu_rpm_get(vdev);
374+
if (ret < 0)
375+
return ret;
376+
377+
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
378+
379+
ret = ivpu_ipc_send(vdev, &cons, req);
380+
if (ret) {
381+
ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret);
382+
goto consumer_del;
383+
}
384+
385+
msleep(timeout_ms);
386+
387+
consumer_del:
388+
ivpu_ipc_consumer_del(vdev, &cons);
389+
ivpu_rpm_put(vdev);
390+
return ret;
391+
}
392+
367393
static bool
368394
ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
369395
struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)

drivers/accel/ivpu/ivpu_ipc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,7 @@ int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *r
108108
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
109109
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
110110
u32 channel, unsigned long timeout_ms);
111+
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
112+
u32 channel, unsigned long timeout_ms);
111113

112114
#endif /* __IVPU_IPC_H__ */

drivers/accel/ivpu/ivpu_jsm_msg.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,3 +559,11 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
559559
&resp, VPU_IPC_CHAN_ASYNC_CMD,
560560
vdev->timeout.jsm);
561561
}
562+
563+
int ivpu_jsm_state_dump(struct ivpu_device *vdev)
564+
{
565+
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP };
566+
567+
return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD,
568+
vdev->timeout.state_dump_msg);
569+
}

drivers/accel/ivpu/ivpu_jsm_msg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,6 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas
4343
u64 buffer_size, u32 *sample_size, u64 *info_size);
4444
int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us);
4545
int ivpu_jsm_dct_disable(struct ivpu_device *vdev);
46+
int ivpu_jsm_state_dump(struct ivpu_device *vdev);
47+
4648
#endif

drivers/accel/ivpu/ivpu_pm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
125125
if (ret)
126126
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
127127

128+
ivpu_jsm_state_dump(vdev);
128129
ivpu_dev_coredump(vdev);
129130

130131
atomic_inc(&vdev->pm->reset_counter);

0 commit comments

Comments
 (0)