Skip to content

Commit b43b995

Browse files
mbrost05 authored and johnharr-intel committed
drm/i915/guc: Add stall timer to non blocking CTB send function
Implement a stall timer which fails H2G CTBs once a period of time with no forward progress is reached to prevent deadlock. v2: (Michal) - Improve error message in ct_deadlock() - Set broken when ct_deadlock() returns true - Return -EPIPE on ct_deadlock() v3: (Michal) - Add ms to stall timer comment (Matthew) - Move broken check to intel_guc_ct_send() Signed-off-by: John Harrison <[email protected]> Signed-off-by: Daniele Ceraolo Spurio <[email protected]> Signed-off-by: Matthew Brost <[email protected]> Reviewed-by: John Harrison <[email protected]> Signed-off-by: John Harrison <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 1681924 commit b43b995

File tree

2 files changed

+59
-7
lines changed

2 files changed

+59
-7
lines changed

drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
*/
55

66
#include <linux/circ_buf.h>
7+
#include <linux/ktime.h>
8+
#include <linux/time64.h>
9+
#include <linux/timekeeping.h>
710

811
#include "i915_drv.h"
912
#include "intel_guc_ct.h"
@@ -316,6 +319,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
316319
goto err_deregister;
317320

318321
ct->enabled = true;
322+
ct->stall_time = KTIME_MAX;
319323

320324
return 0;
321325

@@ -389,9 +393,6 @@ static int ct_write(struct intel_guc_ct *ct,
389393
u32 *cmds = ctb->cmds;
390394
unsigned int i;
391395

392-
if (unlikely(ctb->broken))
393-
return -EPIPE;
394-
395396
if (unlikely(desc->status))
396397
goto corrupted;
397398

@@ -505,6 +506,25 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
505506
return err;
506507
}
507508

509+
#define GUC_CTB_TIMEOUT_MS 1500
510+
static inline bool ct_deadlocked(struct intel_guc_ct *ct)
511+
{
512+
long timeout = GUC_CTB_TIMEOUT_MS;
513+
bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout;
514+
515+
if (unlikely(ret)) {
516+
struct guc_ct_buffer_desc *send = ct->ctbs.send.desc;
517+
struct guc_ct_buffer_desc *recv = ct->ctbs.send.desc;
518+
519+
CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n",
520+
ktime_ms_delta(ktime_get(), ct->stall_time),
521+
send->status, recv->status);
522+
ct->ctbs.send.broken = true;
523+
}
524+
525+
return ret;
526+
}
527+
508528
static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw)
509529
{
510530
struct guc_ct_buffer_desc *desc = ctb->desc;
@@ -516,6 +536,26 @@ static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb, u32 len_dw)
516536
return space >= len_dw;
517537
}
518538

539+
/*
 * has_room_nb - non-blocking space check for the send buffer
 * @ct: CT state
 * @len_dw: number of dwords that must fit, as passed to h2g_has_room()
 *
 * Must be called with the send buffer lock held. While there is no room,
 * ct->stall_time records when the shortage was first seen; it is reset to
 * KTIME_MAX as soon as room is available again.
 *
 * Return: 0 if there is room, -EBUSY if there is currently no room,
 * -EPIPE if ct_deadlocked() reports that the stall lasted too long.
 */
static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
{
	struct intel_guc_ct_buffer *h2g = &ct->ctbs.send;

	lockdep_assert_held(&h2g->lock);

	if (unlikely(!h2g_has_room(h2g, len_dw))) {
		/* Start the stall clock only on the first failed attempt. */
		if (ct->stall_time == KTIME_MAX)
			ct->stall_time = ktime_get();

		return unlikely(ct_deadlocked(ct)) ? -EPIPE : -EBUSY;
	}

	/* Space is available: disarm the stall timer. */
	ct->stall_time = KTIME_MAX;

	return 0;
}
558+
519559
static int ct_send_nb(struct intel_guc_ct *ct,
520560
const u32 *action,
521561
u32 len,
@@ -528,11 +568,9 @@ static int ct_send_nb(struct intel_guc_ct *ct,
528568

529569
spin_lock_irqsave(&ctb->lock, spin_flags);
530570

531-
ret = h2g_has_room(ctb, len + GUC_CTB_HDR_LEN);
532-
if (unlikely(!ret)) {
533-
ret = -EBUSY;
571+
ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN);
572+
if (unlikely(ret))
534573
goto out;
535-
}
536574

537575
fence = ct_get_next_fence(ct);
538576
ret = ct_write(ct, action, len, fence, flags);
@@ -575,15 +613,22 @@ static int ct_send(struct intel_guc_ct *ct,
575613
retry:
576614
spin_lock_irqsave(&ctb->lock, flags);
577615
if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
616+
if (ct->stall_time == KTIME_MAX)
617+
ct->stall_time = ktime_get();
578618
spin_unlock_irqrestore(&ctb->lock, flags);
579619

620+
if (unlikely(ct_deadlocked(ct)))
621+
return -EPIPE;
622+
580623
if (msleep_interruptible(sleep_period_ms))
581624
return -EINTR;
582625
sleep_period_ms = sleep_period_ms << 1;
583626

584627
goto retry;
585628
}
586629

630+
ct->stall_time = KTIME_MAX;
631+
587632
fence = ct_get_next_fence(ct);
588633
request.fence = fence;
589634
request.status = 0;
@@ -646,6 +691,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
646691
return -ENODEV;
647692
}
648693

694+
if (unlikely(ct->ctbs.send.broken))
695+
return -EPIPE;
696+
649697
if (flags & INTEL_GUC_CT_SEND_NB)
650698
return ct_send_nb(ct, action, len, flags);
651699

drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/interrupt.h>
1010
#include <linux/spinlock.h>
1111
#include <linux/workqueue.h>
12+
#include <linux/ktime.h>
1213

1314
#include "intel_guc_fwif.h"
1415

@@ -68,6 +69,9 @@ struct intel_guc_ct {
6869
struct list_head incoming; /* incoming requests */
6970
struct work_struct worker; /* handler for incoming requests */
7071
} requests;
72+
73+
/**
 * @stall_time: time at which a CTB submission first stalled;
 * KTIME_MAX while no stall is in progress
 */
74+
ktime_t stall_time;
7175
};
7276

7377
void intel_guc_ct_init_early(struct intel_guc_ct *ct);

0 commit comments

Comments
 (0)