Skip to content

Commit 702c8f8

Browse files
mthierry, ickle
authored and committed
drm/i915: Add engine reset count to error state
The driver maintains a count of how many times a given engine has been reset; it is useful to capture this in the error state as well. It gives an idea of how the engine was coping with the workloads it was executing before this error state. A follow-up patch will provide this information in debugfs. v2: s/engine_reset/reset_engine/ (Chris); define count as unsigned int (Tvrtko). Cc: Chris Wilson <[email protected]> Cc: Mika Kuoppala <[email protected]> Signed-off-by: Arun Siluvery <[email protected]> Signed-off-by: Michel Thierry <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected] Reviewed-by: Chris Wilson <[email protected]> Signed-off-by: Chris Wilson <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent a1ef70e commit 702c8f8

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

drivers/gpu/drm/i915/i915_drv.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1971,7 +1971,10 @@ int i915_reset_engine(struct intel_engine_cs *engine)
19711971
* process to program RING_MODE, HWSP and re-enable submission.
19721972
*/
19731973
ret = engine->init_hw(engine);
1974+
if (ret)
1975+
goto out;
19741976

1977+
error->reset_engine_count[engine->id]++;
19751978
out:
19761979
return ret;
19771980
}

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,7 @@ struct i915_gpu_state {
917917
enum intel_engine_hangcheck_action hangcheck_action;
918918
struct i915_address_space *vm;
919919
int num_requests;
920+
u32 reset_count;
920921

921922
/* position of active request inside the ring */
922923
u32 rq_head, rq_post, rq_tail;
@@ -1567,6 +1568,9 @@ struct i915_gpu_error {
15671568
#define I915_WEDGED (BITS_PER_LONG - 1)
15681569
#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
15691570

1571+
/** Number of times an engine has been reset */
1572+
u32 reset_engine_count[I915_NUM_ENGINES];
1573+
15701574
/**
15711575
* Waitqueue to signal when a hang is detected. Used for waiters
15721576
* to release the struct_mutex for the reset to proceed.
@@ -3474,6 +3478,12 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
34743478
return READ_ONCE(error->reset_count);
34753479
}
34763480

3481+
/**
 * i915_reset_engine_count - read the reset counter of a single engine
 * @error: GPU error bookkeeping holding the reset_engine_count[] array
 * @engine: engine to query; engine->id is used as the array index
 *
 * Returns the current value of error->reset_engine_count[engine->id],
 * loaded through READ_ONCE(). No locking is taken here.
 */
static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
3482+
struct intel_engine_cs *engine)
3483+
{
3484+
return READ_ONCE(error->reset_engine_count[engine->id]);
3485+
}
3486+
34773487
struct drm_i915_gem_request *
34783488
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
34793489
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);

drivers/gpu/drm/i915/i915_gpu_error.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
463463
err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n",
464464
ee->hangcheck_timestamp,
465465
jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
466+
err_printf(m, " engine reset count: %u\n", ee->reset_count);
466467

467468
error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
468469
error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
@@ -1236,6 +1237,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
12361237
ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
12371238
ee->hangcheck_action = engine->hangcheck.action;
12381239
ee->hangcheck_stalled = engine->hangcheck.stalled;
1240+
ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
1241+
engine);
12391242

12401243
if (USES_PPGTT(dev_priv)) {
12411244
int i;

0 commit comments

Comments
 (0)