Skip to content

Commit abeb4de

Browse files
mthierry and ickle
authored and committed
drm/i915/selftests: reset engine self tests
Check that we can reset specific engines, also check the fallback to full reset if something didn't work. v2: rebase. v3: use RESET_ENGINE_IN_PROGRESS flag. v4: use I915_RESET_ENGINE flag. Signed-off-by: Michel Thierry <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected] Reviewed-by: Chris Wilson <[email protected]> Signed-off-by: Chris Wilson <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 061d06a commit abeb4de

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

drivers/gpu/drm/i915/selftests/intel_hangcheck.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,54 @@ static int igt_global_reset(void *arg)
324324
return err;
325325
}
326326

327+
static int igt_reset_engine(void *arg)
328+
{
329+
struct drm_i915_private *i915 = arg;
330+
struct intel_engine_cs *engine;
331+
enum intel_engine_id id;
332+
unsigned int reset_count, reset_engine_count;
333+
int err = 0;
334+
335+
/* Check that we can issue a global GPU and engine reset */
336+
337+
if (!intel_has_reset_engine(i915))
338+
return 0;
339+
340+
for_each_engine(engine, i915, id) {
341+
set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
342+
reset_count = i915_reset_count(&i915->gpu_error);
343+
reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
344+
engine);
345+
346+
err = i915_reset_engine(engine);
347+
if (err) {
348+
pr_err("i915_reset_engine failed\n");
349+
break;
350+
}
351+
352+
if (i915_reset_count(&i915->gpu_error) != reset_count) {
353+
pr_err("Full GPU reset recorded! (engine reset expected)\n");
354+
err = -EINVAL;
355+
break;
356+
}
357+
358+
if (i915_reset_engine_count(&i915->gpu_error, engine) ==
359+
reset_engine_count) {
360+
pr_err("No %s engine reset recorded!\n", engine->name);
361+
err = -EINVAL;
362+
break;
363+
}
364+
365+
clear_bit(I915_RESET_ENGINE + engine->id,
366+
&i915->gpu_error.flags);
367+
}
368+
369+
if (i915_terminally_wedged(&i915->gpu_error))
370+
err = -EIO;
371+
372+
return err;
373+
}
374+
327375
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
328376
{
329377
u32 reset_count;
@@ -530,13 +578,113 @@ static int igt_reset_queue(void *arg)
530578
return err;
531579
}
532580

581+
static int igt_render_engine_reset_fallback(void *arg)
582+
{
583+
struct drm_i915_private *i915 = arg;
584+
struct intel_engine_cs *engine = i915->engine[RCS];
585+
struct hang h;
586+
struct drm_i915_gem_request *rq;
587+
unsigned int reset_count, reset_engine_count;
588+
int err = 0;
589+
590+
/* Check that we can issue a global GPU and engine reset */
591+
592+
if (!intel_has_reset_engine(i915))
593+
return 0;
594+
595+
set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
596+
mutex_lock(&i915->drm.struct_mutex);
597+
598+
err = hang_init(&h, i915);
599+
if (err)
600+
goto unlock;
601+
602+
rq = hang_create_request(&h, engine, i915->kernel_context);
603+
if (IS_ERR(rq)) {
604+
err = PTR_ERR(rq);
605+
goto fini;
606+
}
607+
608+
i915_gem_request_get(rq);
609+
__i915_add_request(rq, true);
610+
611+
/* make reset engine fail */
612+
rq->fence.error = -EIO;
613+
614+
if (!wait_for_hang(&h, rq)) {
615+
pr_err("Failed to start request %x\n", rq->fence.seqno);
616+
err = -EIO;
617+
goto out_rq;
618+
}
619+
620+
reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine);
621+
reset_count = fake_hangcheck(rq);
622+
623+
/* unlock since we'll call handle_error */
624+
mutex_unlock(&i915->drm.struct_mutex);
625+
clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
626+
wake_up_all(&i915->gpu_error.reset_queue);
627+
628+
i915_handle_error(i915, intel_engine_flag(engine), "live test");
629+
630+
if (i915_reset_engine_count(&i915->gpu_error, engine) !=
631+
reset_engine_count) {
632+
pr_err("render engine reset recorded! (full reset expected)\n");
633+
err = -EINVAL;
634+
goto out_rq;
635+
}
636+
637+
if (i915_reset_count(&i915->gpu_error) == reset_count) {
638+
pr_err("No full GPU reset recorded!\n");
639+
err = -EINVAL;
640+
goto out_rq;
641+
}
642+
643+
/*
644+
* by using fence.error = -EIO, full reset sets the wedged flag, do one
645+
* more full reset to re-enable the hw.
646+
*/
647+
if (i915_terminally_wedged(&i915->gpu_error)) {
648+
set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
649+
mutex_lock(&i915->drm.struct_mutex);
650+
rq->fence.error = 0;
651+
652+
set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
653+
i915_reset(i915);
654+
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
655+
&i915->gpu_error.flags));
656+
657+
if (i915_reset_count(&i915->gpu_error) == reset_count) {
658+
pr_err("No full GPU reset recorded!\n");
659+
err = -EINVAL;
660+
goto out_rq;
661+
}
662+
}
663+
664+
out_rq:
665+
i915_gem_request_put(rq);
666+
fini:
667+
hang_fini(&h);
668+
unlock:
669+
mutex_unlock(&i915->drm.struct_mutex);
670+
clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
671+
wake_up_all(&i915->gpu_error.reset_queue);
672+
673+
if (i915_terminally_wedged(&i915->gpu_error))
674+
return -EIO;
675+
676+
return err;
677+
}
678+
533679
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
534680
{
535681
static const struct i915_subtest tests[] = {
536682
SUBTEST(igt_hang_sanitycheck),
537683
SUBTEST(igt_global_reset),
684+
SUBTEST(igt_reset_engine),
538685
SUBTEST(igt_wait_reset),
539686
SUBTEST(igt_reset_queue),
687+
SUBTEST(igt_render_engine_reset_fallback),
540688
};
541689

542690
if (!intel_has_gpu_reset(i915))

0 commit comments

Comments
 (0)