Skip to content

Commit ebece75

Browse files
committed
drm/i915: Keep timeline HWSP allocated until idle across the system
In preparation for enabling HW semaphores, we need to keep the in-flight timeline HWSP alive until its use across the entire system has completed, as any other timeline active on the GPU may still refer back to the already retired timeline. We have to delay both recycling available cachelines and unpinning the old HWSP until the next idle point.

An easy option would be to simply keep all used HWSP until the system as a whole was idle, i.e. we could release them all at once on parking. However, on a busy system, we may never see a global idle point, essentially meaning the resource will be leaked until we are forced to do a GC pass. We already employ a fine-grained idle detection mechanism for vma, which we can reuse here so that each cacheline can be freed immediately after the last request using it is retired.

v3: Keep track of the activity of each cacheline.
v4: cacheline_free() on canceling the seqno tracking
v5: Finally with a testcase to exercise wraparound
v6: Pack cacheline into empty bits of page-aligned vaddr
v7: Use i915_utils to hide the pointer casting around bit manipulation

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Tvrtko Ursulin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 1e3f697 commit ebece75

File tree

5 files changed

+420
-39
lines changed

5 files changed

+420
-39
lines changed

drivers/gpu/drm/i915/i915_request.c

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
325325
} while (tmp != rq);
326326
}
327327

328-
static u32 timeline_get_seqno(struct i915_timeline *tl)
329-
{
330-
return tl->seqno += 1 + tl->has_initial_breadcrumb;
331-
}
332-
333328
static void move_to_timeline(struct i915_request *request,
334329
struct i915_timeline *timeline)
335330
{
@@ -532,8 +527,10 @@ struct i915_request *
532527
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
533528
{
534529
struct drm_i915_private *i915 = engine->i915;
535-
struct i915_request *rq;
536530
struct intel_context *ce;
531+
struct i915_timeline *tl;
532+
struct i915_request *rq;
533+
u32 seqno;
537534
int ret;
538535

539536
lockdep_assert_held(&i915->drm.struct_mutex);
@@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
610607
}
611608
}
612609

613-
rq->rcustate = get_state_synchronize_rcu();
614-
615610
INIT_LIST_HEAD(&rq->active_list);
611+
612+
tl = ce->ring->timeline;
613+
ret = i915_timeline_get_seqno(tl, rq, &seqno);
614+
if (ret)
615+
goto err_free;
616+
616617
rq->i915 = i915;
617618
rq->engine = engine;
618619
rq->gem_context = ctx;
619620
rq->hw_context = ce;
620621
rq->ring = ce->ring;
621-
rq->timeline = ce->ring->timeline;
622+
rq->timeline = tl;
622623
GEM_BUG_ON(rq->timeline == &engine->timeline);
623-
rq->hwsp_seqno = rq->timeline->hwsp_seqno;
624+
rq->hwsp_seqno = tl->hwsp_seqno;
625+
rq->hwsp_cacheline = tl->hwsp_cacheline;
626+
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
624627

625628
spin_lock_init(&rq->lock);
626-
dma_fence_init(&rq->fence,
627-
&i915_fence_ops,
628-
&rq->lock,
629-
rq->timeline->fence_context,
630-
timeline_get_seqno(rq->timeline));
629+
dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
630+
tl->fence_context, seqno);
631631

632632
/* We bump the ref for the fence chain */
633633
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
@@ -687,6 +687,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
687687
GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
688688
GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
689689

690+
err_free:
690691
kmem_cache_free(global.slab_requests, rq);
691692
err_unreserve:
692693
mutex_unlock(&ce->ring->timeline->mutex);

drivers/gpu/drm/i915/i915_request.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ struct drm_file;
3838
struct drm_i915_gem_object;
3939
struct i915_request;
4040
struct i915_timeline;
41+
struct i915_timeline_cacheline;
4142

4243
struct i915_capture_list {
4344
struct i915_capture_list *next;
@@ -148,6 +149,16 @@ struct i915_request {
148149
*/
149150
const u32 *hwsp_seqno;
150151

152+
/*
153+
* If we need to access the timeline's seqno for this request in
154+
* another request, we need to keep a read reference to this associated
155+
* cacheline, so that we do not free and recycle it before the foreign
156+
* observers have completed. Hence, we keep a pointer to the cacheline
157+
* inside the timeline's HWSP vma, but it is only valid while this
158+
	 * request has not completed and is guarded by the timeline mutex.
159+
*/
160+
struct i915_timeline_cacheline *hwsp_cacheline;
161+
151162
/** Position in the ring of the start of the request */
152163
u32 head;
153164

0 commit comments

Comments
 (0)