
Commit 61231f6

drm/i915/gem: Check that the context wasn't closed during setup
As setup takes a long time, the user may close the context during the construction of the execbuf. In order to make sure we correctly track all outstanding work with non-persistent contexts, we need to serialise the submission with the context closure and mop up any leaks.

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Matthew Auld <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent: 373f27f
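The core idea of the patch is that submission commits the request onto its timeline under the timeline lock and only then checks whether the owning context is still open; if the context was closed in the meantime, the request is marked skipped (-ENOENT) so it still flows through the normal completion path instead of leaking. The stand-alone sketch below only illustrates that locking pattern; the names (demo_context, demo_request, demo_request_add) are made up for the example and are not part of the i915 code.

/*
 * Stand-alone sketch (NOT i915 code): submission commits under the
 * timeline lock and then checks whether the context was closed, so a
 * close racing with execbuf construction is observed and the request
 * is marked skipped instead of leaking.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_context {
        pthread_mutex_t timeline_lock;  /* stands in for tl->mutex */
        bool closed;                    /* set by the close path under the lock */
};

struct demo_request {
        int status;                     /* 0 = queued, -ENOENT = skipped */
};

/* Close path: takes the same lock, so it serialises with submission. */
static void demo_context_close(struct demo_context *ctx)
{
        pthread_mutex_lock(&ctx->timeline_lock);
        ctx->closed = true;
        pthread_mutex_unlock(&ctx->timeline_lock);
}

/* Submit path: commit under the lock, then either queue or skip. */
static void demo_request_add(struct demo_context *ctx, struct demo_request *rq)
{
        pthread_mutex_lock(&ctx->timeline_lock);
        rq->status = ctx->closed ? -ENOENT : 0;
        pthread_mutex_unlock(&ctx->timeline_lock);
}

int main(void)
{
        struct demo_context ctx = {
                .timeline_lock = PTHREAD_MUTEX_INITIALIZER,
                .closed = false,
        };
        struct demo_request rq = { .status = 0 };

        demo_context_close(&ctx);       /* user closes the context mid-setup */
        demo_request_add(&ctx, &rq);    /* submission observes the closure */
        printf("request status: %d\n", rq.status); /* -ENOENT (-2 on Linux) */
        return 0;
}

In the patch itself the equivalent decision happens in the new eb_request_add(): after __i915_request_commit(), the context's gem_context pointer is checked and, if the context has gone, the request is skipped with i915_request_skip(rq, -ENOENT), as shown in the diff below.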

File tree: 2 files changed (+75, −47 lines)

drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

Lines changed: 67 additions & 1 deletion
@@ -2566,6 +2566,72 @@ signal_fence_array(struct i915_execbuffer *eb,
         }
 }
 
+static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
+{
+        struct i915_request *rq, *rn;
+
+        list_for_each_entry_safe(rq, rn, &tl->requests, link)
+                if (rq == end || !i915_request_retire(rq))
+                        break;
+}
+
+static void eb_request_add(struct i915_execbuffer *eb)
+{
+        struct i915_request *rq = eb->request;
+        struct intel_timeline * const tl = i915_request_timeline(rq);
+        struct i915_sched_attr attr = {};
+        struct i915_request *prev;
+
+        lockdep_assert_held(&tl->mutex);
+        lockdep_unpin_lock(&tl->mutex, rq->cookie);
+
+        trace_i915_request_add(rq);
+
+        prev = __i915_request_commit(rq);
+
+        /* Check that the context wasn't destroyed before submission */
+        if (likely(rcu_access_pointer(eb->context->gem_context))) {
+                attr = eb->gem_context->sched;
+
+                /*
+                 * Boost actual workloads past semaphores!
+                 *
+                 * With semaphores we spin on one engine waiting for another,
+                 * simply to reduce the latency of starting our work when
+                 * the signaler completes. However, if there is any other
+                 * work that we could be doing on this engine instead, that
+                 * is better utilisation and will reduce the overall duration
+                 * of the current work. To avoid PI boosting a semaphore
+                 * far in the distance past over useful work, we keep a history
+                 * of any semaphore use along our dependency chain.
+                 */
+                if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+                        attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+                /*
+                 * Boost priorities to new clients (new request flows).
+                 *
+                 * Allow interactive/synchronous clients to jump ahead of
+                 * the bulk clients. (FQ_CODEL)
+                 */
+                if (list_empty(&rq->sched.signalers_list))
+                        attr.priority |= I915_PRIORITY_WAIT;
+        } else {
+                /* Serialise with context_close via the add_to_timeline */
+                i915_request_skip(rq, -ENOENT);
+        }
+
+        local_bh_disable();
+        __i915_request_queue(rq, &attr);
+        local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
+
+        /* Try to clean up the client's timeline after submitting the request */
+        if (prev)
+                retire_requests(tl, prev);
+
+        mutex_unlock(&tl->mutex);
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
                        struct drm_file *file,

@@ -2778,7 +2844,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_request:
         add_to_client(eb.request, file);
         i915_request_get(eb.request);
-        i915_request_add(eb.request);
+        eb_request_add(&eb);
 
         if (fences)
                 signal_fence_array(&eb, fences);

drivers/gpu/drm/i915/i915_request.c

Lines changed: 8 additions & 46 deletions
@@ -1339,68 +1339,30 @@ void i915_request_add(struct i915_request *rq)
 {
         struct intel_timeline * const tl = i915_request_timeline(rq);
         struct i915_sched_attr attr = {};
-        struct i915_request *prev;
+        struct i915_gem_context *ctx;
 
         lockdep_assert_held(&tl->mutex);
         lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
         trace_i915_request_add(rq);
+        __i915_request_commit(rq);
 
-        prev = __i915_request_commit(rq);
-
-        if (rcu_access_pointer(rq->context->gem_context))
-                attr = i915_request_gem_context(rq)->sched;
+        /* XXX placeholder for selftests */
+        rcu_read_lock();
+        ctx = rcu_dereference(rq->context->gem_context);
+        if (ctx)
+                attr = ctx->sched;
+        rcu_read_unlock();
 
-        /*
-         * Boost actual workloads past semaphores!
-         *
-         * With semaphores we spin on one engine waiting for another,
-         * simply to reduce the latency of starting our work when
-         * the signaler completes. However, if there is any other
-         * work that we could be doing on this engine instead, that
-         * is better utilisation and will reduce the overall duration
-         * of the current work. To avoid PI boosting a semaphore
-         * far in the distance past over useful work, we keep a history
-         * of any semaphore use along our dependency chain.
-         */
         if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
                 attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-        /*
-         * Boost priorities to new clients (new request flows).
-         *
-         * Allow interactive/synchronous clients to jump ahead of
-         * the bulk clients. (FQ_CODEL)
-         */
         if (list_empty(&rq->sched.signalers_list))
                 attr.priority |= I915_PRIORITY_WAIT;
 
         local_bh_disable();
         __i915_request_queue(rq, &attr);
         local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
-        /*
-         * In typical scenarios, we do not expect the previous request on
-         * the timeline to be still tracked by timeline->last_request if it
-         * has been completed. If the completed request is still here, that
-         * implies that request retirement is a long way behind submission,
-         * suggesting that we haven't been retiring frequently enough from
-         * the combination of retire-before-alloc, waiters and the background
-         * retirement worker. So if the last request on this timeline was
-         * already completed, do a catch up pass, flushing the retirement queue
-         * up to this client. Since we have now moved the heaviest operations
-         * during retirement onto secondary workers, such as freeing objects
-         * or contexts, retiring a bunch of requests is mostly list management
-         * (and cache misses), and so we should not be overly penalizing this
-         * client by performing excess work, though we may still performing
-         * work on behalf of others -- but instead we should benefit from
-         * improved resource management. (Well, that's the theory at least.)
-         */
-        if (prev &&
-            i915_request_completed(prev) &&
-            rcu_access_pointer(prev->timeline) == tl)
-                i915_request_retire_upto(prev);
-
         mutex_unlock(&tl->mutex);
 }