
Commit b7404c7

drm/i915: Bump ready tasks ahead of busywaits
Consider two tasks that are running in parallel on a pair of engines (vcs0, vcs1), but must both complete on a shared engine (rcs0). To maximise throughput, we want to run the first ready task on rcs0 (i.e. the first task that completes on either vcs0 or vcs1). When using semaphores, however, we instead queue onto rcs0 in submission order.

To resolve this incorrect ordering, we want to re-evaluate the priority queue as each request becomes ready. Normally this happens because we only insert ready requests into the priority queue, but with semaphores we insert requests ahead of their readiness, and to compensate we penalise those tasks with reduced priority (so that tasks which do not need to busywait naturally run first). However, given a series of tasks that each use semaphores, the queue degrades into a submission fifo rather than a readiness fifo. To counter this, we give a small boost to a semaphore user as each of its dependencies completes: at that point no busywait is required before running the task, as it is then ready itself.

v2: Fixup irqsave for schedule_lock (Tvrtko)

Testcase: igt/gem_exec_schedule/semaphore-codependency
Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Dmitry Rogozhkin <[email protected]>
Cc: Dmitry Ermilov <[email protected]>
Reviewed-by: Tvrtko Ursulin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
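For intuition, the ordering problem is easy to reproduce outside the driver. The sketch below is plain userspace C, not i915 code; the task names, timings, and the drain() helper are invented for illustration. It drains a shared queue once in submission order and once in readiness order:

	#include <stdio.h>

	/* Hypothetical workloads: "ready" is when the vcs half finishes,
	 * "runtime" is the cost of the shared rcs0 half. */
	struct task { const char *name; int ready; int runtime; };

	static int drain(struct task *q[], int n)
	{
		int clock = 0;

		for (int i = 0; i < n; i++) {
			if (clock < q[i]->ready) /* rcs0 idles/busywaits */
				clock = q[i]->ready;
			clock += q[i]->runtime;  /* then runs the task */
		}
		return clock; /* time at which the last task completes */
	}

	int main(void)
	{
		struct task a = { "vcs0-task", 8, 4 }; /* submitted first, ready late */
		struct task b = { "vcs1-task", 2, 4 }; /* submitted second, ready early */

		struct task *submission[] = { &a, &b }; /* fifo by submission */
		struct task *readiness[]  = { &b, &a }; /* fifo by readiness */

		printf("submission order finishes at t=%d\n", drain(submission, 2));
		printf("readiness order finishes at t=%d\n", drain(readiness, 2));
		return 0;
	}

With these made-up timings, submission order finishes at t=16 while readiness order finishes at t=12; the priority bump below is what steers the real queue toward the second schedule.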
1 parent 9726920 commit b7404c7

File tree

3 files changed: +52 −10 lines changed

drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_scheduler.c

drivers/gpu/drm/i915/i915_request.c

Lines changed: 40 additions & 0 deletions
@@ -552,6 +552,36 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
+static int __i915_sw_fence_call
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct i915_request *request =
+		container_of(fence, typeof(*request), semaphore);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		/*
+		 * We only check a small portion of our dependencies
+		 * and so cannot guarantee that there remains no
+		 * semaphore chain across all. Instead of opting
+		 * for the full NOSEMAPHORE boost, we go for the
+		 * smaller (but still preempting) boost of
+		 * NEWCLIENT. This will be enough to boost over
+		 * a busywaiting request (as that cannot be
+		 * NEWCLIENT) without accidentally boosting
+		 * a busywait over real work elsewhere.
+		 */
+		i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+		break;
+
+	case FENCE_FREE:
+		i915_request_put(request);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
@@ -702,6 +732,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
+	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
 
 	i915_sched_node_init(&rq->sched);
 
@@ -784,6 +815,12 @@ emit_semaphore_wait(struct i915_request *to,
 					     &from->fence, 0,
 					     I915_FENCE_GFP);
 
+	err = i915_sw_fence_await_dma_fence(&to->semaphore,
+					    &from->fence, 0,
+					    I915_FENCE_GFP);
+	if (err < 0)
+		return err;
+
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
@@ -1114,6 +1151,7 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
+	i915_sw_fence_commit(&request->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
 		struct i915_sched_attr attr = request->gem_context->sched;
@@ -1320,7 +1358,9 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
+		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;

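The new semaphore_notify() callback follows the two-phase i915_sw_fence notification protocol: one call when the fence completes (apply the boost) and one when the last reference is dropped (release the request). A minimal userspace analogue of that protocol, with invented fence types rather than the kernel's real machinery, looks like:

	#include <stdio.h>

	/* Simplified analogue of the sw-fence notify protocol: the callback
	 * fires once at completion and once more at final release, so the
	 * handler can free whatever it pinned for the wait. */
	enum notify_state { FENCE_COMPLETE, FENCE_FREE };

	struct fence {
		int (*notify)(struct fence *f, enum notify_state state);
		int refcount;
	};

	static void fence_signal(struct fence *f)
	{
		f->notify(f, FENCE_COMPLETE);
	}

	static void fence_put(struct fence *f)
	{
		if (--f->refcount == 0)
			f->notify(f, FENCE_FREE);
	}

	static int example_notify(struct fence *f, enum notify_state state)
	{
		switch (state) {
		case FENCE_COMPLETE: /* e.g. bump the waiter's priority */
			printf("complete: boost the dependent task\n");
			break;
		case FENCE_FREE: /* e.g. drop the request reference */
			printf("free: release the pinned request\n");
			break;
		}
		return 0;
	}

	int main(void)
	{
		struct fence f = { example_notify, 1 };

		fence_signal(&f);
		fence_put(&f);
		return 0;
	}

This is why i915_request_alloc() above takes an extra reference via i915_request_get(rq) when initialising the fence: the FENCE_FREE phase is what eventually drops it.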
drivers/gpu/drm/i915/i915_request.h

Lines changed: 1 addition & 0 deletions
@@ -143,6 +143,7 @@ struct i915_request {
 		struct i915_sw_dma_fence_cb dmaq;
 	};
 	struct list_head execute_cb;
+	struct i915_sw_fence semaphore;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.

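The fence is embedded directly in struct i915_request, so semaphore_notify() recovers the owning request with container_of() instead of storing a back-pointer. A self-contained userspace demonstration of that pattern, using invented stand-in types:

	#include <stdio.h>
	#include <stddef.h>

	/* Userspace re-implementation of the kernel's container_of():
	 * recover the enclosing structure from a pointer to a member. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct sw_fence { int signaled; };

	struct request {                  /* stand-in for struct i915_request */
		int seqno;
		struct sw_fence semaphore; /* embedded, like rq->semaphore */
	};

	static void notify(struct sw_fence *fence)
	{
		struct request *rq = container_of(fence, struct request, semaphore);

		printf("fence belongs to request %d\n", rq->seqno);
	}

	int main(void)
	{
		struct request rq = { .seqno = 42 };

		notify(&rq.semaphore); /* callbacks see only the embedded fence */
		return 0;
	}

Embedding keeps the fence's lifetime tied to the request with zero extra allocation, at the cost of the notify callback needing the pointer arithmetic above.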
drivers/gpu/drm/i915/i915_scheduler.c

Lines changed: 11 additions & 10 deletions
@@ -64,7 +64,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 {
 	bool ret = false;
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	if (!node_signaled(signal)) {
 		INIT_LIST_HEAD(&dep->dfs_link);
@@ -81,7 +81,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		ret = true;
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 
 	return ret;
 }
@@ -108,7 +108,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 
 	GEM_BUG_ON(!list_empty(&node->link));
 
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 
 	/*
 	 * Everyone we depended upon (the fences we wait to be signaled)
@@ -135,7 +135,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		i915_dependency_free(dep);
 	}
 
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -356,7 +356,7 @@ static void __i915_schedule(struct i915_request *rq,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
-	spin_lock_irq(&engine->timeline.lock);
+	spin_lock(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
@@ -407,32 +407,33 @@ static void __i915_schedule(struct i915_request *rq,
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock(&engine->timeline.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
-	spin_lock(&schedule_lock);
+	spin_lock_irq(&schedule_lock);
 	__i915_schedule(rq, attr);
-	spin_unlock(&schedule_lock);
+	spin_unlock_irq(&schedule_lock);
 }
 
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
 	struct i915_sched_attr attr;
+	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
 
 	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
 		return;
 
-	spin_lock_bh(&schedule_lock);
+	spin_lock_irqsave(&schedule_lock, flags);
 
 	attr = rq->sched.attr;
 	attr.priority |= bump;
 	__i915_schedule(rq, &attr);
 
-	spin_unlock_bh(&schedule_lock);
+	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
 void __i915_priolist_free(struct i915_priolist *p)

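The v2 fixup is this file's conversion of schedule_lock from plain/_bh locking to irq-safe locking: the semaphore fence can now be signalled from the fence-signalling (interrupt) path, so any context holding schedule_lock with interrupts enabled could deadlock against the boost. The snippet below is a kernel-style pattern sketch with invented function names, not the driver's code, contrasting the two idioms used in the hunks above:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);

	/* Process context known to have irqs enabled:
	 * the plain _irq variants suffice. */
	static void take_from_process_context(void)
	{
		spin_lock_irq(&example_lock);
		/* ... walk and update the dependency lists ... */
		spin_unlock_irq(&example_lock);
	}

	/* Callers with unknown irq state (e.g. a fence callback that
	 * may fire from the signalling interrupt): save and restore
	 * the irq flags around the critical section. */
	static void take_from_any_context(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&example_lock, flags);
		/* ... bump a request's priority ... */
		spin_unlock_irqrestore(&example_lock, flags);
	}

Note the complementary change in __i915_schedule(): with irqs already disabled by every caller of schedule_lock, the inner engine->timeline.lock can drop its own _irq suffix.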