Skip to content

Commit ed5982e

Browse files
committed
drm/i915: Allow userspace to hint that the relocations were known
Userspace is able to hint to the kernel that its command stream and auxiliary state buffers already hold the correct presumed addresses and so the relocation process may be skipped if the kernel does not need to move any buffers in preparation for the execbuffer. Thus for the common case where the allotment of buffers is static between batches, we can avoid the overhead of individually checking the relocation entries. Note that this requires userspace to supply the domain tracking and requests for workarounds itself that would otherwise be computed based upon the relocation entries. Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 632000.0/sec. i3-330m: 748000.0/sec to 830000.0/sec. (measured relative to a baseline with neither optimisation applied). Signed-off-by: Chris Wilson <[email protected]> Reviewed-by: Imre Deak <[email protected]> [danvet: Fixup merge conflict in userspace header due to different baseline trees.] Signed-off-by: Daniel Vetter <[email protected]>
1 parent bcffc3f commit ed5982e

File tree

3 files changed

+62
-23
lines changed

3 files changed

+62
-23
lines changed

drivers/gpu/drm/i915/i915_dma.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
992992
case I915_PARAM_HAS_PINNED_BATCHES:
993993
value = 1;
994994
break;
995+
case I915_PARAM_HAS_EXEC_NO_RELOC:
996+
value = 1;
997+
break;
995998
default:
996999
DRM_DEBUG_DRIVER("Unknown parameter %d\n",
9971000
param->param);

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,8 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
373373

374374
static int
375375
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
376-
struct intel_ring_buffer *ring)
376+
struct intel_ring_buffer *ring,
377+
bool *need_reloc)
377378
{
378379
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
379380
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
@@ -414,7 +415,20 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
414415
obj->has_aliasing_ppgtt_mapping = 1;
415416
}
416417

417-
entry->offset = obj->gtt_offset;
418+
if (entry->offset != obj->gtt_offset) {
419+
entry->offset = obj->gtt_offset;
420+
*need_reloc = true;
421+
}
422+
423+
if (entry->flags & EXEC_OBJECT_WRITE) {
424+
obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
425+
obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
426+
}
427+
428+
if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
429+
!obj->has_global_gtt_mapping)
430+
i915_gem_gtt_bind_object(obj, obj->cache_level);
431+
418432
return 0;
419433
}
420434

@@ -440,7 +454,8 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
440454
static int
441455
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
442456
struct drm_file *file,
443-
struct list_head *objects)
457+
struct list_head *objects,
458+
bool *need_relocs)
444459
{
445460
struct drm_i915_gem_object *obj;
446461
struct list_head ordered_objects;
@@ -468,7 +483,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
468483
else
469484
list_move_tail(&obj->exec_list, &ordered_objects);
470485

471-
obj->base.pending_read_domains = 0;
486+
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
472487
obj->base.pending_write_domain = 0;
473488
obj->pending_fenced_gpu_access = false;
474489
}
@@ -508,7 +523,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
508523
(need_mappable && !obj->map_and_fenceable))
509524
ret = i915_gem_object_unbind(obj);
510525
else
511-
ret = i915_gem_execbuffer_reserve_object(obj, ring);
526+
ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
512527
if (ret)
513528
goto err;
514529
}
@@ -518,7 +533,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
518533
if (obj->gtt_space)
519534
continue;
520535

521-
ret = i915_gem_execbuffer_reserve_object(obj, ring);
536+
ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
522537
if (ret)
523538
goto err;
524539
}
@@ -538,16 +553,18 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
538553

539554
static int
540555
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
556+
struct drm_i915_gem_execbuffer2 *args,
541557
struct drm_file *file,
542558
struct intel_ring_buffer *ring,
543559
struct eb_objects *eb,
544-
struct drm_i915_gem_exec_object2 *exec,
545-
int count)
560+
struct drm_i915_gem_exec_object2 *exec)
546561
{
547562
struct drm_i915_gem_relocation_entry *reloc;
548563
struct drm_i915_gem_object *obj;
564+
bool need_relocs;
549565
int *reloc_offset;
550566
int i, total, ret;
567+
int count = args->buffer_count;
551568

552569
/* We may process another execbuffer during the unlock... */
553570
while (!list_empty(&eb->objects)) {
@@ -602,7 +619,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
602619
if (ret)
603620
goto err;
604621

605-
ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
622+
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
623+
ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
606624
if (ret)
607625
goto err;
608626

@@ -660,6 +678,9 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
660678
static bool
661679
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
662680
{
681+
if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
682+
return false;
683+
663684
return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
664685
}
665686

@@ -673,16 +694,16 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
673694
char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
674695
int length; /* limited by fault_in_pages_readable() */
675696

697+
if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
698+
return -EINVAL;
699+
676700
/* First check for malicious input causing overflow */
677701
if (exec[i].relocation_count >
678702
INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
679703
return -EINVAL;
680704

681705
length = exec[i].relocation_count *
682706
sizeof(struct drm_i915_gem_relocation_entry);
683-
if (!access_ok(VERIFY_READ, ptr, length))
684-
return -EFAULT;
685-
686707
/* we may also need to update the presumed offsets */
687708
if (!access_ok(VERIFY_WRITE, ptr, length))
688709
return -EFAULT;
@@ -704,8 +725,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
704725
u32 old_read = obj->base.read_domains;
705726
u32 old_write = obj->base.write_domain;
706727

707-
obj->base.read_domains = obj->base.pending_read_domains;
708728
obj->base.write_domain = obj->base.pending_write_domain;
729+
if (obj->base.write_domain == 0)
730+
obj->base.pending_read_domains |= obj->base.read_domains;
731+
obj->base.read_domains = obj->base.pending_read_domains;
709732
obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
710733

711734
i915_gem_object_move_to_active(obj, ring);
@@ -770,14 +793,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
770793
struct intel_ring_buffer *ring;
771794
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
772795
u32 exec_start, exec_len;
773-
u32 mask;
774-
u32 flags;
796+
u32 mask, flags;
775797
int ret, mode, i;
798+
bool need_relocs;
776799

777-
if (!i915_gem_check_execbuffer(args)) {
778-
DRM_DEBUG("execbuf with invalid offset/length\n");
800+
if (!i915_gem_check_execbuffer(args))
779801
return -EINVAL;
780-
}
781802

782803
ret = validate_exec_list(exec, args->buffer_count);
783804
if (ret)
@@ -916,17 +937,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
916937
exec_list);
917938

918939
/* Move the objects en-masse into the GTT, evicting if necessary. */
919-
ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
940+
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
941+
ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
920942
if (ret)
921943
goto err;
922944

923945
/* The objects are in their final locations, apply the relocations. */
924-
ret = i915_gem_execbuffer_relocate(dev, eb);
946+
if (need_relocs)
947+
ret = i915_gem_execbuffer_relocate(dev, eb);
925948
if (ret) {
926949
if (ret == -EFAULT) {
927-
ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
928-
eb, exec,
929-
args->buffer_count);
950+
ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
951+
eb, exec);
930952
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
931953
}
932954
if (ret)

include/uapi/drm/i915_drm.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ typedef struct drm_i915_irq_wait {
308308
#define I915_PARAM_RSVD_FOR_FUTURE_USE 22
309309
#define I915_PARAM_HAS_SECURE_BATCHES 23
310310
#define I915_PARAM_HAS_PINNED_BATCHES 24
311+
#define I915_PARAM_HAS_EXEC_NO_RELOC 25
311312

312313
typedef struct drm_i915_getparam {
313314
int param;
@@ -628,7 +629,11 @@ struct drm_i915_gem_exec_object2 {
628629
__u64 offset;
629630

630631
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
632+
#define EXEC_OBJECT_NEEDS_GTT (1<<1)
633+
#define EXEC_OBJECT_WRITE (1<<2)
634+
#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
631635
__u64 flags;
636+
632637
__u64 rsvd1;
633638
__u64 rsvd2;
634639
};
@@ -687,6 +692,15 @@ struct drm_i915_gem_execbuffer2 {
687692
*/
688693
#define I915_EXEC_IS_PINNED (1<<10)
689694

695+
/** Provide a hint to the kernel that the command stream and auxiliary
696+
* state buffers already hold the correct presumed addresses and so the
697+
* relocation process may be skipped if no buffers need to be moved in
698+
* preparation for the execbuffer.
699+
*/
700+
#define I915_EXEC_NO_RELOC (1<<11)
701+
702+
#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1)
703+
690704
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
691705
#define i915_execbuffer2_set_context_id(eb2, context) \
692706
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK

0 commit comments

Comments
 (0)