Skip to content

Commit 7fc92e9

Browse files
committed
drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty
For ease of use (i.e. avoiding a few checks and function calls), store the object's cache coherency next to the cache-dirty bit. Specifically, this patch aims to reduce the frequency of no-op calls to i915_gem_clflush_object() to counteract the increase of such calls for GPU-only objects in the previous patch. v2: Replace cache_dirty & ~cache_coherent with cache_dirty && !cache_coherent as gcc generates much better code for the latter (Tvrtko). Signed-off-by: Chris Wilson <[email protected]> Cc: Dongwon Kim <[email protected]> Cc: Matt Roper <[email protected]> Tested-by: Dongwon Kim <[email protected]> Cc: Tvrtko Ursulin <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected] Reviewed-by: Tvrtko Ursulin <[email protected]>
1 parent e27ab73 commit 7fc92e9

File tree

8 files changed

+17
-12
lines changed

8 files changed

+17
-12
lines changed

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
5252
if (obj->cache_dirty)
5353
return false;
5454

55-
if (!i915_gem_object_is_coherent(obj))
55+
if (!obj->cache_coherent)
5656
return true;
5757

5858
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
253253

254254
if (needs_clflush &&
255255
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
256-
!i915_gem_object_is_coherent(obj))
256+
!obj->cache_coherent)
257257
drm_clflush_sg(pages);
258258

259259
__start_cpu_write(obj);
@@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
856856
if (ret)
857857
return ret;
858858

859-
if (i915_gem_object_is_coherent(obj) ||
860-
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
859+
if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
861860
ret = i915_gem_object_set_to_cpu_domain(obj, false);
862861
if (ret)
863862
goto err_unpin;
@@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
909908
if (ret)
910909
return ret;
911910

912-
if (i915_gem_object_is_coherent(obj) ||
913-
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
911+
if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
914912
ret = i915_gem_object_set_to_cpu_domain(obj, true);
915913
if (ret)
916914
goto err_unpin;
@@ -3684,6 +3682,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
36843682
list_for_each_entry(vma, &obj->vma_list, obj_link)
36853683
vma->node.color = cache_level;
36863684
obj->cache_level = cache_level;
3685+
obj->cache_coherent = i915_gem_object_is_coherent(obj);
36873686
obj->cache_dirty = true; /* Always invalidate stale cachelines */
36883687

36893688
return 0;
@@ -4344,7 +4343,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
43444343
} else
43454344
obj->cache_level = I915_CACHE_NONE;
43464345

4347-
obj->cache_dirty = !i915_gem_object_is_coherent(obj);
4346+
obj->cache_coherent = i915_gem_object_is_coherent(obj);
4347+
obj->cache_dirty = !obj->cache_coherent;
43484348

43494349
trace_i915_gem_object_create(obj);
43504350

drivers/gpu/drm/i915/i915_gem_clflush.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
139139
* snooping behaviour occurs naturally as the result of our domain
140140
* tracking.
141141
*/
142-
if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
142+
if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
143143
return;
144144

145145
trace_i915_gem_object_clflush(obj);

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
11101110
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
11111111
continue;
11121112

1113-
if (obj->cache_dirty)
1113+
if (unlikely(obj->cache_dirty && !obj->cache_coherent))
11141114
i915_gem_clflush_object(obj, 0);
11151115

11161116
ret = i915_gem_request_await_object

drivers/gpu/drm/i915/i915_gem_internal.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
191191
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
192192
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
193193
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
194-
obj->cache_dirty = !i915_gem_object_is_coherent(obj);
194+
obj->cache_coherent = i915_gem_object_is_coherent(obj);
195+
obj->cache_dirty = !obj->cache_coherent;
195196

196197
return obj;
197198
}

drivers/gpu/drm/i915/i915_gem_object.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct drm_i915_gem_object {
121121
unsigned long gt_ro:1;
122122
unsigned int cache_level:3;
123123
unsigned int cache_dirty:1;
124+
unsigned int cache_coherent:1;
124125

125126
atomic_t frontbuffer_bits;
126127
unsigned int frontbuffer_ggtt_origin; /* write once */

drivers/gpu/drm/i915/i915_gem_stolen.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
590590
obj->stolen = stolen;
591591
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
592592
obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
593+
obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
593594

594595
if (i915_gem_object_pin_pages(obj))
595596
goto cleanup;

drivers/gpu/drm/i915/i915_gem_userptr.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
805805
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
806806
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
807807
obj->cache_level = I915_CACHE_LLC;
808-
obj->cache_dirty = !i915_gem_object_is_coherent(obj);
808+
obj->cache_coherent = i915_gem_object_is_coherent(obj);
809+
obj->cache_dirty = !obj->cache_coherent;
809810

810811
obj->userptr.ptr = args->user_ptr;
811812
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);

drivers/gpu/drm/i915/selftests/huge_gem_object.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
129129
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
130130
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
131131
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
132-
obj->cache_dirty = !i915_gem_object_is_coherent(obj);
132+
obj->cache_coherent = i915_gem_object_is_coherent(obj);
133+
obj->cache_dirty = !obj->cache_coherent;
133134
obj->scratch = phys_size;
134135

135136
return obj;

0 commit comments

Comments
 (0)