 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
-#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
 
 #define __EXEC_OBJECT_HAS_REF	BIT(31)
 #define __EXEC_OBJECT_HAS_PIN	BIT(30)
@@ -212,10 +217,15 @@ struct i915_execbuffer {
 		struct drm_mm_node node; /** temporary GTT binding */
 		unsigned long vaddr; /** Current kmap address */
 		unsigned long page; /** Currently mapped page index */
+		unsigned int gen; /** Cached value of INTEL_GEN */
 		bool use_64bit_reloc : 1;
 		bool has_llc : 1;
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
+
+		struct drm_i915_gem_request *rq;
+		u32 *rq_cmd;
+		unsigned int rq_size;
 	} reloc_cache;
 
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
@@ -496,8 +506,11 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 	if (!i915_gem_object_has_struct_page(obj))
 		return false;
 
-	if (DBG_USE_CPU_RELOC)
-		return DBG_USE_CPU_RELOC > 0;
+	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+		return false;
 
 	return (cache->has_llc ||
 		obj->cache_dirty ||
@@ -887,6 +900,8 @@ static void eb_reset_vmas(const struct i915_execbuffer *eb)
 
 static void eb_destroy(const struct i915_execbuffer *eb)
 {
+	GEM_BUG_ON(eb->reloc_cache.rq);
+
 	if (eb->lut_size >= 0)
 		kfree(eb->buckets);
 }
@@ -904,11 +919,14 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->page = -1;
 	cache->vaddr = 0;
 	/* Must be a variable in the struct to allow GCC to unroll. */
+	cache->gen = INTEL_GEN(i915);
 	cache->has_llc = HAS_LLC(i915);
-	cache->has_fence = INTEL_GEN(i915) < 4;
-	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
+	cache->has_fence = cache->gen < 4;
+	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.allocated = false;
+	cache->rq = NULL;
+	cache->rq_size = 0;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -930,10 +948,24 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
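+/*
+ * Flush the current GPU relocation batch: terminate it with
+ * MI_BATCH_BUFFER_END and submit the request that executes the queued
+ * relocation writes.
+ */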
+static void reloc_gpu_flush(struct reloc_cache *cache)
+{
+	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+	i915_gem_object_unpin_map(cache->rq->batch->obj);
+	i915_gem_chipset_flush(cache->rq->i915);
+
+	__i915_add_request(cache->rq, true);
+	cache->rq = NULL;
+}
+
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
 	if (!cache->vaddr)
 		return;
@@ -1099,6 +1131,121 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 	*addr = value;
 }
 
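+/*
+ * Set up a throwaway batch from the engine's pool plus a request to run
+ * it: the caller fills the batch with MI_STORE_DWORD_IMM commands via
+ * cache->rq_cmd. On success the batch mapping is left pinned until
+ * reloc_gpu_flush().
+ */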
+static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+			     struct i915_vma *vma,
+			     unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_request *rq;
+	struct i915_vma *batch;
+	u32 *cmd;
+	int err;
+
+	GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU);
+
+	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj,
+				      cache->has_llc ? I915_MAP_WB : I915_MAP_WC);
+	i915_gem_object_unpin_pages(obj);
+	if (IS_ERR(cmd))
+		return PTR_ERR(cmd);
+
+	err = i915_gem_object_set_to_wc_domain(obj, false);
+	if (err)
+		goto err_unmap;
+
+	batch = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_unmap;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	if (err)
+		goto err_unmap;
+
+	rq = i915_gem_request_alloc(eb->engine, eb->ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = i915_gem_request_await_object(rq, vma->obj, true);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (err)
+		goto err_request;
+
+	err = i915_switch_context(rq);
+	if (err)
+		goto err_request;
+
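+	/*
+	 * On gen6+ MI_STORE_DWORD_IMM may be used from an unprivileged
+	 * batch; on older gens it is a privileged command, so the batch
+	 * has to be dispatched as secure.
+	 */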
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto err_request;
+
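+	/*
+	 * The pool batch is only handed out once idle (checked below), so
+	 * we just need to track the new request on both the batch and the
+	 * relocation target to keep them alive until the writes complete.
+	 */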
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	i915_vma_move_to_active(batch, rq, 0);
+	reservation_object_lock(obj->resv, NULL);
+	reservation_object_add_excl_fence(obj->resv, &rq->fence);
+	reservation_object_unlock(obj->resv);
+	i915_vma_unpin(batch);
+
+	i915_vma_move_to_active(vma, rq, true);
+	reservation_object_lock(vma->obj->resv, NULL);
+	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
+	reservation_object_unlock(vma->obj->resv);
+
+	rq->batch = batch;
+
+	cache->rq = rq;
+	cache->rq_cmd = cmd;
+	cache->rq_size = 0;
+
+	/* Return with batch mapping (cmd) still pinned */
+	return 0;
+
+err_request:
+	i915_add_request(rq);
+err_unpin:
+	i915_vma_unpin(batch);
+err_unmap:
+	i915_gem_object_unpin_map(obj);
+	return err;
+}
+
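+/*
+ * Reserve space for len dwords in the relocation batch, flushing and
+ * submitting the current batch first if it cannot hold them, and
+ * allocating a fresh one on demand.
+ */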
+static u32 *reloc_gpu(struct i915_execbuffer *eb,
+		      struct i915_vma *vma,
+		      unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	u32 *cmd;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
+
+	if (unlikely(!cache->rq)) {
+		int err;
+
+		err = __reloc_gpu_alloc(eb, vma, len);
+		if (unlikely(err))
+			return ERR_PTR(err);
+	}
+
+	cmd = cache->rq_cmd + cache->rq_size;
+	cache->rq_size += len;
+
+	return cmd;
+}
+
 static u64
 relocate_entry(struct i915_vma *vma,
 	       const struct drm_i915_gem_relocation_entry *reloc,
@@ -1111,6 +1258,67 @@ relocate_entry(struct i915_vma *vma,
 	bool wide = eb->reloc_cache.use_64bit_reloc;
 	void *vaddr;
 
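+	/*
+	 * If the target is still busy on the GPU (or GPU relocations are
+	 * being forced for debugging), write the relocation with
+	 * MI_STORE_DWORD_IMM from a batch rather than stalling for CPU
+	 * access to the object.
+	 */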
+	if (!eb->reloc_cache.vaddr &&
+	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
+	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+		const unsigned int gen = eb->reloc_cache.gen;
+		unsigned int len;
+		u32 *batch;
+		u64 addr;
+
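+		/*
+		 * Dwords per write: a 64-bit reloc needs two 4-dword
+		 * MI_STORE_DWORD_IMM_GEN4 commands if the address is not
+		 * qword-aligned, else one 5-dword qword write; gen4+ uses
+		 * the 4-dword form and gen3 the 3-dword MI_STORE_DWORD_IMM.
+		 */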
+		if (wide)
+			len = offset & 7 ? 8 : 5;
+		else if (gen >= 4)
+			len = 4;
+		else if (gen >= 3)
+			len = 3;
+		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
+			goto repeat;
+
+		batch = reloc_gpu(eb, vma, len);
+		if (IS_ERR(batch))
+			goto repeat;
+
+		addr = gen8_canonical_addr(vma->node.start + offset);
+		if (wide) {
+			if (offset & 7) {
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+
+				addr = gen8_canonical_addr(addr + 4);
+
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = upper_32_bits(target_offset);
+			} else {
+				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+				*batch++ = upper_32_bits(target_offset);
+			}
+		} else if (gen >= 6) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else if (gen >= 4) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else {
+			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		}
+
+		goto out;
+	}
+
 repeat:
 	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
@@ -1127,6 +1335,7 @@ relocate_entry(struct i915_vma *vma,
 		goto repeat;
 	}
 
+out:
 	return target->node.start | UPDATE;
 }
 
@@ -1189,7 +1398,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * If the relocation already has the right value in it, no
 	 * more work needs to be done.
 	 */
-	if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+	if (!DBG_FORCE_RELOC &&
+	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
@@ -1915,7 +2125,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.i915 = to_i915(dev);
 	eb.file = file;
 	eb.args = args;
-	if (!(args->flags & I915_EXEC_NO_RELOC))
+	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
 	eb.exec = exec;
 	eb.ctx = NULL;
@@ -2068,6 +2278,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb.batch = vma;
 	}
 
+	/* All GPU relocation batches must be submitted prior to the user rq */
+	GEM_BUG_ON(eb.reloc_cache.rq);
+
 	/* Allocate a request for this batch buffer nice and early. */
 	eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
 	if (IS_ERR(eb.request)) {