Skip to content

Commit 01570b4

Browse files
committed
drm/xe/bmg: implement Wa_16023588340
This involves enabling L2 caching of host-side memory accesses to VRAM through the CPU BAR. The main fallout here is with display: VRAM writes from the CPU can now be cached in the GPU L2, and since display is never coherent with caches, various manual flushes are needed. In the case of FBC, we disable it (in a later patch) due to complications in getting it to work correctly. Signed-off-by: Matthew Auld <[email protected]> Cc: Jonathan Cavitt <[email protected]> Cc: Matt Roper <[email protected]> Cc: Lucas De Marchi <[email protected]> Cc: Vinod Govindapillai <[email protected]> Reviewed-by: Jonathan Cavitt <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 3078d9c commit 01570b4

File tree

9 files changed

+117
-1
lines changed

9 files changed

+117
-1
lines changed

drivers/gpu/drm/xe/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
2525

2626
uses_generated_oob := \
2727
$(obj)/xe_ggtt.o \
28+
$(obj)/xe_device.o \
2829
$(obj)/xe_gsc.o \
2930
$(obj)/xe_gt.o \
3031
$(obj)/xe_guc.o \
3132
$(obj)/xe_guc_ads.o \
3233
$(obj)/xe_guc_pc.o \
3334
$(obj)/xe_migrate.o \
35+
$(obj)/xe_pat.o \
3436
$(obj)/xe_ring_ops.o \
3537
$(obj)/xe_vm.o \
3638
$(obj)/xe_wa.o \

drivers/gpu/drm/xe/display/xe_dsb_buffer.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include "intel_display_types.h"
88
#include "intel_dsb_buffer.h"
99
#include "xe_bo.h"
10+
#include "xe_device.h"
11+
#include "xe_device_types.h"
1012
#include "xe_gt.h"
1113

1214
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
1618

1719
/*
 * Store the 32-bit value @val at dword index @idx (byte offset idx * 4) of
 * the DSB buffer's mapping, then call xe_device_l2_flush().
 *
 * The flush is part of Wa_16023588340: per the commit description, CPU
 * writes to VRAM may end up cached in the GPU L2 and display is not
 * coherent with caches, so the write has to be flushed manually.
 * NOTE(review): assumes the DSB buffer may live in VRAM -- confirm.
 */
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
	iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);

	/* Make the freshly written word visible to display. */
	xe_device_l2_flush(dsb_buf->vma->bo->tile->xe);
}
2126

2227
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
2631

2732
/*
 * Fill @size bytes of the DSB buffer's mapping with @val, starting at dword
 * index @idx (byte offset idx * 4), then call xe_device_l2_flush().
 *
 * A warning is raised (but the fill still proceeds) when @idx is beyond the
 * last index at which @size bytes would still fit inside buf_size.
 */
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
	/* Sanity check only: flags an out-of-range request, does not reject it. */
	WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));

	iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);

	/* Wa_16023588340: display is not cache coherent, flush manually. */
	xe_device_l2_flush(dsb_buf->vma->bo->tile->xe);
}
3341

3442
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)

drivers/gpu/drm/xe/display/xe_fb_pin.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "intel_fb.h"
1111
#include "intel_fb_pin.h"
1212
#include "xe_bo.h"
13+
#include "xe_device.h"
1314
#include "xe_ggtt.h"
1415
#include "xe_gt.h"
1516
#include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
304305
if (ret)
305306
goto err_unpin;
306307

308+
/* Ensure DPT writes are flushed */
309+
xe_device_l2_flush(xe);
307310
return vma;
308311

309312
err_unpin:

drivers/gpu/drm/xe/regs/xe_gt_regs.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
8181
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
8282

83+
#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
84+
#define CG_DIS_CNTLBUS REG_BIT(6)
85+
8386
#define CCS_AUX_INV XE_REG(0x4208)
8487

8588
#define VD0_AUX_INV XE_REG(0x4218)
@@ -374,6 +377,11 @@
374377

375378
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
376379

380+
#define XE2_GLOBAL_INVAL XE_REG(0xb404)
381+
382+
#define SCRATCH1LPFC XE_REG(0xb474)
383+
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
384+
377385
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
378386

379387
#define XE2_TDF_CTRL XE_REG(0xb418)

drivers/gpu/drm/xe/xe_device.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
#include "xe_vm.h"
5555
#include "xe_vram.h"
5656
#include "xe_wait_user_fence.h"
57+
#include "xe_wa.h"
58+
59+
#include <generated/xe_wa_oob.h>
5760

5861
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
5962
{
@@ -788,6 +791,11 @@ void xe_device_td_flush(struct xe_device *xe)
788791
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
789792
return;
790793

794+
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
795+
xe_device_l2_flush(xe);
796+
return;
797+
}
798+
791799
for_each_gt(gt, xe, id) {
792800
if (xe_gt_is_media_type(gt))
793801
continue;
@@ -811,6 +819,28 @@ void xe_device_td_flush(struct xe_device *xe)
811819
}
812820
}
813821

822+
void xe_device_l2_flush(struct xe_device *xe)
823+
{
824+
struct xe_gt *gt;
825+
int err;
826+
827+
gt = xe_root_mmio_gt(xe);
828+
829+
if (!XE_WA(gt, 16023588340))
830+
return;
831+
832+
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
833+
if (err)
834+
return;
835+
836+
xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
837+
838+
if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
839+
xe_gt_err_once(gt, "Global invalidation timeout\n");
840+
841+
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
842+
}
843+
814844
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
815845
{
816846
return xe_device_has_flat_ccs(xe) ?

drivers/gpu/drm/xe/xe_device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
162162
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
163163

164164
void xe_device_td_flush(struct xe_device *xe);
165+
void xe_device_l2_flush(struct xe_device *xe);
165166

166167
static inline bool xe_device_wedged(struct xe_device *xe)
167168
{

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <drm/xe_drm.h>
1212
#include <generated/xe_wa_oob.h>
1313

14+
#include <generated/xe_wa_oob.h>
15+
1416
#include "instructions/xe_gfxpipe_commands.h"
1517
#include "instructions/xe_mi_commands.h"
1618
#include "regs/xe_gt_regs.h"
@@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
9597
gt->uc.guc.submission_state.enabled = false;
9698
}
9799

100+
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
101+
{
102+
u32 reg;
103+
int err;
104+
105+
if (!XE_WA(gt, 16023588340))
106+
return;
107+
108+
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
109+
if (WARN_ON(err))
110+
return;
111+
112+
if (!xe_gt_is_media_type(gt)) {
113+
xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
114+
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
115+
reg |= CG_DIS_CNTLBUS;
116+
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
117+
}
118+
119+
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
120+
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
121+
}
122+
123+
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
124+
{
125+
u32 reg;
126+
int err;
127+
128+
if (!XE_WA(gt, 16023588340))
129+
return;
130+
131+
if (xe_gt_is_media_type(gt))
132+
return;
133+
134+
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
135+
if (WARN_ON(err))
136+
return;
137+
138+
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
139+
reg &= ~CG_DIS_CNTLBUS;
140+
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
141+
142+
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
143+
}
144+
98145
/**
99146
* xe_gt_remove() - Clean up the GT structures before driver removal
100147
* @gt: the GT object
@@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
111158

112159
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
113160
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
161+
162+
xe_gt_disable_host_l2_vram(gt);
114163
}
115164

116165
static void gt_reset_worker(struct work_struct *w);
@@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
508557

509558
xe_gt_mcr_init_early(gt);
510559
xe_pat_init(gt);
560+
xe_gt_enable_host_l2_vram(gt);
511561

512562
err = xe_uc_init(&gt->uc);
513563
if (err)
@@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
643693

644694
xe_pat_init(gt);
645695

696+
xe_gt_enable_host_l2_vram(gt);
697+
646698
xe_gt_mcr_set_implicit_defaults(gt);
647699
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
648700

@@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
796848

797849
xe_gt_idle_disable_pg(gt);
798850

851+
xe_gt_disable_host_l2_vram(gt);
852+
799853
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
800854
xe_gt_dbg(gt, "suspended\n");
801855

drivers/gpu/drm/xe/xe_pat.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
#include <drm/xe_drm.h>
99

10+
#include <generated/xe_wa_oob.h>
11+
1012
#include "regs/xe_reg_defs.h"
1113
#include "xe_assert.h"
1214
#include "xe_device.h"
@@ -15,6 +17,7 @@
1517
#include "xe_gt_mcr.h"
1618
#include "xe_mmio.h"
1719
#include "xe_sriov.h"
20+
#include "xe_wa.h"
1821

1922
#define _PAT_ATS 0x47fc
2023
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
382385
if (GRAPHICS_VER(xe) == 20) {
383386
xe->pat.ops = &xe2_pat_ops;
384387
xe->pat.table = xe2_pat_table;
385-
xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
388+
389+
/* Wa_16023588340. XXX: Should use XE_WA */
390+
if (GRAPHICS_VERx100(xe) == 2001)
391+
xe->pat.n_entries = 28; /* Disable CLOS3 */
392+
else
393+
xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
394+
386395
xe->pat.idx[XE_CACHE_NONE] = 3;
387396
xe->pat.idx[XE_CACHE_WT] = 15;
388397
xe->pat.idx[XE_CACHE_WB] = 2;

drivers/gpu/drm/xe/xe_wa_oob.rules

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@
2929
13011645652 GRAPHICS_VERSION(2004)
3030
22019338487 MEDIA_VERSION(2000)
3131
GRAPHICS_VERSION(2001)
32+
16023588340 GRAPHICS_VERSION(2001)

0 commit comments

Comments
 (0)