Skip to content

Commit 6694482

Browse files
diandersrobclark
authored andcommitted
drm/msm: Avoid unclocked GMU register access in 6xx gpu_busy
From testing on sc7180-trogdor devices, reading the GMU registers needs the GMU clocks to be enabled. Those clocks get turned on in a6xx_gmu_resume(). Confusingly enough, that function is called as a result of the runtime_pm of the GPU "struct device", not the GMU "struct device". Unfortunately the current a6xx_gpu_busy() grabs a reference to the GMU's "struct device". The fact that we were grabbing the wrong reference was easily seen to cause crashes that happen if we change the GPU's pm_runtime usage to not use autosuspend. It's also believed to cause some long tail GPU crashes even with autosuspend. We could look at changing it so that we do pm_runtime_get_if_in_use() on the GPU's "struct device", but then we run into a different problem. pm_runtime_get_if_in_use() will return 0 for the GPU's "struct device" the whole time when we're in the "autosuspend delay". That is, when we drop the last reference to the GPU but we're waiting a period before actually suspending then we'll think the GPU is off. One reason that's bad is that if the GPU didn't actually turn off then the cycle counter doesn't lose state and that throws off all of our calculations. Let's change the code to keep track of the suspend state of devfreq. msm_devfreq_suspend() is always called before we actually suspend the GPU and msm_devfreq_resume() after we resume it. This means we can use the suspended state to know if we're powered or not. NOTE: one might wonder when exactly our status function is called when devfreq is supposed to be disabled. The stack crawl I captured was: msm_devfreq_get_dev_status devfreq_simple_ondemand_func devfreq_update_target qos_notifier_call qos_max_notifier_call blocking_notifier_call_chain pm_qos_update_target freq_qos_apply apply_constraint __dev_pm_qos_update_request dev_pm_qos_update_request msm_devfreq_idle_work Fixes: eadf792 ("drm/msm: Check for powered down HW in the devfreq callbacks") Signed-off-by: Douglas Anderson <[email protected]> Reviewed-by: Rob Clark <[email protected]> Patchwork: https://patchwork.freedesktop.org/patch/489124/ Link: https://lore.kernel.org/r/20220610124639.v4.1.Ie846c5352bc307ee4248d7cab998ab3016b85d06@changeid Signed-off-by: Rob Clark <[email protected]>
1 parent 1ff1da4 commit 6694482

File tree

6 files changed

+53
-33
lines changed

6 files changed

+53
-33
lines changed

drivers/gpu/drm/msm/adreno/a5xx_gpu.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,18 +1666,10 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
16661666
{
16671667
u64 busy_cycles;
16681668

1669-
/* Only read the gpu busy if the hardware is already active */
1670-
if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0) {
1671-
*out_sample_rate = 1;
1672-
return 0;
1673-
}
1674-
16751669
busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
16761670
REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
16771671
*out_sample_rate = clk_get_rate(gpu->core_clk);
16781672

1679-
pm_runtime_put(&gpu->pdev->dev);
1680-
16811673
return busy_cycles;
16821674
}
16831675

drivers/gpu/drm/msm/adreno/a6xx_gmu.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
102102
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
103103
}
104104

105-
void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
105+
void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
106+
bool suspended)
106107
{
107108
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
108109
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
@@ -127,15 +128,16 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
127128

128129
/*
129130
* This can get called from devfreq while the hardware is idle. Don't
130-
* bring up the power if it isn't already active
131+
* bring up the power if it isn't already active. All we're doing here
132+
* is updating the frequency so that when we come back online we're at
133+
* the right rate.
131134
*/
132-
if (pm_runtime_get_if_in_use(gmu->dev) == 0)
135+
if (suspended)
133136
return;
134137

135138
if (!gmu->legacy) {
136139
a6xx_hfi_set_freq(gmu, perf_index);
137140
dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
138-
pm_runtime_put(gmu->dev);
139141
return;
140142
}
141143

@@ -159,7 +161,6 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
159161
dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);
160162

161163
dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
162-
pm_runtime_put(gmu->dev);
163164
}
164165

165166
unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
@@ -895,7 +896,7 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
895896
return;
896897

897898
gmu->freq = 0; /* so a6xx_gmu_set_freq() doesn't exit early */
898-
a6xx_gmu_set_freq(gpu, gpu_opp);
899+
a6xx_gmu_set_freq(gpu, gpu_opp, false);
899900
dev_pm_opp_put(gpu_opp);
900901
}
901902

drivers/gpu/drm/msm/adreno/a6xx_gpu.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1658,27 +1658,21 @@ static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
16581658
/* 19.2MHz */
16591659
*out_sample_rate = 19200000;
16601660

1661-
/* Only read the gpu busy if the hardware is already active */
1662-
if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0)
1663-
return 0;
1664-
16651661
busy_cycles = gmu_read64(&a6xx_gpu->gmu,
16661662
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
16671663
REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
16681664

1669-
1670-
pm_runtime_put(a6xx_gpu->gmu.dev);
1671-
16721665
return busy_cycles;
16731666
}
16741667

1675-
static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
1668+
static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
1669+
bool suspended)
16761670
{
16771671
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
16781672
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
16791673

16801674
mutex_lock(&a6xx_gpu->gmu.lock);
1681-
a6xx_gmu_set_freq(gpu, opp);
1675+
a6xx_gmu_set_freq(gpu, opp, suspended);
16821676
mutex_unlock(&a6xx_gpu->gmu.lock);
16831677
}
16841678

drivers/gpu/drm/msm/adreno/a6xx_gpu.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
7777
int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
7878
void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
7979

80-
void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp);
80+
void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
81+
bool suspended);
8182
unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
8283

8384
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,

drivers/gpu/drm/msm/msm_gpu.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,14 @@ struct msm_gpu_funcs {
6464
/* for generation specific debugfs: */
6565
void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
6666
#endif
67+
/* note: gpu_busy() can assume that we have been pm_resumed */
6768
u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
6869
struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
6970
int (*gpu_state_put)(struct msm_gpu_state *state);
7071
unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
71-
void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp);
72+
/* note: gpu_set_freq() can assume that we have been pm_resumed */
73+
void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
74+
bool suspended);
7275
struct msm_gem_address_space *(*create_address_space)
7376
(struct msm_gpu *gpu, struct platform_device *pdev);
7477
struct msm_gem_address_space *(*create_private_address_space)
@@ -92,6 +95,9 @@ struct msm_gpu_devfreq {
9295
/** devfreq: devfreq instance */
9396
struct devfreq *devfreq;
9497

98+
/** lock: lock for "suspended", "busy_cycles", and "time" */
99+
struct mutex lock;
100+
95101
/**
96102
* idle_constraint:
97103
*
@@ -135,6 +141,9 @@ struct msm_gpu_devfreq {
135141
* elapsed
136142
*/
137143
struct msm_hrtimer_work boost_work;
144+
145+
/** suspended: tracks if we're suspended */
146+
bool suspended;
138147
};
139148

140149
struct msm_gpu {

drivers/gpu/drm/msm/msm_gpu_devfreq.c

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
2020
u32 flags)
2121
{
2222
struct msm_gpu *gpu = dev_to_gpu(dev);
23+
struct msm_gpu_devfreq *df = &gpu->devfreq;
2324
struct dev_pm_opp *opp;
2425

2526
/*
@@ -32,10 +33,13 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
3233

3334
trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp));
3435

35-
if (gpu->funcs->gpu_set_freq)
36-
gpu->funcs->gpu_set_freq(gpu, opp);
37-
else
36+
if (gpu->funcs->gpu_set_freq) {
37+
mutex_lock(&df->lock);
38+
gpu->funcs->gpu_set_freq(gpu, opp, df->suspended);
39+
mutex_unlock(&df->lock);
40+
} else {
3841
clk_set_rate(gpu->core_clk, *freq);
42+
}
3943

4044
dev_pm_opp_put(opp);
4145

@@ -58,15 +62,24 @@ static void get_raw_dev_status(struct msm_gpu *gpu,
5862
unsigned long sample_rate;
5963
ktime_t time;
6064

65+
mutex_lock(&df->lock);
66+
6167
status->current_frequency = get_freq(gpu);
62-
busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate);
6368
time = ktime_get();
64-
65-
busy_time = busy_cycles - df->busy_cycles;
6669
status->total_time = ktime_us_delta(time, df->time);
70+
df->time = time;
6771

72+
if (df->suspended) {
73+
mutex_unlock(&df->lock);
74+
status->busy_time = 0;
75+
return;
76+
}
77+
78+
busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate);
79+
busy_time = busy_cycles - df->busy_cycles;
6880
df->busy_cycles = busy_cycles;
69-
df->time = time;
81+
82+
mutex_unlock(&df->lock);
7083

7184
busy_time *= USEC_PER_SEC;
7285
busy_time = div64_ul(busy_time, sample_rate);
@@ -175,6 +188,8 @@ void msm_devfreq_init(struct msm_gpu *gpu)
175188
if (!gpu->funcs->gpu_busy)
176189
return;
177190

191+
mutex_init(&df->lock);
192+
178193
dev_pm_qos_add_request(&gpu->pdev->dev, &df->idle_freq,
179194
DEV_PM_QOS_MAX_FREQUENCY,
180195
PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
@@ -244,12 +259,16 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu)
244259
void msm_devfreq_resume(struct msm_gpu *gpu)
245260
{
246261
struct msm_gpu_devfreq *df = &gpu->devfreq;
262+
unsigned long sample_rate;
247263

248264
if (!has_devfreq(gpu))
249265
return;
250266

251-
df->busy_cycles = 0;
267+
mutex_lock(&df->lock);
268+
df->busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate);
252269
df->time = ktime_get();
270+
df->suspended = false;
271+
mutex_unlock(&df->lock);
253272

254273
devfreq_resume_device(df->devfreq);
255274
}
@@ -261,6 +280,10 @@ void msm_devfreq_suspend(struct msm_gpu *gpu)
261280
if (!has_devfreq(gpu))
262281
return;
263282

283+
mutex_lock(&df->lock);
284+
df->suspended = true;
285+
mutex_unlock(&df->lock);
286+
264287
devfreq_suspend_device(df->devfreq);
265288

266289
cancel_idle_work(df);

0 commit comments

Comments
 (0)