@@ -10,14 +10,18 @@

#define pr_fmt(fmt)	"CPPC Cpufreq:" fmt

+#include <linux/arch_topology.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/dmi.h>
+#include <linux/irq_work.h>
+#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
+#include <uapi/linux/sched/types.h>

#include <asm/unaligned.h>

@@ -57,6 +61,216 @@ static struct cppc_workaround_oem_info wa_info[] = {
	}
};

+#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
+
+/* Frequency invariance support */
+struct cppc_freq_invariance {
+	int cpu;
+	struct irq_work irq_work;
+	struct kthread_work work;
+	struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
+	struct cppc_cpudata *cpu_data;
+};
+
+static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
+static struct kthread_worker *kworker_fie;
+
+static struct cpufreq_driver cppc_cpufreq_driver;
+static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t1);
+
+/**
+ * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
+ * @work: The work item.
+ *
+ * The CPPC driver registers itself with the topology core to provide its own
+ * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick(), which
+ * gets called by the scheduler on every tick.
+ *
+ * Note that the arch-specific counters have higher priority than CPPC
+ * counters, if available, though the CPPC driver doesn't need any special
+ * handling for that.
+ *
+ * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since
+ * we reach here from hard-irq context), which then queues a kthread work item,
+ * and cppc_scale_freq_workfn() updates the per-CPU arch_freq_scale variable
+ * based on the counter updates since the last tick.
+ */
+static void cppc_scale_freq_workfn(struct kthread_work *work)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+	struct cppc_cpudata *cpu_data;
+	unsigned long local_freq_scale;
+	u64 perf;
+
+	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
+	cpu_data = cppc_fi->cpu_data;
+
+	if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
+		pr_warn("%s: failed to read perf counters\n", __func__);
+		return;
+	}
+
+	perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
+				     &fb_ctrs);
+	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
+
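+	/*
+	 * Scale the average delivered performance into the
+	 * [0, SCHED_CAPACITY_SCALE] (i.e. [0, 1024]) range:
+	 * freq_scale = delivered_perf * 1024 / highest_perf.
+	 */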
+	perf <<= SCHED_CAPACITY_SHIFT;
+	local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
+
+	/* This can happen due to counter overflow */
+	if (unlikely(local_freq_scale > 1024))
+		local_freq_scale = 1024;
+
+	per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
+}
+
+static void cppc_irq_work(struct irq_work *irq_work)
+{
+	struct cppc_freq_invariance *cppc_fi;
+
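+	/* Runs in hard-irq context: defer the counter read to the FIE kworker */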
+	cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
+	kthread_queue_work(kworker_fie, &cppc_fi->work);
+}
+
+static void cppc_scale_freq_tick(void)
+{
+	struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
+
+	/*
+	 * cppc_get_perf_ctrs() can potentially sleep, so call it from the
+	 * right context.
+	 */
+	irq_work_queue(&cppc_fi->irq_work);
+}
+
+static struct scale_freq_data cppc_sftd = {
+	.source = SCALE_FREQ_SOURCE_CPPC,
+	.set_freq_scale = cppc_scale_freq_tick,
+};
+
+static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	int cpu, ret;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	for_each_cpu(cpu, policy->cpus) {
+		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
+		cppc_fi->cpu = cpu;
+		cppc_fi->cpu_data = policy->driver_data;
+		kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
+		init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
+
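+		/* Take an initial counters snapshot for later ticks to diff against */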
+		ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs);
+		if (ret) {
+			pr_warn("%s: failed to read perf counters for cpu:%d: %d\n",
+				__func__, cpu, ret);
+
+			/*
+			 * Don't abort if the CPU was offline while the driver
+			 * was getting registered.
+			 */
+			if (cpu_online(cpu))
+				return;
+		}
+	}
+
+	/* Register for freq-invariance */
+	topology_set_scale_freq_source(&cppc_sftd, policy->cpus);
+}
+
+/*
+ * We free all the resources on the policy's removal and not on CPU removal,
+ * as the irq works are per-CPU and the hotplug core takes care of flushing
+ * pending irq works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the
+ * kthread work fires on another CPU after the concerned CPU is removed, it
+ * does no harm.
+ *
+ * We just need to make sure to remove them all on policy->exit().
+ */
+static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	int cpu;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	/* policy->cpus will be empty here, use related_cpus instead */
+	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus);
+
+	for_each_cpu(cpu, policy->related_cpus) {
+		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
+		irq_work_sync(&cppc_fi->irq_work);
+		kthread_cancel_work_sync(&cppc_fi->work);
+	}
+}
+
+static void __init cppc_freq_invariance_init(void)
+{
+	struct sched_attr attr = {
+		.size		= sizeof(struct sched_attr),
+		.sched_policy	= SCHED_DEADLINE,
+		.sched_nice	= 0,
+		.sched_priority	= 0,
+		/*
+		 * Fake (unused) bandwidth; workaround to "fix"
+		 * priority inheritance.
+		 */
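+		/* Values are in nanoseconds: 1ms runtime every 10ms period */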
+		.sched_runtime	= 1000000,
+		.sched_deadline = 10000000,
+		.sched_period	= 10000000,
+	};
+	int ret;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	kworker_fie = kthread_create_worker(0, "cppc_fie");
+	if (IS_ERR(kworker_fie))
+		return;
+
+	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
+	if (ret) {
+		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
+			ret);
+		kthread_destroy_worker(kworker_fie);
+		return;
+	}
+}
+
+static void cppc_freq_invariance_exit(void)
+{
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	kthread_destroy_worker(kworker_fie);
+	kworker_fie = NULL;
+}
+
+#else
+static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
+{
+}
+
+static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
+{
+}
+
+static inline void cppc_freq_invariance_init(void)
+{
+}
+
+static inline void cppc_freq_invariance_exit(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
+
/* Callback function used to retrieve the max frequency from DMI */
static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
{
@@ -341,6 +555,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
		goto out;
	}

+	cppc_cpufreq_cpu_fie_init(policy);
	return 0;

out:
@@ -355,6 +570,8 @@ static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
	unsigned int cpu = policy->cpu;
	int ret;

+	cppc_cpufreq_cpu_fie_exit(policy);
+
	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;

	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
@@ -374,12 +591,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
	return (u32)t1 - (u32)t0;
}

-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-				     struct cppc_perf_fb_ctrs *fb_ctrs_t0,
-				     struct cppc_perf_fb_ctrs *fb_ctrs_t1)
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t1)
{
	u64 delta_reference, delta_delivered;
-	u64 reference_perf, delivered_perf;
+	u64 reference_perf;

	reference_perf = fb_ctrs_t0->reference_perf;

@@ -388,21 +605,19 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
	delta_delivered = get_delta(fb_ctrs_t1->delivered,
				    fb_ctrs_t0->delivered);

-	/* Check to avoid divide-by zero */
-	if (delta_reference || delta_delivered)
-		delivered_perf = (reference_perf * delta_delivered) /
-				 delta_reference;
-	else
-		delivered_perf = cpu_data->perf_ctrls.desired_perf;
+	/* Check to avoid divide-by-zero and an invalid delivered_perf */
+	if (!delta_reference || !delta_delivered)
+		return cpu_data->perf_ctrls.desired_perf;

-	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
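+	/* Delivered perf: reference perf scaled by the counter-delta ratio */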
+	return (reference_perf * delta_delivered) / delta_reference;
}

static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
{
	struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct cppc_cpudata *cpu_data = policy->driver_data;
+	u64 delivered_perf;
	int ret;

	cpufreq_cpu_put(policy);
@@ -417,7 +632,10 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
	if (ret)
		return ret;

-	return cppc_get_rate_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1);
+	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
+					       &fb_ctrs_t1);
+
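+	/* Convert abstract delivered performance to a frequency in kHz */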
+	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
}

static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
@@ -518,14 +736,21 @@ static void cppc_check_hisi_workaround(void)

static int __init cppc_cpufreq_init(void)
{
+	int ret;
+
	if ((acpi_disabled) || !acpi_cpc_valid())
		return -ENODEV;

	INIT_LIST_HEAD(&cpu_data_list);

	cppc_check_hisi_workaround();
+	cppc_freq_invariance_init();

-	return cpufreq_register_driver(&cppc_cpufreq_driver);
+	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
+	if (ret)
+		cppc_freq_invariance_exit();
+
+	return ret;
}

static inline void free_cpu_data(void)
@@ -543,6 +768,7 @@ static inline void free_cpu_data(void)
static void __exit cppc_cpufreq_exit(void)
{
	cpufreq_unregister_driver(&cppc_cpufreq_driver);
+	cppc_freq_invariance_exit();

	free_cpu_data();
}