GNU Linux-libre 5.10.215-gnu1
[releases.git] / drivers / gpu / drm / i915 / gt / selftest_rps.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5
6 #include <linux/pm_qos.h>
7 #include <linux/sort.h>
8
9 #include "intel_engine_heartbeat.h"
10 #include "intel_engine_pm.h"
11 #include "intel_gpu_commands.h"
12 #include "intel_gt_clock_utils.h"
13 #include "intel_gt_pm.h"
14 #include "intel_rc6.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftest_rps.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/librapl.h"
20
/* Try to isolate the impact of cstates from determining frequency response */
22 #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
23
/*
 * Do-nothing work handler. The live tests in this file install it as
 * rps->work.func for their duration and restore the saved handler on exit.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
        /* intentionally empty */
}
27
/*
 * sort() comparator for u64 elements, ascending.
 *
 * Returns -1, 0 or 1 when *A is respectively less than, equal to or
 * greater than *B.
 */
static int cmp_u64(const void *A, const void *B)
{
        const u64 lhs = *(const u64 *)A;
        const u64 rhs = *(const u64 *)B;

        /* Difference of two 0/1 comparisons yields the three-way result. */
        return (lhs > rhs) - (lhs < rhs);
}
39
/*
 * sort() comparator for u32 elements, ascending.
 *
 * Returns -1, 0 or 1 when *A is respectively less than, equal to or
 * greater than *B.
 */
static int cmp_u32(const void *A, const void *B)
{
        const u32 lhs = *(const u32 *)A;
        const u32 rhs = *(const u32 *)B;

        /* Difference of two 0/1 comparisons yields the three-way result. */
        return (lhs > rhs) - (lhs < rhs);
}
51
52 static struct i915_vma *
53 create_spin_counter(struct intel_engine_cs *engine,
54                     struct i915_address_space *vm,
55                     bool srm,
56                     u32 **cancel,
57                     u32 **counter)
58 {
59         enum {
60                 COUNT,
61                 INC,
62                 __NGPR__,
63         };
64 #define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
65         struct drm_i915_gem_object *obj;
66         struct i915_vma *vma;
67         unsigned long end;
68         u32 *base, *cs;
69         int loop, i;
70         int err;
71
72         obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
73         if (IS_ERR(obj))
74                 return ERR_CAST(obj);
75
76         end = obj->base.size / sizeof(u32) - 1;
77
78         vma = i915_vma_instance(obj, vm, NULL);
79         if (IS_ERR(vma)) {
80                 err = PTR_ERR(vma);
81                 goto err_put;
82         }
83
84         err = i915_vma_pin(vma, 0, 0, PIN_USER);
85         if (err)
86                 goto err_unlock;
87
88         i915_vma_lock(vma);
89
90         base = i915_gem_object_pin_map(obj, I915_MAP_WC);
91         if (IS_ERR(base)) {
92                 err = PTR_ERR(base);
93                 goto err_unpin;
94         }
95         cs = base;
96
97         *cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
98         for (i = 0; i < __NGPR__; i++) {
99                 *cs++ = i915_mmio_reg_offset(CS_GPR(i));
100                 *cs++ = 0;
101                 *cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
102                 *cs++ = 0;
103         }
104
105         *cs++ = MI_LOAD_REGISTER_IMM(1);
106         *cs++ = i915_mmio_reg_offset(CS_GPR(INC));
107         *cs++ = 1;
108
109         loop = cs - base;
110
111         /* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
112         for (i = 0; i < 1024; i++) {
113                 *cs++ = MI_MATH(4);
114                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
115                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
116                 *cs++ = MI_MATH_ADD;
117                 *cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
118
119                 if (srm) {
120                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
121                         *cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
122                         *cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
123                         *cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
124                 }
125         }
126
127         *cs++ = MI_BATCH_BUFFER_START_GEN8;
128         *cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
129         *cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
130         GEM_BUG_ON(cs - base > end);
131
132         i915_gem_object_flush_map(obj);
133
134         *cancel = base + loop;
135         *counter = srm ? memset32(base + end, 0, 1) : NULL;
136         return vma;
137
138 err_unpin:
139         i915_vma_unpin(vma);
140 err_unlock:
141         i915_vma_unlock(vma);
142 err_put:
143         i915_gem_object_put(obj);
144         return ERR_PTR(err);
145 }
146
147 static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
148 {
149         u8 history[64], i;
150         unsigned long end;
151         int sleep;
152
153         i = 0;
154         memset(history, freq, sizeof(history));
155         sleep = 20;
156
157         /* The PCU does not change instantly, but drifts towards the goal? */
158         end = jiffies + msecs_to_jiffies(timeout_ms);
159         do {
160                 u8 act;
161
162                 act = read_cagf(rps);
163                 if (time_after(jiffies, end))
164                         return act;
165
166                 /* Target acquired */
167                 if (act == freq)
168                         return act;
169
170                 /* Any change within the last N samples? */
171                 if (!memchr_inv(history, act, sizeof(history)))
172                         return act;
173
174                 history[i] = act;
175                 i = (i + 1) % ARRAY_SIZE(history);
176
177                 usleep_range(sleep, 2 * sleep);
178                 sleep *= 2;
179                 if (sleep > timeout_ms * 20)
180                         sleep = timeout_ms * 20;
181         } while (1);
182 }
183
184 static u8 rps_set_check(struct intel_rps *rps, u8 freq)
185 {
186         mutex_lock(&rps->lock);
187         GEM_BUG_ON(!intel_rps_is_active(rps));
188         intel_rps_set(rps, freq);
189         GEM_BUG_ON(rps->last_freq != freq);
190         mutex_unlock(&rps->lock);
191
192         return wait_for_freq(rps, freq, 50);
193 }
194
195 static void show_pstate_limits(struct intel_rps *rps)
196 {
197         struct drm_i915_private *i915 = rps_to_i915(rps);
198
199         if (IS_BROXTON(i915)) {
200                 pr_info("P_STATE_CAP[%x]: 0x%08x\n",
201                         i915_mmio_reg_offset(BXT_RP_STATE_CAP),
202                         intel_uncore_read(rps_to_uncore(rps),
203                                           BXT_RP_STATE_CAP));
204         } else if (IS_GEN(i915, 9)) {
205                 pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
206                         i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
207                         intel_uncore_read(rps_to_uncore(rps),
208                                           GEN9_RP_STATE_LIMITS));
209         }
210 }
211
212 int live_rps_clock_interval(void *arg)
213 {
214         struct intel_gt *gt = arg;
215         struct intel_rps *rps = &gt->rps;
216         void (*saved_work)(struct work_struct *wrk);
217         struct intel_engine_cs *engine;
218         enum intel_engine_id id;
219         struct igt_spinner spin;
220         int err = 0;
221
222         if (!intel_rps_is_enabled(rps))
223                 return 0;
224
225         if (igt_spinner_init(&spin, gt))
226                 return -ENOMEM;
227
228         intel_gt_pm_wait_for_idle(gt);
229         saved_work = rps->work.func;
230         rps->work.func = dummy_rps_work;
231
232         intel_gt_pm_get(gt);
233         intel_rps_disable(&gt->rps);
234
235         intel_gt_check_clock_frequency(gt);
236
237         for_each_engine(engine, gt, id) {
238                 struct i915_request *rq;
239                 u32 cycles;
240                 u64 dt;
241
242                 if (!intel_engine_can_store_dword(engine))
243                         continue;
244
245                 st_engine_heartbeat_disable(engine);
246
247                 rq = igt_spinner_create_request(&spin,
248                                                 engine->kernel_context,
249                                                 MI_NOOP);
250                 if (IS_ERR(rq)) {
251                         st_engine_heartbeat_enable(engine);
252                         err = PTR_ERR(rq);
253                         break;
254                 }
255
256                 i915_request_add(rq);
257
258                 if (!igt_wait_for_spinner(&spin, rq)) {
259                         pr_err("%s: RPS spinner did not start\n",
260                                engine->name);
261                         igt_spinner_end(&spin);
262                         st_engine_heartbeat_enable(engine);
263                         intel_gt_set_wedged(engine->gt);
264                         err = -EIO;
265                         break;
266                 }
267
268                 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
269
270                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);
271
272                 /* Set the evaluation interval to infinity! */
273                 intel_uncore_write_fw(gt->uncore,
274                                       GEN6_RP_UP_EI, 0xffffffff);
275                 intel_uncore_write_fw(gt->uncore,
276                                       GEN6_RP_UP_THRESHOLD, 0xffffffff);
277
278                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
279                                       GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);
280
281                 if (wait_for(intel_uncore_read_fw(gt->uncore,
282                                                   GEN6_RP_CUR_UP_EI),
283                              10)) {
284                         /* Just skip the test; assume lack of HW support */
285                         pr_notice("%s: rps evaluation interval not ticking\n",
286                                   engine->name);
287                         err = -ENODEV;
288                 } else {
289                         ktime_t dt_[5];
290                         u32 cycles_[5];
291                         int i;
292
293                         for (i = 0; i < 5; i++) {
294                                 preempt_disable();
295
296                                 dt_[i] = ktime_get();
297                                 cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
298
299                                 udelay(1000);
300
301                                 dt_[i] = ktime_sub(ktime_get(), dt_[i]);
302                                 cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
303
304                                 preempt_enable();
305                         }
306
307                         /* Use the median of both cycle/dt; close enough */
308                         sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
309                         cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
310                         sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
311                         dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
312                 }
313
314                 intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
315                 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
316
317                 igt_spinner_end(&spin);
318                 st_engine_heartbeat_enable(engine);
319
320                 if (err == 0) {
321                         u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
322                         u32 expected =
323                                 intel_gt_ns_to_pm_interval(gt, dt);
324
325                         pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
326                                 engine->name, cycles, time, dt, expected,
327                                 gt->clock_frequency / 1000);
328
329                         if (10 * time < 8 * dt ||
330                             8 * time > 10 * dt) {
331                                 pr_err("%s: rps clock time does not match walltime!\n",
332                                        engine->name);
333                                 err = -EINVAL;
334                         }
335
336                         if (10 * expected < 8 * cycles ||
337                             8 * expected > 10 * cycles) {
338                                 pr_err("%s: walltime does not match rps clock ticks!\n",
339                                        engine->name);
340                                 err = -EINVAL;
341                         }
342                 }
343
344                 if (igt_flush_test(gt->i915))
345                         err = -EIO;
346
347                 break; /* once is enough */
348         }
349
350         intel_rps_enable(&gt->rps);
351         intel_gt_pm_put(gt);
352
353         igt_spinner_fini(&spin);
354
355         intel_gt_pm_wait_for_idle(gt);
356         rps->work.func = saved_work;
357
358         if (err == -ENODEV) /* skipped, don't report a fail */
359                 err = 0;
360
361         return err;
362 }
363
364 int live_rps_control(void *arg)
365 {
366         struct intel_gt *gt = arg;
367         struct intel_rps *rps = &gt->rps;
368         void (*saved_work)(struct work_struct *wrk);
369         struct intel_engine_cs *engine;
370         enum intel_engine_id id;
371         struct igt_spinner spin;
372         int err = 0;
373
374         /*
375          * Check that the actual frequency matches our requested frequency,
376          * to verify our control mechanism. We have to be careful that the
377          * PCU may throttle the GPU in which case the actual frequency used
378          * will be lowered than requested.
379          */
380
381         if (!intel_rps_is_enabled(rps))
382                 return 0;
383
384         if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
385                 return 0;
386
387         if (igt_spinner_init(&spin, gt))
388                 return -ENOMEM;
389
390         intel_gt_pm_wait_for_idle(gt);
391         saved_work = rps->work.func;
392         rps->work.func = dummy_rps_work;
393
394         intel_gt_pm_get(gt);
395         for_each_engine(engine, gt, id) {
396                 struct i915_request *rq;
397                 ktime_t min_dt, max_dt;
398                 int f, limit;
399                 int min, max;
400
401                 if (!intel_engine_can_store_dword(engine))
402                         continue;
403
404                 st_engine_heartbeat_disable(engine);
405
406                 rq = igt_spinner_create_request(&spin,
407                                                 engine->kernel_context,
408                                                 MI_NOOP);
409                 if (IS_ERR(rq)) {
410                         err = PTR_ERR(rq);
411                         break;
412                 }
413
414                 i915_request_add(rq);
415
416                 if (!igt_wait_for_spinner(&spin, rq)) {
417                         pr_err("%s: RPS spinner did not start\n",
418                                engine->name);
419                         igt_spinner_end(&spin);
420                         st_engine_heartbeat_enable(engine);
421                         intel_gt_set_wedged(engine->gt);
422                         err = -EIO;
423                         break;
424                 }
425
426                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
427                         pr_err("%s: could not set minimum frequency [%x], only %x!\n",
428                                engine->name, rps->min_freq, read_cagf(rps));
429                         igt_spinner_end(&spin);
430                         st_engine_heartbeat_enable(engine);
431                         show_pstate_limits(rps);
432                         err = -EINVAL;
433                         break;
434                 }
435
436                 for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
437                         if (rps_set_check(rps, f) < f)
438                                 break;
439                 }
440
441                 limit = rps_set_check(rps, f);
442
443                 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
444                         pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
445                                engine->name, rps->min_freq, read_cagf(rps));
446                         igt_spinner_end(&spin);
447                         st_engine_heartbeat_enable(engine);
448                         show_pstate_limits(rps);
449                         err = -EINVAL;
450                         break;
451                 }
452
453                 max_dt = ktime_get();
454                 max = rps_set_check(rps, limit);
455                 max_dt = ktime_sub(ktime_get(), max_dt);
456
457                 min_dt = ktime_get();
458                 min = rps_set_check(rps, rps->min_freq);
459                 min_dt = ktime_sub(ktime_get(), min_dt);
460
461                 igt_spinner_end(&spin);
462                 st_engine_heartbeat_enable(engine);
463
464                 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
465                         engine->name,
466                         rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
467                         rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
468                         limit, intel_gpu_freq(rps, limit),
469                         min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
470
471                 if (limit == rps->min_freq) {
472                         pr_err("%s: GPU throttled to minimum!\n",
473                                engine->name);
474                         show_pstate_limits(rps);
475                         err = -ENODEV;
476                         break;
477                 }
478
479                 if (igt_flush_test(gt->i915)) {
480                         err = -EIO;
481                         break;
482                 }
483         }
484         intel_gt_pm_put(gt);
485
486         igt_spinner_fini(&spin);
487
488         intel_gt_pm_wait_for_idle(gt);
489         rps->work.func = saved_work;
490
491         return err;
492 }
493
494 static void show_pcu_config(struct intel_rps *rps)
495 {
496         struct drm_i915_private *i915 = rps_to_i915(rps);
497         unsigned int max_gpu_freq, min_gpu_freq;
498         intel_wakeref_t wakeref;
499         int gpu_freq;
500
501         if (!HAS_LLC(i915))
502                 return;
503
504         min_gpu_freq = rps->min_freq;
505         max_gpu_freq = rps->max_freq;
506         if (INTEL_GEN(i915) >= 9) {
507                 /* Convert GT frequency to 50 HZ units */
508                 min_gpu_freq /= GEN9_FREQ_SCALER;
509                 max_gpu_freq /= GEN9_FREQ_SCALER;
510         }
511
512         wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
513
514         pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
515         for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
516                 int ia_freq = gpu_freq;
517
518                 sandybridge_pcode_read(i915,
519                                        GEN6_PCODE_READ_MIN_FREQ_TABLE,
520                                        &ia_freq, NULL);
521
522                 pr_info("%5d  %5d  %5d\n",
523                         gpu_freq * 50,
524                         ((ia_freq >> 0) & 0xff) * 100,
525                         ((ia_freq >> 8) & 0xff) * 100);
526         }
527
528         intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
529 }
530
531 static u64 __measure_frequency(u32 *cntr, int duration_ms)
532 {
533         u64 dc, dt;
534
535         dt = ktime_get();
536         dc = READ_ONCE(*cntr);
537         usleep_range(1000 * duration_ms, 2000 * duration_ms);
538         dc = READ_ONCE(*cntr) - dc;
539         dt = ktime_get() - dt;
540
541         return div64_u64(1000 * 1000 * dc, dt);
542 }
543
544 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
545 {
546         u64 x[5];
547         int i;
548
549         *freq = rps_set_check(rps, *freq);
550         for (i = 0; i < 5; i++)
551                 x[i] = __measure_frequency(cntr, 2);
552         *freq = (*freq + read_cagf(rps)) / 2;
553
554         /* A simple triangle filter for better result stability */
555         sort(x, 5, sizeof(*x), cmp_u64, NULL);
556         return div_u64(x[1] + 2 * x[2] + x[3], 4);
557 }
558
559 static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
560                                   int duration_ms)
561 {
562         u64 dc, dt;
563
564         dt = ktime_get();
565         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
566         usleep_range(1000 * duration_ms, 2000 * duration_ms);
567         dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
568         dt = ktime_get() - dt;
569
570         return div64_u64(1000 * 1000 * dc, dt);
571 }
572
573 static u64 measure_cs_frequency_at(struct intel_rps *rps,
574                                    struct intel_engine_cs *engine,
575                                    int *freq)
576 {
577         u64 x[5];
578         int i;
579
580         *freq = rps_set_check(rps, *freq);
581         for (i = 0; i < 5; i++)
582                 x[i] = __measure_cs_frequency(engine, 2);
583         *freq = (*freq + read_cagf(rps)) / 2;
584
585         /* A simple triangle filter for better result stability */
586         sort(x, 5, sizeof(*x), cmp_u64, NULL);
587         return div_u64(x[1] + 2 * x[2] + x[3], 4);
588 }
589
/*
 * Check that @x and @y agree to within the ratio @f_n / @f_d, i.e. that
 * f_d * x > f_n * y and f_n * x < f_d * y (both bounds are exclusive).
 */
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
        const u64 num = f_n;
        const u64 den = f_d;

        /* x must not be too small relative to y ... */
        if (den * x <= num * y)
                return false;

        /* ... nor too large */
        return num * x < den * y;
}
594
595 int live_rps_frequency_cs(void *arg)
596 {
597         void (*saved_work)(struct work_struct *wrk);
598         struct intel_gt *gt = arg;
599         struct intel_rps *rps = &gt->rps;
600         struct intel_engine_cs *engine;
601         struct pm_qos_request qos;
602         enum intel_engine_id id;
603         int err = 0;
604
605         /*
606          * The premise is that the GPU does change freqency at our behest.
607          * Let's check there is a correspondence between the requested
608          * frequency, the actual frequency, and the observed clock rate.
609          */
610
611         if (!intel_rps_is_enabled(rps))
612                 return 0;
613
614         if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
615                 return 0;
616
617         if (CPU_LATENCY >= 0)
618                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
619
620         intel_gt_pm_wait_for_idle(gt);
621         saved_work = rps->work.func;
622         rps->work.func = dummy_rps_work;
623
624         for_each_engine(engine, gt, id) {
625                 struct i915_request *rq;
626                 struct i915_vma *vma;
627                 u32 *cancel, *cntr;
628                 struct {
629                         u64 count;
630                         int freq;
631                 } min, max;
632
633                 st_engine_heartbeat_disable(engine);
634
635                 vma = create_spin_counter(engine,
636                                           engine->kernel_context->vm, false,
637                                           &cancel, &cntr);
638                 if (IS_ERR(vma)) {
639                         err = PTR_ERR(vma);
640                         st_engine_heartbeat_enable(engine);
641                         break;
642                 }
643
644                 rq = intel_engine_create_kernel_request(engine);
645                 if (IS_ERR(rq)) {
646                         err = PTR_ERR(rq);
647                         goto err_vma;
648                 }
649
650                 err = i915_request_await_object(rq, vma->obj, false);
651                 if (!err)
652                         err = i915_vma_move_to_active(vma, rq, 0);
653                 if (!err)
654                         err = rq->engine->emit_bb_start(rq,
655                                                         vma->node.start,
656                                                         PAGE_SIZE, 0);
657                 i915_request_add(rq);
658                 if (err)
659                         goto err_vma;
660
661                 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
662                              10)) {
663                         pr_err("%s: timed loop did not start\n",
664                                engine->name);
665                         goto err_vma;
666                 }
667
668                 min.freq = rps->min_freq;
669                 min.count = measure_cs_frequency_at(rps, engine, &min.freq);
670
671                 max.freq = rps->max_freq;
672                 max.count = measure_cs_frequency_at(rps, engine, &max.freq);
673
674                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
675                         engine->name,
676                         min.count, intel_gpu_freq(rps, min.freq),
677                         max.count, intel_gpu_freq(rps, max.freq),
678                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
679                                                      max.freq * min.count));
680
681                 if (!scaled_within(max.freq * min.count,
682                                    min.freq * max.count,
683                                    2, 3)) {
684                         int f;
685
686                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
687                                engine->name,
688                                max.freq * min.count,
689                                min.freq * max.count);
690                         show_pcu_config(rps);
691
692                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
693                                 int act = f;
694                                 u64 count;
695
696                                 count = measure_cs_frequency_at(rps, engine, &act);
697                                 if (act < f)
698                                         break;
699
700                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
701                                         engine->name,
702                                         act, intel_gpu_freq(rps, act), count,
703                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
704                                                                      act * min.count));
705
706                                 f = act; /* may skip ahead [pcu granularity] */
707                         }
708
709                         err = -EINTR; /* ignore error, continue on with test */
710                 }
711
712 err_vma:
713                 *cancel = MI_BATCH_BUFFER_END;
714                 i915_gem_object_flush_map(vma->obj);
715                 i915_gem_object_unpin_map(vma->obj);
716                 i915_vma_unpin(vma);
717                 i915_vma_unlock(vma);
718                 i915_vma_put(vma);
719
720                 st_engine_heartbeat_enable(engine);
721                 if (igt_flush_test(gt->i915))
722                         err = -EIO;
723                 if (err)
724                         break;
725         }
726
727         intel_gt_pm_wait_for_idle(gt);
728         rps->work.func = saved_work;
729
730         if (CPU_LATENCY >= 0)
731                 cpu_latency_qos_remove_request(&qos);
732
733         return err;
734 }
735
736 int live_rps_frequency_srm(void *arg)
737 {
738         void (*saved_work)(struct work_struct *wrk);
739         struct intel_gt *gt = arg;
740         struct intel_rps *rps = &gt->rps;
741         struct intel_engine_cs *engine;
742         struct pm_qos_request qos;
743         enum intel_engine_id id;
744         int err = 0;
745
746         /*
747          * The premise is that the GPU does change freqency at our behest.
748          * Let's check there is a correspondence between the requested
749          * frequency, the actual frequency, and the observed clock rate.
750          */
751
752         if (!intel_rps_is_enabled(rps))
753                 return 0;
754
755         if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
756                 return 0;
757
758         if (CPU_LATENCY >= 0)
759                 cpu_latency_qos_add_request(&qos, CPU_LATENCY);
760
761         intel_gt_pm_wait_for_idle(gt);
762         saved_work = rps->work.func;
763         rps->work.func = dummy_rps_work;
764
765         for_each_engine(engine, gt, id) {
766                 struct i915_request *rq;
767                 struct i915_vma *vma;
768                 u32 *cancel, *cntr;
769                 struct {
770                         u64 count;
771                         int freq;
772                 } min, max;
773
774                 st_engine_heartbeat_disable(engine);
775
776                 vma = create_spin_counter(engine,
777                                           engine->kernel_context->vm, true,
778                                           &cancel, &cntr);
779                 if (IS_ERR(vma)) {
780                         err = PTR_ERR(vma);
781                         st_engine_heartbeat_enable(engine);
782                         break;
783                 }
784
785                 rq = intel_engine_create_kernel_request(engine);
786                 if (IS_ERR(rq)) {
787                         err = PTR_ERR(rq);
788                         goto err_vma;
789                 }
790
791                 err = i915_request_await_object(rq, vma->obj, false);
792                 if (!err)
793                         err = i915_vma_move_to_active(vma, rq, 0);
794                 if (!err)
795                         err = rq->engine->emit_bb_start(rq,
796                                                         vma->node.start,
797                                                         PAGE_SIZE, 0);
798                 i915_request_add(rq);
799                 if (err)
800                         goto err_vma;
801
802                 if (wait_for(READ_ONCE(*cntr), 10)) {
803                         pr_err("%s: timed loop did not start\n",
804                                engine->name);
805                         goto err_vma;
806                 }
807
808                 min.freq = rps->min_freq;
809                 min.count = measure_frequency_at(rps, cntr, &min.freq);
810
811                 max.freq = rps->max_freq;
812                 max.count = measure_frequency_at(rps, cntr, &max.freq);
813
814                 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
815                         engine->name,
816                         min.count, intel_gpu_freq(rps, min.freq),
817                         max.count, intel_gpu_freq(rps, max.freq),
818                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
819                                                      max.freq * min.count));
820
821                 if (!scaled_within(max.freq * min.count,
822                                    min.freq * max.count,
823                                    1, 2)) {
824                         int f;
825
826                         pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
827                                engine->name,
828                                max.freq * min.count,
829                                min.freq * max.count);
830                         show_pcu_config(rps);
831
832                         for (f = min.freq + 1; f <= rps->max_freq; f++) {
833                                 int act = f;
834                                 u64 count;
835
836                                 count = measure_frequency_at(rps, cntr, &act);
837                                 if (act < f)
838                                         break;
839
840                                 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
841                                         engine->name,
842                                         act, intel_gpu_freq(rps, act), count,
843                                         (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
844                                                                      act * min.count));
845
846                                 f = act; /* may skip ahead [pcu granularity] */
847                         }
848
849                         err = -EINTR; /* ignore error, continue on with test */
850                 }
851
852 err_vma:
853                 *cancel = MI_BATCH_BUFFER_END;
854                 i915_gem_object_flush_map(vma->obj);
855                 i915_gem_object_unpin_map(vma->obj);
856                 i915_vma_unpin(vma);
857                 i915_vma_unlock(vma);
858                 i915_vma_put(vma);
859
860                 st_engine_heartbeat_enable(engine);
861                 if (igt_flush_test(gt->i915))
862                         err = -EIO;
863                 if (err)
864                         break;
865         }
866
867         intel_gt_pm_wait_for_idle(gt);
868         rps->work.func = saved_work;
869
870         if (CPU_LATENCY >= 0)
871                 cpu_latency_qos_remove_request(&qos);
872
873         return err;
874 }
875
876 static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
877 {
878         /* Flush any previous EI */
879         usleep_range(timeout_us, 2 * timeout_us);
880
881         /* Reset the interrupt status */
882         rps_disable_interrupts(rps);
883         GEM_BUG_ON(rps->pm_iir);
884         rps_enable_interrupts(rps);
885
886         /* And then wait for the timeout, for real this time */
887         usleep_range(2 * timeout_us, 3 * timeout_us);
888 }
889
890 static int __rps_up_interrupt(struct intel_rps *rps,
891                               struct intel_engine_cs *engine,
892                               struct igt_spinner *spin)
893 {
894         struct intel_uncore *uncore = engine->uncore;
895         struct i915_request *rq;
896         u32 timeout;
897
898         if (!intel_engine_can_store_dword(engine))
899                 return 0;
900
901         rps_set_check(rps, rps->min_freq);
902
903         rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
904         if (IS_ERR(rq))
905                 return PTR_ERR(rq);
906
907         i915_request_get(rq);
908         i915_request_add(rq);
909
910         if (!igt_wait_for_spinner(spin, rq)) {
911                 pr_err("%s: RPS spinner did not start\n",
912                        engine->name);
913                 i915_request_put(rq);
914                 intel_gt_set_wedged(engine->gt);
915                 return -EIO;
916         }
917
918         if (!intel_rps_is_active(rps)) {
919                 pr_err("%s: RPS not enabled on starting spinner\n",
920                        engine->name);
921                 igt_spinner_end(spin);
922                 i915_request_put(rq);
923                 return -EINVAL;
924         }
925
926         if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
927                 pr_err("%s: RPS did not register UP interrupt\n",
928                        engine->name);
929                 i915_request_put(rq);
930                 return -EINVAL;
931         }
932
933         if (rps->last_freq != rps->min_freq) {
934                 pr_err("%s: RPS did not program min frequency\n",
935                        engine->name);
936                 i915_request_put(rq);
937                 return -EINVAL;
938         }
939
940         timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
941         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
942         timeout = DIV_ROUND_UP(timeout, 1000);
943
944         sleep_for_ei(rps, timeout);
945         GEM_BUG_ON(i915_request_completed(rq));
946
947         igt_spinner_end(spin);
948         i915_request_put(rq);
949
950         if (rps->cur_freq != rps->min_freq) {
951                 pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
952                        engine->name, intel_rps_read_actual_frequency(rps));
953                 return -EINVAL;
954         }
955
956         if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
957                 pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
958                        engine->name, rps->pm_iir,
959                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
960                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
961                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
962                 return -EINVAL;
963         }
964
965         return 0;
966 }
967
968 static int __rps_down_interrupt(struct intel_rps *rps,
969                                 struct intel_engine_cs *engine)
970 {
971         struct intel_uncore *uncore = engine->uncore;
972         u32 timeout;
973
974         rps_set_check(rps, rps->max_freq);
975
976         if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
977                 pr_err("%s: RPS did not register DOWN interrupt\n",
978                        engine->name);
979                 return -EINVAL;
980         }
981
982         if (rps->last_freq != rps->max_freq) {
983                 pr_err("%s: RPS did not program max frequency\n",
984                        engine->name);
985                 return -EINVAL;
986         }
987
988         timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
989         timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
990         timeout = DIV_ROUND_UP(timeout, 1000);
991
992         sleep_for_ei(rps, timeout);
993
994         if (rps->cur_freq != rps->max_freq) {
995                 pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
996                        engine->name,
997                        intel_rps_read_actual_frequency(rps));
998                 return -EINVAL;
999         }
1000
1001         if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1002                 pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1003                        engine->name, rps->pm_iir,
1004                        intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1005                        intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1006                        intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1007                        intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1008                        intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1009                        intel_uncore_read(uncore, GEN6_RP_UP_EI));
1010                 return -EINVAL;
1011         }
1012
1013         return 0;
1014 }
1015
1016 int live_rps_interrupt(void *arg)
1017 {
1018         struct intel_gt *gt = arg;
1019         struct intel_rps *rps = &gt->rps;
1020         void (*saved_work)(struct work_struct *wrk);
1021         struct intel_engine_cs *engine;
1022         enum intel_engine_id id;
1023         struct igt_spinner spin;
1024         u32 pm_events;
1025         int err = 0;
1026
1027         /*
1028          * First, let's check whether or not we are receiving interrupts.
1029          */
1030
1031         if (!intel_rps_has_interrupts(rps))
1032                 return 0;
1033
1034         intel_gt_pm_get(gt);
1035         pm_events = rps->pm_events;
1036         intel_gt_pm_put(gt);
1037         if (!pm_events) {
1038                 pr_err("No RPS PM events registered, but RPS is enabled?\n");
1039                 return -ENODEV;
1040         }
1041
1042         if (igt_spinner_init(&spin, gt))
1043                 return -ENOMEM;
1044
1045         intel_gt_pm_wait_for_idle(gt);
1046         saved_work = rps->work.func;
1047         rps->work.func = dummy_rps_work;
1048
1049         for_each_engine(engine, gt, id) {
1050                 /* Keep the engine busy with a spinner; expect an UP! */
1051                 if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
1052                         intel_gt_pm_wait_for_idle(engine->gt);
1053                         GEM_BUG_ON(intel_rps_is_active(rps));
1054
1055                         st_engine_heartbeat_disable(engine);
1056
1057                         err = __rps_up_interrupt(rps, engine, &spin);
1058
1059                         st_engine_heartbeat_enable(engine);
1060                         if (err)
1061                                 goto out;
1062
1063                         intel_gt_pm_wait_for_idle(engine->gt);
1064                 }
1065
1066                 /* Keep the engine awake but idle and check for DOWN */
1067                 if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
1068                         st_engine_heartbeat_disable(engine);
1069                         intel_rc6_disable(&gt->rc6);
1070
1071                         err = __rps_down_interrupt(rps, engine);
1072
1073                         intel_rc6_enable(&gt->rc6);
1074                         st_engine_heartbeat_enable(engine);
1075                         if (err)
1076                                 goto out;
1077                 }
1078         }
1079
1080 out:
1081         if (igt_flush_test(gt->i915))
1082                 err = -EIO;
1083
1084         igt_spinner_fini(&spin);
1085
1086         intel_gt_pm_wait_for_idle(gt);
1087         rps->work.func = saved_work;
1088
1089         return err;
1090 }
1091
1092 static u64 __measure_power(int duration_ms)
1093 {
1094         u64 dE, dt;
1095
1096         dt = ktime_get();
1097         dE = librapl_energy_uJ();
1098         usleep_range(1000 * duration_ms, 2000 * duration_ms);
1099         dE = librapl_energy_uJ() - dE;
1100         dt = ktime_get() - dt;
1101
1102         return div64_u64(1000 * 1000 * dE, dt);
1103 }
1104
1105 static u64 measure_power_at(struct intel_rps *rps, int *freq)
1106 {
1107         u64 x[5];
1108         int i;
1109
1110         *freq = rps_set_check(rps, *freq);
1111         for (i = 0; i < 5; i++)
1112                 x[i] = __measure_power(5);
1113         *freq = (*freq + read_cagf(rps)) / 2;
1114
1115         /* A simple triangle filter for better result stability */
1116         sort(x, 5, sizeof(*x), cmp_u64, NULL);
1117         return div_u64(x[1] + 2 * x[2] + x[3], 4);
1118 }
1119
1120 int live_rps_power(void *arg)
1121 {
1122         struct intel_gt *gt = arg;
1123         struct intel_rps *rps = &gt->rps;
1124         void (*saved_work)(struct work_struct *wrk);
1125         struct intel_engine_cs *engine;
1126         enum intel_engine_id id;
1127         struct igt_spinner spin;
1128         int err = 0;
1129
1130         /*
1131          * Our fundamental assumption is that running at lower frequency
1132          * actually saves power. Let's see if our RAPL measurement support
1133          * that theory.
1134          */
1135
1136         if (!intel_rps_is_enabled(rps))
1137                 return 0;
1138
1139         if (!librapl_energy_uJ())
1140                 return 0;
1141
1142         if (igt_spinner_init(&spin, gt))
1143                 return -ENOMEM;
1144
1145         intel_gt_pm_wait_for_idle(gt);
1146         saved_work = rps->work.func;
1147         rps->work.func = dummy_rps_work;
1148
1149         for_each_engine(engine, gt, id) {
1150                 struct i915_request *rq;
1151                 struct {
1152                         u64 power;
1153                         int freq;
1154                 } min, max;
1155
1156                 if (!intel_engine_can_store_dword(engine))
1157                         continue;
1158
1159                 st_engine_heartbeat_disable(engine);
1160
1161                 rq = igt_spinner_create_request(&spin,
1162                                                 engine->kernel_context,
1163                                                 MI_NOOP);
1164                 if (IS_ERR(rq)) {
1165                         st_engine_heartbeat_enable(engine);
1166                         err = PTR_ERR(rq);
1167                         break;
1168                 }
1169
1170                 i915_request_add(rq);
1171
1172                 if (!igt_wait_for_spinner(&spin, rq)) {
1173                         pr_err("%s: RPS spinner did not start\n",
1174                                engine->name);
1175                         igt_spinner_end(&spin);
1176                         st_engine_heartbeat_enable(engine);
1177                         intel_gt_set_wedged(engine->gt);
1178                         err = -EIO;
1179                         break;
1180                 }
1181
1182                 max.freq = rps->max_freq;
1183                 max.power = measure_power_at(rps, &max.freq);
1184
1185                 min.freq = rps->min_freq;
1186                 min.power = measure_power_at(rps, &min.freq);
1187
1188                 igt_spinner_end(&spin);
1189                 st_engine_heartbeat_enable(engine);
1190
1191                 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1192                         engine->name,
1193                         min.power, intel_gpu_freq(rps, min.freq),
1194                         max.power, intel_gpu_freq(rps, max.freq));
1195
1196                 if (10 * min.freq >= 9 * max.freq) {
1197                         pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1198                                   min.freq, intel_gpu_freq(rps, min.freq),
1199                                   max.freq, intel_gpu_freq(rps, max.freq));
1200                         continue;
1201                 }
1202
1203                 if (11 * min.power > 10 * max.power) {
1204                         pr_err("%s: did not conserve power when setting lower frequency!\n",
1205                                engine->name);
1206                         err = -EINVAL;
1207                         break;
1208                 }
1209
1210                 if (igt_flush_test(gt->i915)) {
1211                         err = -EIO;
1212                         break;
1213                 }
1214         }
1215
1216         igt_spinner_fini(&spin);
1217
1218         intel_gt_pm_wait_for_idle(gt);
1219         rps->work.func = saved_work;
1220
1221         return err;
1222 }
1223
1224 int live_rps_dynamic(void *arg)
1225 {
1226         struct intel_gt *gt = arg;
1227         struct intel_rps *rps = &gt->rps;
1228         struct intel_engine_cs *engine;
1229         enum intel_engine_id id;
1230         struct igt_spinner spin;
1231         int err = 0;
1232
1233         /*
1234          * We've looked at the bascs, and have established that we
1235          * can change the clock frequency and that the HW will generate
1236          * interrupts based on load. Now we check how we integrate those
1237          * moving parts into dynamic reclocking based on load.
1238          */
1239
1240         if (!intel_rps_is_enabled(rps))
1241                 return 0;
1242
1243         if (igt_spinner_init(&spin, gt))
1244                 return -ENOMEM;
1245
1246         if (intel_rps_has_interrupts(rps))
1247                 pr_info("RPS has interrupt support\n");
1248         if (intel_rps_uses_timer(rps))
1249                 pr_info("RPS has timer support\n");
1250
1251         for_each_engine(engine, gt, id) {
1252                 struct i915_request *rq;
1253                 struct {
1254                         ktime_t dt;
1255                         u8 freq;
1256                 } min, max;
1257
1258                 if (!intel_engine_can_store_dword(engine))
1259                         continue;
1260
1261                 intel_gt_pm_wait_for_idle(gt);
1262                 GEM_BUG_ON(intel_rps_is_active(rps));
1263                 rps->cur_freq = rps->min_freq;
1264
1265                 intel_engine_pm_get(engine);
1266                 intel_rc6_disable(&gt->rc6);
1267                 GEM_BUG_ON(rps->last_freq != rps->min_freq);
1268
1269                 rq = igt_spinner_create_request(&spin,
1270                                                 engine->kernel_context,
1271                                                 MI_NOOP);
1272                 if (IS_ERR(rq)) {
1273                         err = PTR_ERR(rq);
1274                         goto err;
1275                 }
1276
1277                 i915_request_add(rq);
1278
1279                 max.dt = ktime_get();
1280                 max.freq = wait_for_freq(rps, rps->max_freq, 500);
1281                 max.dt = ktime_sub(ktime_get(), max.dt);
1282
1283                 igt_spinner_end(&spin);
1284
1285                 min.dt = ktime_get();
1286                 min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1287                 min.dt = ktime_sub(ktime_get(), min.dt);
1288
1289                 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1290                         engine->name,
1291                         max.freq, intel_gpu_freq(rps, max.freq),
1292                         ktime_to_ns(max.dt),
1293                         min.freq, intel_gpu_freq(rps, min.freq),
1294                         ktime_to_ns(min.dt));
1295                 if (min.freq >= max.freq) {
1296                         pr_err("%s: dynamic reclocking of spinner failed\n!",
1297                                engine->name);
1298                         err = -EINVAL;
1299                 }
1300
1301 err:
1302                 intel_rc6_enable(&gt->rc6);
1303                 intel_engine_pm_put(engine);
1304
1305                 if (igt_flush_test(gt->i915))
1306                         err = -EIO;
1307                 if (err)
1308                         break;
1309         }
1310
1311         igt_spinner_fini(&spin);
1312
1313         return err;
1314 }