/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2018 Intel Corporation
 */
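
/*
 * Note: like the other gt selftests, this file is not built standalone;
 * it is #included at the end of intel_engine_cs.c under
 * CONFIG_DRM_I915_SELFTEST, which is how the static intel_engines[]
 * table is visible to intel_mmio_bases_check() below.
 */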

#include <linux/sort.h>

#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
        const u32 *a = A, *b = B;

        /*
         * Avoid "return *a - *b": the difference truncated to int
         * misorders u32 values that are more than INT_MAX apart.
         */
        if (*a < *b)
                return -1;
        if (*a > *b)
                return 1;
        return 0;
}

static void perf_begin(struct intel_gt *gt)
{
        intel_gt_pm_get(gt);

        /* Boost gpufreq to max [waitboost] and keep it fixed */
        atomic_inc(&gt->rps.num_waiters);
        schedule_work(&gt->rps.work);
        flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
        atomic_dec(&gt->rps.num_waiters);
        intel_gt_pm_put(gt);

        return igt_flush_test(gt->i915);
}

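/*
 * Write the engine's CS_TIMESTAMP register into u32 slot @slot of the
 * request's timeline hwsp, using MI_STORE_REGISTER_MEM. Four dwords
 * are always emitted:
 *
 *     cmd     - MI_STORE_REGISTER_MEM (length field +1 on gen8+, which
 *               takes a 64-bit destination address)
 *     reg     - RING_TIMESTAMP for this engine
 *     addr lo - hwsp_offset + slot * sizeof(u32)
 *     0       - addr hi on gen8+; plain MI_NOOP padding on earlier
 *               gens, keeping the emission a fixed four dwords
 */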
static int write_timestamp(struct i915_request *rq, int slot)
{
        u32 cmd;
        u32 *cs;

        cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
        if (INTEL_GEN(rq->engine->i915) >= 8)
                cmd++;
        *cs++ = cmd;
        *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
        *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
        *cs++ = 0;

        intel_ring_advance(rq, cs);

        return 0;
}

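/*
 * A batch containing a lone MI_BATCH_BUFFER_END: executing it does as
 * close to nothing as possible, so the surrounding timestamps measure
 * the dispatch cost of MI_BB_START itself.
 */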
static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *cs;
        int err;

        obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_put;
        }

        cs[0] = MI_BATCH_BUFFER_END;

        i915_gem_object_flush_map(obj);

        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_unpin;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err_unpin;

        i915_gem_object_unpin_map(obj);
        return vma;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

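/*
 * Reduce the COUNT samples to one value with a weighted median of the
 * sorted array: (a[1] + 2 * a[2] + a[3]) / 4, discarding the smallest
 * and largest samples as outliers. For example, {9, 3, 5, 4, 100}
 * sorts to {3, 4, 5, 9, 100} and filters to (4 + 2 * 5 + 9) >> 2 = 5.
 */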
static u32 trifilter(u32 *a)
{
        u64 sum;

        sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

        sum = mul_u32_u32(a[2], 2);
        sum += a[1];
        sum += a[3];

        return sum >> 2;
}

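/*
 * Measure the dispatch cost of MI_BB_START on each engine: bracket an
 * empty batch with CS_TIMESTAMP writes into hwsp slots 2 and 3, wait
 * for each request, and report the trifiltered delta in CS cycles.
 */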
static int perf_mi_bb_start(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
                return 0;

        perf_begin(gt);
        for_each_engine(engine, gt, id) {
                struct intel_context *ce = engine->kernel_context;
                struct i915_vma *batch;
                u32 cycles[COUNT];
                int i;

                intel_engine_pm_get(engine);

                batch = create_empty_batch(ce);
                if (IS_ERR(batch)) {
                        err = PTR_ERR(batch);
                        intel_engine_pm_put(engine);
                        break;
                }

                err = i915_vma_sync(batch);
                if (err) {
                        intel_engine_pm_put(engine);
                        i915_vma_put(batch);
                        break;
                }

                for (i = 0; i < ARRAY_SIZE(cycles); i++) {
                        struct i915_request *rq;

                        rq = i915_request_create(ce);
                        if (IS_ERR(rq)) {
                                err = PTR_ERR(rq);
                                break;
                        }

                        err = write_timestamp(rq, 2);
                        if (err)
                                goto out;

                        err = rq->engine->emit_bb_start(rq,
                                                        batch->node.start, 8,
                                                        0);
                        if (err)
                                goto out;

                        err = write_timestamp(rq, 3);
                        if (err)
                                goto out;

out:
                        /*
                         * Even on error, a request that has been created
                         * must still be submitted so that it is retired
                         * and its resources released.
                         */
                        i915_request_get(rq);
                        i915_request_add(rq);

                        if (i915_request_wait(rq, 0, HZ / 5) < 0)
                                err = -EIO;
                        /* Read the hwsp before dropping our request reference */
                        if (!err)
                                cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
                        i915_request_put(rq);
                        if (err)
                                break;
                }
                i915_vma_put(batch);
                intel_engine_pm_put(engine);
                if (err)
                        break;

                pr_info("%s: MI_BB_START cycles: %u\n",
                        engine->name, trifilter(cycles));
        }
        if (perf_end(gt))
                err = -EIO;

        return err;
}

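/*
 * A 64K batch of zeros, each of which decodes as MI_NOOP, terminated
 * by MI_BATCH_BUFFER_END in the final dword: roughly 16k no-ops for
 * the CS to chew through.
 */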
static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *cs;
        int err;

        obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err_put;
        }

        memset(cs, 0, SZ_64K);
        cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

        i915_gem_object_flush_map(obj);

        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_unpin;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err_unpin;

        i915_gem_object_unpin_map(obj);
        return vma;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

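/*
 * Time the empty batch and the 64K MI_NOOP batch back to back within
 * one request, timestamping before, between and after. Reporting
 * (nop - empty) cancels out the shared MI_BB_START overhead, leaving
 * just the cost of executing ~16k MI_NOOPs.
 */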
static int perf_mi_noop(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
                return 0;

        perf_begin(gt);
        for_each_engine(engine, gt, id) {
                struct intel_context *ce = engine->kernel_context;
                struct i915_vma *base, *nop;
                u32 cycles[COUNT];
                int i;

                intel_engine_pm_get(engine);

                base = create_empty_batch(ce);
                if (IS_ERR(base)) {
                        err = PTR_ERR(base);
                        intel_engine_pm_put(engine);
                        break;
                }

                err = i915_vma_sync(base);
                if (err) {
                        i915_vma_put(base);
                        intel_engine_pm_put(engine);
                        break;
                }

                nop = create_nop_batch(ce);
                if (IS_ERR(nop)) {
                        err = PTR_ERR(nop);
                        i915_vma_put(base);
                        intel_engine_pm_put(engine);
                        break;
                }

                err = i915_vma_sync(nop);
                if (err) {
                        i915_vma_put(nop);
                        i915_vma_put(base);
                        intel_engine_pm_put(engine);
                        break;
                }

                for (i = 0; i < ARRAY_SIZE(cycles); i++) {
                        struct i915_request *rq;

                        rq = i915_request_create(ce);
                        if (IS_ERR(rq)) {
                                err = PTR_ERR(rq);
                                break;
                        }

                        err = write_timestamp(rq, 2);
                        if (err)
                                goto out;

                        err = rq->engine->emit_bb_start(rq,
                                                        base->node.start, 8,
                                                        0);
                        if (err)
                                goto out;

                        err = write_timestamp(rq, 3);
                        if (err)
                                goto out;

                        err = rq->engine->emit_bb_start(rq,
                                                        nop->node.start,
                                                        nop->node.size,
                                                        0);
                        if (err)
                                goto out;

                        err = write_timestamp(rq, 4);
                        if (err)
                                goto out;

out:
                        /*
                         * Even on error, a request that has been created
                         * must still be submitted so that it is retired
                         * and its resources released.
                         */
                        i915_request_get(rq);
                        i915_request_add(rq);

                        if (i915_request_wait(rq, 0, HZ / 5) < 0)
                                err = -EIO;
                        /* Read the hwsp before dropping our request reference */
                        if (!err)
                                cycles[i] =
                                        (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
                                        (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
                        i915_request_put(rq);
                        if (err)
                                break;
                }
                i915_vma_put(nop);
                i915_vma_put(base);
                intel_engine_pm_put(engine);
                if (err)
                        break;

                pr_info("%s: 16K MI_NOOP cycles: %u\n",
                        engine->name, trifilter(cycles));
        }
        if (perf_end(gt))
                err = -EIO;

        return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_mi_bb_start),
                SUBTEST(perf_mi_noop),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return intel_gt_live_subtests(tests, &i915->gt);
}

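/*
 * Each engine_info in intel_engines[] (both defined in
 * intel_engine_cs.c, which #includes this file) carries an
 * mmio_bases[] table giving the mmio base to use from a given gen
 * onwards. Check the table invariants: entries sorted by gen in
 * strictly descending order, terminated by a gen == 0 sentinel, with
 * no valid entry carrying a zero mmio base.
 */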
static int intel_mmio_bases_check(void *arg)
{
        int i, j;

        for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
                const struct engine_info *info = &intel_engines[i];
                u8 prev = U8_MAX;

                for (j = 0; j < MAX_MMIO_BASES; j++) {
                        u8 gen = info->mmio_bases[j].gen;
                        u32 base = info->mmio_bases[j].base;

                        if (gen >= prev) {
                                pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
                                       __func__,
                                       intel_engine_class_repr(info->class),
                                       info->class, info->instance,
                                       prev, gen);
                                return -EINVAL;
                        }

                        if (gen == 0)
                                break;

                        if (!base) {
                                pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
                                       __func__,
                                       intel_engine_class_repr(info->class),
                                       info->class, info->instance,
                                       base, gen, j);
                                return -EINVAL;
                        }

                        prev = gen;
                }

                pr_debug("%s: min gen supported for %s%d is %d\n",
                         __func__,
                         intel_engine_class_repr(info->class),
                         info->instance,
                         prev);
        }

        return 0;
}

int intel_engine_cs_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(intel_mmio_bases_check),
        };

        return i915_subtests(tests, NULL);
}