// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/sort.h>

#include "gt/intel_gt.h"
#include "gt/intel_engine_user.h"

#include "i915_selftest.h"

#include "gem/i915_gem_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"
#include "selftests/mock_drm.h"
#include "huge_gem_object.h"
#include "mock_context.h"
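
/*
 * Selftests for the blitter-based object helpers: throughput
 * measurements for i915_gem_object_fill_blt() and
 * i915_gem_object_copy_blt(), plus threaded correctness tests that
 * verify the blitter's results from the CPU.
 */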
static int wrap_ktime_compare(const void *A, const void *B)
{
        const ktime_t *a = A, *b = B;

        return ktime_compare(*a, *b);
}

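/*
 * Measure fill throughput on each user-visible copy engine: time
 * ARRAY_SIZE(t) passes, sort them, then report a rate built from the
 * three middle samples with weights 1:2:1 (the fastest and slowest
 * passes are discarded; the total weight of 4 is why the size is
 * multiplied by 4 below). In other words,
 *
 *   MiB/s = (4 * size * NSEC_PER_SEC) / (t[1] + 2 * t[2] + t[3]) >> 20
 */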
static int __perf_fill_blt(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err = 0;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_fill_blt(obj, ce, 0);
                        if (err)
                                break;

                        err = i915_gem_object_wait(obj,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
                        engine->name,
                        obj->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * obj->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_fill_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *obj;
                int err;

                obj = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                err = __perf_fill_blt(obj);
                i915_gem_object_put(obj);
                if (err)
                        return err;
        }

        return 0;
}

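/*
 * As __perf_fill_blt(), but timing object-to-object copies; the rate is
 * reported in terms of the bytes read from src.
 */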
static int __perf_copy_blt(struct drm_i915_gem_object *src,
                           struct drm_i915_gem_object *dst)
{
        struct drm_i915_private *i915 = to_i915(src->base.dev);
        int inst = 0;

        do {
                struct intel_engine_cs *engine;
                ktime_t t[5];
                int pass;
                int err = 0;

                engine = intel_engine_lookup_user(i915,
                                                  I915_ENGINE_CLASS_COPY,
                                                  inst++);
                if (!engine)
                        return 0;

                intel_engine_pm_get(engine);
                for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
                        struct intel_context *ce = engine->kernel_context;
                        ktime_t t0, t1;

                        t0 = ktime_get();

                        err = i915_gem_object_copy_blt(src, dst, ce);
                        if (err)
                                break;

                        err = i915_gem_object_wait(dst,
                                                   I915_WAIT_ALL,
                                                   MAX_SCHEDULE_TIMEOUT);
                        if (err)
                                break;

                        t1 = ktime_get();
                        t[pass] = ktime_sub(t1, t0);
                }
                intel_engine_pm_put(engine);
                if (err)
                        return err;

                sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
                pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
                        engine->name,
                        src->base.size >> 10,
                        div64_u64(mul_u32_u32(4 * src->base.size,
                                              1000 * 1000 * 1000),
                                  t[1] + 2 * t[2] + t[3]) >> 20);
        } while (1);
}

static int perf_copy_blt(void *arg)
{
        struct drm_i915_private *i915 = arg;
        static const unsigned long sizes[] = {
                SZ_4K,
                SZ_64K,
                SZ_2M,
                SZ_64M
        };
        int i;

        for (i = 0; i < ARRAY_SIZE(sizes); i++) {
                struct drm_i915_gem_object *src, *dst;
                int err;

                src = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(src))
                        return PTR_ERR(src);

                dst = i915_gem_object_create_internal(i915, sizes[i]);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_src;
                }

                err = __perf_copy_blt(src, dst);

                i915_gem_object_put(dst);
err_src:
                i915_gem_object_put(src);
                if (err)
                        return err;
        }

        return 0;
}

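/*
 * Per-thread parameters for the threaded tests. @ctx is non-NULL only
 * for SINGLE_CTX runs, in which case every thread shares the one
 * context (and so one address space); otherwise each thread creates its
 * own context with a random priority.
 */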
struct igt_thread_arg {
        struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
        struct file *file;
        struct rnd_state prng;
        unsigned int n_cpus;
};

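/*
 * Worker for igt_fill_blt: until the timeout expires, create an object
 * of random physical/virtual size, scribble over it from the CPU, fill
 * it with a random value using the blitter, then verify a sample of the
 * backing store from the CPU.
 */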
static int igt_fill_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *obj;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = I915_USER_PRIORITY(prio);
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like the GGTT, then we
         * can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                obj = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put;
                }

                /*
                 * Make sure the potentially async clflush does its job, if
                 * required.
                 */
                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(obj) / sizeof(u32));

                if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        obj->cache_dirty = true;

                err = i915_gem_object_fill_blt(obj, ce, val);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) {
                        if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(obj);
                i915_gem_object_put(obj);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(obj);
err_put:
        i915_gem_object_put(obj);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

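/*
 * Worker for igt_copy_blt: as igt_fill_blt_thread, but fill the source
 * object from the CPU, copy it into a second object with the blitter
 * and verify a sample of the destination against the expected value.
 */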
static int igt_copy_blt_thread(void *arg)
{
        struct igt_thread_arg *thread = arg;
        struct intel_engine_cs *engine = thread->engine;
        struct rnd_state *prng = &thread->prng;
        struct drm_i915_gem_object *src, *dst;
        struct i915_gem_context *ctx;
        struct intel_context *ce;
        unsigned int prio;
        IGT_TIMEOUT(end);
        u64 total, max;
        int err;

        ctx = thread->ctx;
        if (!ctx) {
                ctx = live_context_for_engine(engine, thread->file);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);

                prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
                ctx->sched.priority = I915_USER_PRIORITY(prio);
        }

        ce = i915_gem_context_get_engine(ctx, 0);
        GEM_BUG_ON(IS_ERR(ce));

        /*
         * If we have a tiny shared address space, like the GGTT, then we
         * can't be too greedy.
         */
        max = ce->vm->total;
        if (i915_is_ggtt(ce->vm) || thread->ctx)
                max = div_u64(max, thread->n_cpus);
        max >>= 4;

        total = PAGE_SIZE;
        do {
                /* Aim to keep the runtime under reasonable bounds! */
                const u32 max_phys_size = SZ_64K;
                u32 val = prandom_u32_state(prng);
                u32 phys_sz;
                u32 sz;
                u32 *vaddr;
                u32 i;

                total = min(total, max);
                sz = i915_prandom_u32_max_state(total, prng) + 1;
                phys_sz = sz % max_phys_size + 1;

                sz = round_up(sz, PAGE_SIZE);
                phys_sz = round_up(phys_sz, PAGE_SIZE);
                phys_sz = min(phys_sz, sz);

                pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
                         phys_sz, sz, val);

                src = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(src)) {
                        err = PTR_ERR(src);
                        goto err_flush;
                }

                vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_src;
                }

                memset32(vaddr, val,
                         huge_gem_object_phys_size(src) / sizeof(u32));

                i915_gem_object_unpin_map(src);

                if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                        src->cache_dirty = true;

                dst = huge_gem_object(engine->i915, phys_sz, sz);
                if (IS_ERR(dst)) {
                        err = PTR_ERR(dst);
                        goto err_put_src;
                }

                vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
                if (IS_ERR(vaddr)) {
                        err = PTR_ERR(vaddr);
                        goto err_put_dst;
                }

                memset32(vaddr, val ^ 0xdeadbeaf,
                         huge_gem_object_phys_size(dst) / sizeof(u32));

                if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
                        dst->cache_dirty = true;

                err = i915_gem_object_copy_blt(src, dst, ce);
                if (err)
                        goto err_unpin;

                err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT);
                if (err)
                        goto err_unpin;

                for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) {
                        if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
                                drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i]));

                        if (vaddr[i] != val) {
                                pr_err("vaddr[%u]=%x, expected=%x\n", i,
                                       vaddr[i], val);
                                err = -EINVAL;
                                goto err_unpin;
                        }
                }

                i915_gem_object_unpin_map(dst);

                i915_gem_object_put(src);
                i915_gem_object_put(dst);

                total <<= 1;
        } while (!time_after(jiffies, end));

        goto err_flush;

err_unpin:
        i915_gem_object_unpin_map(dst);
err_put_dst:
        i915_gem_object_put(dst);
err_put_src:
        i915_gem_object_put(src);
err_flush:
        if (err == -ENOMEM)
                err = 0;

        intel_context_put(ce);
        return err;
}

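/*
 * Run blt_fn on one more kthread than there are online CPUs. With
 * SINGLE_CTX all threads share a single context created here; otherwise
 * each thread creates its own context from the shared file.
 */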
static int igt_threaded_blt(struct intel_engine_cs *engine,
                            int (*blt_fn)(void *arg),
                            unsigned int flags)
#define SINGLE_CTX BIT(0)
{
        struct igt_thread_arg *thread;
        struct task_struct **tsk;
        unsigned int n_cpus, i;
        I915_RND_STATE(prng);
        int err = 0;

        n_cpus = num_online_cpus() + 1;

        tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
        if (!tsk)
                return 0;

        thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
        if (!thread)
                goto out_tsk;

        thread[0].file = mock_file(engine->i915);
        if (IS_ERR(thread[0].file)) {
                err = PTR_ERR(thread[0].file);
                goto out_thread;
        }

        if (flags & SINGLE_CTX) {
                thread[0].ctx = live_context_for_engine(engine, thread[0].file);
                if (IS_ERR(thread[0].ctx)) {
                        err = PTR_ERR(thread[0].ctx);
                        goto out_file;
                }
        }

        for (i = 0; i < n_cpus; ++i) {
                thread[i].engine = engine;
                thread[i].file = thread[0].file;
                thread[i].ctx = thread[0].ctx;
                thread[i].n_cpus = n_cpus;
                thread[i].prng =
                        I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

                tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
                if (IS_ERR(tsk[i])) {
                        err = PTR_ERR(tsk[i]);
                        break;
                }

                get_task_struct(tsk[i]);
        }

        yield(); /* start all threads before we kthread_stop() */

        for (i = 0; i < n_cpus; ++i) {
                int status;

                if (IS_ERR_OR_NULL(tsk[i]))
                        continue;

                status = kthread_stop(tsk[i]);
                if (status && !err)
                        err = status;

                put_task_struct(tsk[i]);
        }

out_file:
        fput(thread[0].file);
out_thread:
        kfree(thread);
out_tsk:
        kfree(tsk);
        return err;
}

static int test_copy_engines(struct drm_i915_private *i915,
                             int (*fn)(void *arg),
                             unsigned int flags)
{
        struct intel_engine_cs *engine;
        int ret;

        for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) {
                ret = igt_threaded_blt(engine, fn, flags);
                if (ret)
                        return ret;
        }

        return 0;
}

static int igt_fill_blt(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, 0);
}

static int igt_fill_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX);
}

static int igt_copy_blt(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, 0);
}

static int igt_copy_blt_ctx0(void *arg)
{
        return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX);
}

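/*
 * Entry points for the selftest framework; presumably wired up via the
 * selftest() tables in i915_live_selftests.h and i915_perf_selftests.h
 * and run by loading i915 with the corresponding module parameters,
 * e.g. (assuming CONFIG_DRM_I915_SELFTEST):
 *
 *   modprobe i915 live_selftests=-1
 *
 * Both suites are skipped if the GPU is already wedged.
 */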
int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_fill_blt),
                SUBTEST(igt_fill_blt_ctx0),
                SUBTEST(igt_copy_blt),
                SUBTEST(igt_copy_blt_ctx0),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}

int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(perf_fill_blt),
                SUBTEST(perf_copy_blt),
        };

        if (intel_gt_is_wedged(&i915->gt))
                return 0;

        return i915_live_subtests(tests, i915);
}