GNU Linux-libre 5.10.215-gnu1
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_pm.h"
11 #include "gt/intel_gt.h"
12 #include "gt/intel_gt_requests.h"
13 #include "gt/intel_reset.h"
14 #include "i915_selftest.h"
15
16 #include "gem/selftests/igt_gem_utils.h"
17 #include "selftests/i915_random.h"
18 #include "selftests/igt_flush_test.h"
19 #include "selftests/igt_live_test.h"
20 #include "selftests/igt_reset.h"
21 #include "selftests/igt_spinner.h"
22 #include "selftests/mock_drm.h"
23 #include "selftests/mock_gem_device.h"
24
25 #include "huge_gem_object.h"
26 #include "igt_gem_utils.h"
27
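/* Number of u32 dwords that fit in a single page */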
28 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
29
30 static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx)
31 {
32         /* single threaded, private ctx */
33         return rcu_dereference_protected(ctx->vm, true);
34 }
35
36 static int live_nop_switch(void *arg)
37 {
38         const unsigned int nctx = 1024;
39         struct drm_i915_private *i915 = arg;
40         struct intel_engine_cs *engine;
41         struct i915_gem_context **ctx;
42         struct igt_live_test t;
43         struct file *file;
44         unsigned long n;
45         int err = -ENODEV;
46
47         /*
48          * Create as many contexts as we can feasibly get away with
49          * and check we can switch between them rapidly.
50          *
51          * Serves as a very simple stress test for submission and HW switching
52          * between contexts.
53          */
54
55         if (!DRIVER_CAPS(i915)->has_logical_contexts)
56                 return 0;
57
58         file = mock_file(i915);
59         if (IS_ERR(file))
60                 return PTR_ERR(file);
61
62         ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
63         if (!ctx) {
64                 err = -ENOMEM;
65                 goto out_file;
66         }
67
68         for (n = 0; n < nctx; n++) {
69                 ctx[n] = live_context(i915, file);
70                 if (IS_ERR(ctx[n])) {
71                         err = PTR_ERR(ctx[n]);
72                         goto out_file;
73                 }
74         }
75
76         for_each_uabi_engine(engine, i915) {
77                 struct i915_request *rq = NULL;
78                 unsigned long end_time, prime;
79                 ktime_t times[2] = {};
80
81                 times[0] = ktime_get_raw();
82                 for (n = 0; n < nctx; n++) {
83                         struct i915_request *this;
84
85                         this = igt_request_alloc(ctx[n], engine);
86                         if (IS_ERR(this)) {
87                                 err = PTR_ERR(this);
88                                 goto out_file;
89                         }
90                         if (rq) {
91                                 i915_request_await_dma_fence(this, &rq->fence);
92                                 i915_request_put(rq);
93                         }
94                         rq = i915_request_get(this);
95                         i915_request_add(this);
96                 }
97                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
98                 pr_err("Failed to populate %d contexts\n", nctx);
99                         intel_gt_set_wedged(&i915->gt);
100                         i915_request_put(rq);
101                         err = -EIO;
102                         goto out_file;
103                 }
104                 i915_request_put(rq);
105
106                 times[1] = ktime_get_raw();
107
108                 pr_info("Populated %d contexts on %s in %lluns\n",
109                         nctx, engine->name, ktime_to_ns(times[1] - times[0]));
110
111                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
112                 if (err)
113                         goto out_file;
114
115                 end_time = jiffies + i915_selftest.timeout_jiffies;
116                 for_each_prime_number_from(prime, 2, 8192) {
117                         times[1] = ktime_get_raw();
118
119                         rq = NULL;
120                         for (n = 0; n < prime; n++) {
121                                 struct i915_request *this;
122
123                                 this = igt_request_alloc(ctx[n % nctx], engine);
124                                 if (IS_ERR(this)) {
125                                         err = PTR_ERR(this);
126                                         goto out_file;
127                                 }
128
129                                 if (rq) { /* Force submission order */
130                                         i915_request_await_dma_fence(this, &rq->fence);
131                                         i915_request_put(rq);
132                                 }
133
134                                 /*
135                                  * This space is left intentionally blank.
136                                  *
137                                  * We do not actually want to perform any
138                                  * action with this request, we just want
139                                  * to measure the latency in allocation
140                                  * and submission of our breadcrumbs -
141                                  * ensuring that the bare request is sufficient
142                                  * for the system to work (i.e. proper HEAD
143                                  * tracking of the rings, interrupt handling,
144                                  * etc). It also gives us a lower bound
145                                  * on latency.
146                                  */
147
148                                 rq = i915_request_get(this);
149                                 i915_request_add(this);
150                         }
151                         GEM_BUG_ON(!rq);
152                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
153                                 pr_err("Switching between %ld contexts timed out\n",
154                                        prime);
155                                 intel_gt_set_wedged(&i915->gt);
156                                 i915_request_put(rq);
157                                 break;
158                         }
159                         i915_request_put(rq);
160
161                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
162                         if (prime == 2)
163                                 times[0] = times[1];
164
165                         if (__igt_timeout(end_time, NULL))
166                                 break;
167                 }
168
169                 err = igt_live_test_end(&t);
170                 if (err)
171                         goto out_file;
172
173                 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
174                         engine->name,
175                         ktime_to_ns(times[0]),
176                         prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
177         }
178
179 out_file:
180         fput(file);
181         return err;
182 }
183
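/*
 * Per-engine worker state: a kthread switching back and forth between
 * two contexts pinned on the same engine.
 */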
184 struct parallel_switch {
185         struct task_struct *tsk;
186         struct intel_context *ce[2];
187 };
188
189 static int __live_parallel_switch1(void *data)
190 {
191         struct parallel_switch *arg = data;
192         IGT_TIMEOUT(end_time);
193         unsigned long count;
194
195         count = 0;
196         do {
197                 struct i915_request *rq = NULL;
198                 int err, n;
199
200                 err = 0;
201                 for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
202                         struct i915_request *prev = rq;
203
204                         rq = i915_request_create(arg->ce[n]);
205                         if (IS_ERR(rq)) {
206                                 i915_request_put(prev);
207                                 return PTR_ERR(rq);
208                         }
209
210                         i915_request_get(rq);
211                         if (prev) {
212                                 err = i915_request_await_dma_fence(rq, &prev->fence);
213                                 i915_request_put(prev);
214                         }
215
216                         i915_request_add(rq);
217                 }
218                 if (i915_request_wait(rq, 0, HZ / 5) < 0)
219                         err = -ETIME;
220                 i915_request_put(rq);
221                 if (err)
222                         return err;
223
224                 count++;
225         } while (!__igt_timeout(end_time, NULL));
226
227         pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
228         return 0;
229 }
230
231 static int __live_parallel_switchN(void *data)
232 {
233         struct parallel_switch *arg = data;
234         struct i915_request *rq = NULL;
235         IGT_TIMEOUT(end_time);
236         unsigned long count;
237         int n;
238
239         count = 0;
240         do {
241                 for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
242                         struct i915_request *prev = rq;
243                         int err = 0;
244
245                         rq = i915_request_create(arg->ce[n]);
246                         if (IS_ERR(rq)) {
247                                 i915_request_put(prev);
248                                 return PTR_ERR(rq);
249                         }
250
251                         i915_request_get(rq);
252                         if (prev) {
253                                 err = i915_request_await_dma_fence(rq, &prev->fence);
254                                 i915_request_put(prev);
255                         }
256
257                         i915_request_add(rq);
258                         if (err) {
259                                 i915_request_put(rq);
260                                 return err;
261                         }
262                 }
263
264                 count++;
265         } while (!__igt_timeout(end_time, NULL));
266         i915_request_put(rq);
267
268         pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
269         return 0;
270 }
271
272 static int live_parallel_switch(void *arg)
273 {
274         struct drm_i915_private *i915 = arg;
275         static int (* const func[])(void *arg) = {
276                 __live_parallel_switch1,
277                 __live_parallel_switchN,
278                 NULL,
279         };
280         struct parallel_switch *data = NULL;
281         struct i915_gem_engines *engines;
282         struct i915_gem_engines_iter it;
283         int (* const *fn)(void *arg);
284         struct i915_gem_context *ctx;
285         struct intel_context *ce;
286         struct file *file;
287         int n, m, count;
288         int err = 0;
289
290         /*
291          * Check we can process switches on all engines simultaneously.
292          */
293
294         if (!DRIVER_CAPS(i915)->has_logical_contexts)
295                 return 0;
296
297         file = mock_file(i915);
298         if (IS_ERR(file))
299                 return PTR_ERR(file);
300
301         ctx = live_context(i915, file);
302         if (IS_ERR(ctx)) {
303                 err = PTR_ERR(ctx);
304                 goto out_file;
305         }
306
307         engines = i915_gem_context_lock_engines(ctx);
308         count = engines->num_engines;
309
310         data = kcalloc(count, sizeof(*data), GFP_KERNEL);
311         if (!data) {
312                 i915_gem_context_unlock_engines(ctx);
313                 err = -ENOMEM;
314                 goto out_file;
315         }
316
317         m = 0; /* Use the first context as our template for the engines */
318         for_each_gem_engine(ce, engines, it) {
319                 err = intel_context_pin(ce);
320                 if (err) {
321                         i915_gem_context_unlock_engines(ctx);
322                         goto out;
323                 }
324                 data[m++].ce[0] = intel_context_get(ce);
325         }
326         i915_gem_context_unlock_engines(ctx);
327
328         /* Clone the same set of engines into the other contexts */
329         for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
330                 ctx = live_context(i915, file);
331                 if (IS_ERR(ctx)) {
332                         err = PTR_ERR(ctx);
333                         goto out;
334                 }
335
336                 for (m = 0; m < count; m++) {
337                         if (!data[m].ce[0])
338                                 continue;
339
340                         ce = intel_context_create(data[m].ce[0]->engine);
341                         if (IS_ERR(ce)) {
342                                 err = PTR_ERR(ce);
                                goto out;
                        }
343
344                         err = intel_context_pin(ce);
345                         if (err) {
346                                 intel_context_put(ce);
347                                 goto out;
348                         }
349
350                         data[m].ce[n] = ce;
351                 }
352         }
353
354         for (fn = func; !err && *fn; fn++) {
355                 struct igt_live_test t;
356                 int n;
357
358                 err = igt_live_test_begin(&t, i915, __func__, "");
359                 if (err)
360                         break;
361
362                 for (n = 0; n < count; n++) {
363                         if (!data[n].ce[0])
364                                 continue;
365
366                         data[n].tsk = kthread_run(*fn, &data[n],
367                                                   "igt/parallel:%s",
368                                                   data[n].ce[0]->engine->name);
369                         if (IS_ERR(data[n].tsk)) {
370                                 err = PTR_ERR(data[n].tsk);
371                                 break;
372                         }
373                         get_task_struct(data[n].tsk);
374                 }
375
376                 yield(); /* start all threads before we kthread_stop() */
377
378                 for (n = 0; n < count; n++) {
379                         int status;
380
381                         if (IS_ERR_OR_NULL(data[n].tsk))
382                                 continue;
383
384                         status = kthread_stop(data[n].tsk);
385                         if (status && !err)
386                                 err = status;
387
388                         put_task_struct(data[n].tsk);
389                         data[n].tsk = NULL;
390                 }
391
392                 if (igt_live_test_end(&t))
393                         err = -EIO;
394         }
395
396 out:
397         for (n = 0; n < count; n++) {
398                 for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
399                         if (!data[n].ce[m])
400                                 continue;
401
402                         intel_context_unpin(data[n].ce[m]);
403                         intel_context_put(data[n].ce[m]);
404                 }
405         }
406         kfree(data);
407 out_file:
408         fput(file);
409         return err;
410 }
411
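/*
 * huge_gem_object() backs a large GPU-visible object with a small set of
 * physical pages: real_page_count() is the physical backing store,
 * fake_page_count() the dma (GPU-visible) size.
 */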
412 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
413 {
414         return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
415 }
416
417 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
418 {
419         return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
420 }
421
422 static int gpu_fill(struct intel_context *ce,
423                     struct drm_i915_gem_object *obj,
424                     unsigned int dw)
425 {
426         struct i915_vma *vma;
427         int err;
428
429         GEM_BUG_ON(obj->base.size > ce->vm->total);
430         GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
431
432         vma = i915_vma_instance(obj, ce->vm, NULL);
433         if (IS_ERR(vma))
434                 return PTR_ERR(vma);
435
436         err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
437         if (err)
438                 return err;
439
440         /*
441          * Within the GTT the huge object maps every page onto
442          * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
443          * We set the nth dword within the page using the nth
444          * mapping via the GTT - this should exercise the GTT mapping
445          * whilst checking that each context provides a unique view
446          * into the object.
447          */
448         err = igt_gpu_fill_dw(ce, vma,
449                               (dw * real_page_count(obj)) << PAGE_SHIFT |
450                               (dw * sizeof(u32)),
451                               real_page_count(obj),
452                               dw);
453         i915_vma_unpin(vma);
454
455         return err;
456 }
457
458 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
459 {
460         const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
461         unsigned int n, m, need_flush;
462         int err;
463
464         i915_gem_object_lock(obj, NULL);
465         err = i915_gem_object_prepare_write(obj, &need_flush);
466         if (err)
467                 goto out;
468
469         for (n = 0; n < real_page_count(obj); n++) {
470                 u32 *map;
471
472                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
473                 for (m = 0; m < DW_PER_PAGE; m++)
474                         map[m] = value;
475                 if (!has_llc)
476                         drm_clflush_virt_range(map, PAGE_SIZE);
477                 kunmap_atomic(map);
478         }
479
480         i915_gem_object_finish_access(obj);
481         obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
482         obj->write_domain = 0;
483 out:
484         i915_gem_object_unlock(obj);
485         return err;
486 }
487
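/*
 * Check that the first @max dwords of each page were overwritten by the
 * GPU with their dword index, and that the remainder still hold the
 * STACK_MAGIC canary written by cpu_fill().
 */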
488 static noinline int cpu_check(struct drm_i915_gem_object *obj,
489                               unsigned int idx, unsigned int max)
490 {
491         unsigned int n, m, needs_flush;
492         int err;
493
494         i915_gem_object_lock(obj, NULL);
495         err = i915_gem_object_prepare_read(obj, &needs_flush);
496         if (err)
497                 goto out_unlock;
498
499         for (n = 0; n < real_page_count(obj); n++) {
500                 u32 *map;
501
502                 map = kmap_atomic(i915_gem_object_get_page(obj, n));
503                 if (needs_flush & CLFLUSH_BEFORE)
504                         drm_clflush_virt_range(map, PAGE_SIZE);
505
506                 for (m = 0; m < max; m++) {
507                         if (map[m] != m) {
508                                 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
509                                        __builtin_return_address(0), idx,
510                                        n, real_page_count(obj), m, max,
511                                        map[m], m);
512                                 err = -EINVAL;
513                                 goto out_unmap;
514                         }
515                 }
516
517                 for (; m < DW_PER_PAGE; m++) {
518                         if (map[m] != STACK_MAGIC) {
519                                 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
520                                        __builtin_return_address(0), idx, n, m,
521                                        map[m], STACK_MAGIC);
522                                 err = -EINVAL;
523                                 goto out_unmap;
524                         }
525                 }
526
527 out_unmap:
528                 kunmap_atomic(map);
529                 if (err)
530                         break;
531         }
532
533         i915_gem_object_finish_access(obj);
534 out_unlock:
535         i915_gem_object_unlock(obj);
536         return err;
537 }
538
539 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
540 {
541         int err;
542
543         GEM_BUG_ON(obj->base.handle_count);
544
545         /* tie the object to the drm_file for easy reaping */
546         err = idr_alloc(&to_drm_file(file)->object_idr,
547                         &obj->base, 1, 0, GFP_KERNEL);
548         if (err < 0)
549                 return err;
550
551         i915_gem_object_get(obj);
552         obj->base.handle_count++;
553         return 0;
554 }
555
556 static struct drm_i915_gem_object *
557 create_test_object(struct i915_address_space *vm,
558                    struct file *file,
559                    struct list_head *objects)
560 {
561         struct drm_i915_gem_object *obj;
562         u64 size;
563         int err;
564
565         /* Keep in GEM's good graces */
566         intel_gt_retire_requests(vm->gt);
567
568         size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
569         size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
570
571         obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
572         if (IS_ERR(obj))
573                 return obj;
574
575         err = file_add_object(file, obj);
576         i915_gem_object_put(obj);
577         if (err)
578                 return ERR_PTR(err);
579
580         err = cpu_fill(obj, STACK_MAGIC);
581         if (err) {
582                 pr_err("Failed to fill object with cpu, err=%d\n",
583                        err);
584                 return ERR_PTR(err);
585         }
586
587         list_add_tail(&obj->st_link, objects);
588         return obj;
589 }
590
591 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
592 {
593         unsigned long npages = fake_page_count(obj);
594
595         GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
596         return npages / DW_PER_PAGE;
597 }
598
599 static void throttle_release(struct i915_request **q, int count)
600 {
601         int i;
602
603         for (i = 0; i < count; i++) {
604                 if (IS_ERR_OR_NULL(q[i]))
605                         continue;
606
607                 i915_request_put(fetch_and_zero(&q[i]));
608         }
609 }
610
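/*
 * Keep at most @count requests in flight: wait for the oldest request in
 * the queue, shuffle the remainder down and append a fresh request on
 * @ce to the tail.
 */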
611 static int throttle(struct intel_context *ce,
612                     struct i915_request **q, int count)
613 {
614         int i;
615
616         if (!IS_ERR_OR_NULL(q[0])) {
617                 if (i915_request_wait(q[0],
618                                       I915_WAIT_INTERRUPTIBLE,
619                                       MAX_SCHEDULE_TIMEOUT) < 0)
620                         return -EINTR;
621
622                 i915_request_put(q[0]);
623         }
624
625         for (i = 0; i < count - 1; i++)
626                 q[i] = q[i + 1];
627
628         q[i] = intel_context_create_request(ce);
629         if (IS_ERR(q[i]))
630                 return PTR_ERR(q[i]);
631
632         i915_request_get(q[i]);
633         i915_request_add(q[i]);
634
635         return 0;
636 }
637
638 static int igt_ctx_exec(void *arg)
639 {
640         struct drm_i915_private *i915 = arg;
641         struct intel_engine_cs *engine;
642         int err = -ENODEV;
643
644         /*
645          * Create a few different contexts (with different mm) and write
646          * through each ctx/mm using the GPU, making sure those writes end
647          * up in the expected pages of our obj.
648          */
649
650         if (!DRIVER_CAPS(i915)->has_logical_contexts)
651                 return 0;
652
653         for_each_uabi_engine(engine, i915) {
654                 struct drm_i915_gem_object *obj = NULL;
655                 unsigned long ncontexts, ndwords, dw;
656                 struct i915_request *tq[5] = {};
657                 struct igt_live_test t;
658                 IGT_TIMEOUT(end_time);
659                 LIST_HEAD(objects);
660                 struct file *file;
661
662                 if (!intel_engine_can_store_dword(engine))
663                         continue;
664
665                 if (!engine->context_size)
666                         continue; /* No logical context support in HW */
667
668                 file = mock_file(i915);
669                 if (IS_ERR(file))
670                         return PTR_ERR(file);
671
672                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
673                 if (err)
674                         goto out_file;
675
676                 ncontexts = 0;
677                 ndwords = 0;
678                 dw = 0;
679                 while (!time_after(jiffies, end_time)) {
680                         struct i915_gem_context *ctx;
681                         struct intel_context *ce;
682
683                         ctx = kernel_context(i915);
684                         if (IS_ERR(ctx)) {
685                                 err = PTR_ERR(ctx);
686                                 goto out_file;
687                         }
688
689                         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
690                         GEM_BUG_ON(IS_ERR(ce));
691
692                         if (!obj) {
693                                 obj = create_test_object(ce->vm, file, &objects);
694                                 if (IS_ERR(obj)) {
695                                         err = PTR_ERR(obj);
696                                         intel_context_put(ce);
697                                         kernel_context_close(ctx);
698                                         goto out_file;
699                                 }
700                         }
701
702                         err = gpu_fill(ce, obj, dw);
703                         if (err) {
704                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
705                                        ndwords, dw, max_dwords(obj),
706                                        engine->name,
707                                        yesno(!!rcu_access_pointer(ctx->vm)),
708                                        err);
709                                 intel_context_put(ce);
710                                 kernel_context_close(ctx);
711                                 goto out_file;
712                         }
713
714                         err = throttle(ce, tq, ARRAY_SIZE(tq));
715                         if (err) {
716                                 intel_context_put(ce);
717                                 kernel_context_close(ctx);
718                                 goto out_file;
719                         }
720
721                         if (++dw == max_dwords(obj)) {
722                                 obj = NULL;
723                                 dw = 0;
724                         }
725
726                         ndwords++;
727                         ncontexts++;
728
729                         intel_context_put(ce);
730                         kernel_context_close(ctx);
731                 }
732
733                 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
734                         ncontexts, engine->name, ndwords);
735
736                 ncontexts = dw = 0;
737                 list_for_each_entry(obj, &objects, st_link) {
738                         unsigned int rem =
739                                 min_t(unsigned int, ndwords - dw, max_dwords(obj));
740
741                         err = cpu_check(obj, ncontexts++, rem);
742                         if (err)
743                                 break;
744
745                         dw += rem;
746                 }
747
748 out_file:
749                 throttle_release(tq, ARRAY_SIZE(tq));
750                 if (igt_live_test_end(&t))
751                         err = -EIO;
752
753                 fput(file);
754                 if (err)
755                         return err;
756
757                 i915_gem_drain_freed_objects(i915);
758         }
759
760         return 0;
761 }
762
763 static int igt_shared_ctx_exec(void *arg)
764 {
765         struct drm_i915_private *i915 = arg;
766         struct i915_request *tq[5] = {};
767         struct i915_gem_context *parent;
768         struct intel_engine_cs *engine;
769         struct igt_live_test t;
770         struct file *file;
771         int err = 0;
772
773         /*
774          * Create a few different contexts with the same mm and write
775          * through each ctx using the GPU, making sure those writes end
776          * up in the expected pages of our obj.
777          */
778         if (!DRIVER_CAPS(i915)->has_logical_contexts)
779                 return 0;
780
781         file = mock_file(i915);
782         if (IS_ERR(file))
783                 return PTR_ERR(file);
784
785         parent = live_context(i915, file);
786         if (IS_ERR(parent)) {
787                 err = PTR_ERR(parent);
788                 goto out_file;
789         }
790
791         if (!parent->vm) { /* not full-ppgtt; nothing to share */
792                 err = 0;
793                 goto out_file;
794         }
795
796         err = igt_live_test_begin(&t, i915, __func__, "");
797         if (err)
798                 goto out_file;
799
800         for_each_uabi_engine(engine, i915) {
801                 unsigned long ncontexts, ndwords, dw;
802                 struct drm_i915_gem_object *obj = NULL;
803                 IGT_TIMEOUT(end_time);
804                 LIST_HEAD(objects);
805
806                 if (!intel_engine_can_store_dword(engine))
807                         continue;
808
809                 dw = 0;
810                 ndwords = 0;
811                 ncontexts = 0;
812                 while (!time_after(jiffies, end_time)) {
813                         struct i915_gem_context *ctx;
814                         struct intel_context *ce;
815
816                         ctx = kernel_context(i915);
817                         if (IS_ERR(ctx)) {
818                                 err = PTR_ERR(ctx);
819                                 goto out_test;
820                         }
821
822                         mutex_lock(&ctx->mutex);
823                         __assign_ppgtt(ctx, ctx_vm(parent));
824                         mutex_unlock(&ctx->mutex);
825
826                         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
827                         GEM_BUG_ON(IS_ERR(ce));
828
829                         if (!obj) {
830                                 obj = create_test_object(ctx_vm(parent),
831                                                          file, &objects);
832                                 if (IS_ERR(obj)) {
833                                         err = PTR_ERR(obj);
834                                         intel_context_put(ce);
835                                         kernel_context_close(ctx);
836                                         goto out_test;
837                                 }
838                         }
839
840                         err = gpu_fill(ce, obj, dw);
841                         if (err) {
842                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
843                                        ndwords, dw, max_dwords(obj),
844                                        engine->name,
845                                        yesno(!!rcu_access_pointer(ctx->vm)),
846                                        err);
847                                 intel_context_put(ce);
848                                 kernel_context_close(ctx);
849                                 goto out_test;
850                         }
851
852                         err = throttle(ce, tq, ARRAY_SIZE(tq));
853                         if (err) {
854                                 intel_context_put(ce);
855                                 kernel_context_close(ctx);
856                                 goto out_test;
857                         }
858
859                         if (++dw == max_dwords(obj)) {
860                                 obj = NULL;
861                                 dw = 0;
862                         }
863
864                         ndwords++;
865                         ncontexts++;
866
867                         intel_context_put(ce);
868                         kernel_context_close(ctx);
869                 }
870                 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
871                         ncontexts, engine->name, ndwords);
872
873                 ncontexts = dw = 0;
874                 list_for_each_entry(obj, &objects, st_link) {
875                         unsigned int rem =
876                                 min_t(unsigned int, ndwords - dw, max_dwords(obj));
877
878                         err = cpu_check(obj, ncontexts++, rem);
879                         if (err)
880                                 goto out_test;
881
882                         dw += rem;
883                 }
884
885                 i915_gem_drain_freed_objects(i915);
886         }
887 out_test:
888         throttle_release(tq, ARRAY_SIZE(tq));
889         if (igt_live_test_end(&t))
890                 err = -EIO;
891 out_file:
892         fput(file);
893         return err;
894 }
895
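/*
 * Write a small batch into @rpcs that stores the current
 * GEN8_R_PWR_CLK_STATE register into the start of @vma and then
 * terminates with MI_BATCH_BUFFER_END.
 */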
896 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
897 {
898         u32 *cmd;
899
900         GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8);
901
902         cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
903         if (IS_ERR(cmd))
904                 return PTR_ERR(cmd);
905
906         *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
907         *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
908         *cmd++ = lower_32_bits(vma->node.start);
909         *cmd++ = upper_32_bits(vma->node.start);
910         *cmd = MI_BATCH_BUFFER_END;
911
912         __i915_gem_object_flush_map(rpcs, 0, 64);
913         i915_gem_object_unpin_map(rpcs);
914
915         intel_gt_chipset_flush(vma->vm->gt);
916
917         return 0;
918 }
919
920 static int
921 emit_rpcs_query(struct drm_i915_gem_object *obj,
922                 struct intel_context *ce,
923                 struct i915_request **rq_out)
924 {
925         struct drm_i915_private *i915 = to_i915(obj->base.dev);
926         struct i915_request *rq;
927         struct i915_gem_ww_ctx ww;
928         struct i915_vma *batch;
929         struct i915_vma *vma;
930         struct drm_i915_gem_object *rpcs;
931         int err;
932
933         GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
934
935         if (INTEL_GEN(i915) < 8)
936                 return -EINVAL;
937
938         vma = i915_vma_instance(obj, ce->vm, NULL);
939         if (IS_ERR(vma))
940                 return PTR_ERR(vma);
941
942         rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
943         if (IS_ERR(rpcs))
944                 return PTR_ERR(rpcs);
945
946         batch = i915_vma_instance(rpcs, ce->vm, NULL);
947         if (IS_ERR(batch)) {
948                 err = PTR_ERR(batch);
949                 goto err_put;
950         }
951
952         i915_gem_ww_ctx_init(&ww, false);
953 retry:
954         err = i915_gem_object_lock(obj, &ww);
955         if (!err)
956                 err = i915_gem_object_lock(rpcs, &ww);
957         if (!err)
958                 err = i915_gem_object_set_to_gtt_domain(obj, false);
959         if (!err)
960                 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
961         if (err)
962                 goto err_put;
963
964         err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
965         if (err)
966                 goto err_vma;
967
968         err = rpcs_query_batch(rpcs, vma);
969         if (err)
970                 goto err_batch;
971
972         rq = i915_request_create(ce);
973         if (IS_ERR(rq)) {
974                 err = PTR_ERR(rq);
975                 goto err_batch;
976         }
977
978         err = i915_request_await_object(rq, batch->obj, false);
979         if (err == 0)
980                 err = i915_vma_move_to_active(batch, rq, 0);
981         if (err)
982                 goto skip_request;
983
984         err = i915_request_await_object(rq, vma->obj, true);
985         if (err == 0)
986                 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
987         if (err)
988                 goto skip_request;
989
990         if (rq->engine->emit_init_breadcrumb) {
991                 err = rq->engine->emit_init_breadcrumb(rq);
992                 if (err)
993                         goto skip_request;
994         }
995
996         err = rq->engine->emit_bb_start(rq,
997                                         batch->node.start, batch->node.size,
998                                         0);
999         if (err)
1000                 goto skip_request;
1001
1002         *rq_out = i915_request_get(rq);
1003
1004 skip_request:
1005         if (err)
1006                 i915_request_set_error_once(rq, err);
1007         i915_request_add(rq);
1008 err_batch:
1009         i915_vma_unpin(batch);
1010 err_vma:
1011         i915_vma_unpin(vma);
1012 err_put:
1013         if (err == -EDEADLK) {
1014                 err = i915_gem_ww_ctx_backoff(&ww);
1015                 if (!err)
1016                         goto retry;
1017         }
1018         i915_gem_ww_ctx_fini(&ww);
1019         i915_gem_object_put(rpcs);
1020         return err;
1021 }
1022
1023 #define TEST_IDLE       BIT(0)
1024 #define TEST_BUSY       BIT(1)
1025 #define TEST_RESET      BIT(2)
1026
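/*
 * For the busy and reset phases, start a spinner on @ce so that the
 * context is actively running while its SSEU configuration is changed.
 */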
1027 static int
1028 __sseu_prepare(const char *name,
1029                unsigned int flags,
1030                struct intel_context *ce,
1031                struct igt_spinner **spin)
1032 {
1033         struct i915_request *rq;
1034         int ret;
1035
1036         *spin = NULL;
1037         if (!(flags & (TEST_BUSY | TEST_RESET)))
1038                 return 0;
1039
1040         *spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1041         if (!*spin)
1042                 return -ENOMEM;
1043
1044         ret = igt_spinner_init(*spin, ce->engine->gt);
1045         if (ret)
1046                 goto err_free;
1047
1048         rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1049         if (IS_ERR(rq)) {
1050                 ret = PTR_ERR(rq);
1051                 goto err_fini;
1052         }
1053
1054         i915_request_add(rq);
1055
1056         if (!igt_wait_for_spinner(*spin, rq)) {
1057                 pr_err("%s: Spinner failed to start!\n", name);
1058                 ret = -ETIMEDOUT;
1059                 goto err_end;
1060         }
1061
1062         return 0;
1063
1064 err_end:
1065         igt_spinner_end(*spin);
1066 err_fini:
1067         igt_spinner_fini(*spin);
1068 err_free:
1069         kfree(fetch_and_zero(spin));
1070         return ret;
1071 }
1072
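/*
 * Submit an RPCS query on @ce and decode the slice count from the value
 * read back; returns the count (or a negative error) and stores the raw
 * register value in @rpcs.
 */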
1073 static int
1074 __read_slice_count(struct intel_context *ce,
1075                    struct drm_i915_gem_object *obj,
1076                    struct igt_spinner *spin,
1077                    u32 *rpcs)
1078 {
1079         struct i915_request *rq = NULL;
1080         u32 s_mask, s_shift;
1081         unsigned int cnt;
1082         u32 *buf, val;
1083         long ret;
1084
1085         ret = emit_rpcs_query(obj, ce, &rq);
1086         if (ret)
1087                 return ret;
1088
1089         if (spin)
1090                 igt_spinner_end(spin);
1091
1092         ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1093         i915_request_put(rq);
1094         if (ret < 0)
1095                 return ret;
1096
1097         buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
1098         if (IS_ERR(buf)) {
1099                 ret = PTR_ERR(buf);
1100                 return ret;
1101         }
1102
1103         if (INTEL_GEN(ce->engine->i915) >= 11) {
1104                 s_mask = GEN11_RPCS_S_CNT_MASK;
1105                 s_shift = GEN11_RPCS_S_CNT_SHIFT;
1106         } else {
1107                 s_mask = GEN8_RPCS_S_CNT_MASK;
1108                 s_shift = GEN8_RPCS_S_CNT_SHIFT;
1109         }
1110
1111         val = *buf;
1112         cnt = (val & s_mask) >> s_shift;
1113         *rpcs = val;
1114
1115         i915_gem_object_unpin_map(obj);
1116
1117         return cnt;
1118 }
1119
1120 static int
1121 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1122              const char *prefix, const char *suffix)
1123 {
1124         if (slices == expected)
1125                 return 0;
1126
1127         if (slices < 0) {
1128                 pr_err("%s: %s read slice count failed with %d%s\n",
1129                        name, prefix, slices, suffix);
1130                 return slices;
1131         }
1132
1133         pr_err("%s: %s slice count %d is not %u%s\n",
1134                name, prefix, slices, expected, suffix);
1135
1136         pr_info("RPCS=0x%x; %u%sx%u%s\n",
1137                 rpcs, slices,
1138                 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1139                 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1140                 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1141
1142         return -EINVAL;
1143 }
1144
1145 static int
1146 __sseu_finish(const char *name,
1147               unsigned int flags,
1148               struct intel_context *ce,
1149               struct drm_i915_gem_object *obj,
1150               unsigned int expected,
1151               struct igt_spinner *spin)
1152 {
1153         unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1154         u32 rpcs = 0;
1155         int ret = 0;
1156
1157         if (flags & TEST_RESET) {
1158                 ret = intel_engine_reset(ce->engine, "sseu");
1159                 if (ret)
1160                         goto out;
1161         }
1162
1163         ret = __read_slice_count(ce, obj,
1164                                  flags & TEST_RESET ? NULL : spin, &rpcs);
1165         ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1166         if (ret)
1167                 goto out;
1168
1169         ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1170         ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1171
1172 out:
1173         if (spin)
1174                 igt_spinner_end(spin);
1175
1176         if ((flags & TEST_IDLE) && ret == 0) {
1177                 ret = igt_flush_test(ce->engine->i915);
1178                 if (ret)
1179                         return ret;
1180
1181                 ret = __read_slice_count(ce, obj, NULL, &rpcs);
1182                 ret = __check_rpcs(name, rpcs, ret, expected,
1183                                    "Context", " after idle!");
1184         }
1185
1186         return ret;
1187 }
1188
1189 static int
1190 __sseu_test(const char *name,
1191             unsigned int flags,
1192             struct intel_context *ce,
1193             struct drm_i915_gem_object *obj,
1194             struct intel_sseu sseu)
1195 {
1196         struct igt_spinner *spin = NULL;
1197         int ret;
1198
1199         intel_engine_pm_get(ce->engine);
1200
1201         ret = __sseu_prepare(name, flags, ce, &spin);
1202         if (ret)
1203                 goto out_pm;
1204
1205         ret = intel_context_reconfigure_sseu(ce, sseu);
1206         if (ret)
1207                 goto out_spin;
1208
1209         ret = __sseu_finish(name, flags, ce, obj,
1210                             hweight32(sseu.slice_mask), spin);
1211
1212 out_spin:
1213         if (spin) {
1214                 igt_spinner_end(spin);
1215                 igt_spinner_fini(spin);
1216                 kfree(spin);
1217         }
1218 out_pm:
1219         intel_engine_pm_put(ce->engine);
1220         return ret;
1221 }
1222
1223 static int
1224 __igt_ctx_sseu(struct drm_i915_private *i915,
1225                const char *name,
1226                unsigned int flags)
1227 {
1228         struct drm_i915_gem_object *obj;
1229         int inst = 0;
1230         int ret = 0;
1231
1232         if (INTEL_GEN(i915) < 9)
1233                 return 0;
1234
1235         if (flags & TEST_RESET)
1236                 igt_global_reset_lock(&i915->gt);
1237
1238         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1239         if (IS_ERR(obj)) {
1240                 ret = PTR_ERR(obj);
1241                 goto out_unlock;
1242         }
1243
1244         do {
1245                 struct intel_engine_cs *engine;
1246                 struct intel_context *ce;
1247                 struct intel_sseu pg_sseu;
1248
1249                 engine = intel_engine_lookup_user(i915,
1250                                                   I915_ENGINE_CLASS_RENDER,
1251                                                   inst++);
1252                 if (!engine)
1253                         break;
1254
1255                 if (hweight32(engine->sseu.slice_mask) < 2)
1256                         continue;
1257
1258                 if (!engine->gt->info.sseu.has_slice_pg)
1259                         continue;
1260
1261                 /*
1262                  * Gen11 VME-friendly power-gated configuration with
1263                  * half of the sub-slices enabled.
1264                  */
1265                 pg_sseu = engine->sseu;
1266                 pg_sseu.slice_mask = 1;
1267                 pg_sseu.subslice_mask =
1268                         ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1269
1270                 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1271                         engine->name, name, flags,
1272                         hweight32(engine->sseu.slice_mask),
1273                         hweight32(pg_sseu.slice_mask));
1274
1275                 ce = intel_context_create(engine);
1276                 if (IS_ERR(ce)) {
1277                         ret = PTR_ERR(ce);
1278                         goto out_put;
1279                 }
1280
1281                 ret = intel_context_pin(ce);
1282                 if (ret)
1283                         goto out_ce;
1284
1285                 /* First set the default mask. */
1286                 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1287                 if (ret)
1288                         goto out_unpin;
1289
1290                 /* Then set a power-gated configuration. */
1291                 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1292                 if (ret)
1293                         goto out_unpin;
1294
1295                 /* Back to defaults. */
1296                 ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1297                 if (ret)
1298                         goto out_unpin;
1299
1300                 /* One last power-gated configuration for the road. */
1301                 ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1302                 if (ret)
1303                         goto out_unpin;
1304
1305 out_unpin:
1306                 intel_context_unpin(ce);
1307 out_ce:
1308                 intel_context_put(ce);
1309         } while (!ret);
1310
1311         if (igt_flush_test(i915))
1312                 ret = -EIO;
1313
1314 out_put:
1315         i915_gem_object_put(obj);
1316
1317 out_unlock:
1318         if (flags & TEST_RESET)
1319                 igt_global_reset_unlock(&i915->gt);
1320
1321         if (ret)
1322                 pr_err("%s: Failed with %d!\n", name, ret);
1323
1324         return ret;
1325 }
1326
1327 static int igt_ctx_sseu(void *arg)
1328 {
1329         struct {
1330                 const char *name;
1331                 unsigned int flags;
1332         } *phase, phases[] = {
1333                 { .name = "basic", .flags = 0 },
1334                 { .name = "idle", .flags = TEST_IDLE },
1335                 { .name = "busy", .flags = TEST_BUSY },
1336                 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1337                 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1338                 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1339         };
1340         unsigned int i;
1341         int ret = 0;
1342
1343         for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1344              i++, phase++)
1345                 ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1346
1347         return ret;
1348 }
1349
1350 static int igt_ctx_readonly(void *arg)
1351 {
1352         struct drm_i915_private *i915 = arg;
1353         unsigned long idx, ndwords, dw, num_engines;
1354         struct drm_i915_gem_object *obj = NULL;
1355         struct i915_request *tq[5] = {};
1356         struct i915_gem_engines_iter it;
1357         struct i915_address_space *vm;
1358         struct i915_gem_context *ctx;
1359         struct intel_context *ce;
1360         struct igt_live_test t;
1361         I915_RND_STATE(prng);
1362         IGT_TIMEOUT(end_time);
1363         LIST_HEAD(objects);
1364         struct file *file;
1365         int err = -ENODEV;
1366
1367         /*
1368          * Create a few read-only objects (with the occasional writable object)
1369          * and try to write into these objects, checking that the GPU discards
1370          * any write to a read-only object.
1371          */
1372
1373         file = mock_file(i915);
1374         if (IS_ERR(file))
1375                 return PTR_ERR(file);
1376
1377         err = igt_live_test_begin(&t, i915, __func__, "");
1378         if (err)
1379                 goto out_file;
1380
1381         ctx = live_context(i915, file);
1382         if (IS_ERR(ctx)) {
1383                 err = PTR_ERR(ctx);
1384                 goto out_file;
1385         }
1386
1387         vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm;
1388         if (!vm || !vm->has_read_only) {
1389                 err = 0;
1390                 goto out_file;
1391         }
1392
1393         num_engines = 0;
1394         for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1395                 if (intel_engine_can_store_dword(ce->engine))
1396                         num_engines++;
1397         i915_gem_context_unlock_engines(ctx);
1398
1399         ndwords = 0;
1400         dw = 0;
1401         while (!time_after(jiffies, end_time)) {
1402                 for_each_gem_engine(ce,
1403                                     i915_gem_context_lock_engines(ctx), it) {
1404                         if (!intel_engine_can_store_dword(ce->engine))
1405                                 continue;
1406
1407                         if (!obj) {
1408                                 obj = create_test_object(ce->vm, file, &objects);
1409                                 if (IS_ERR(obj)) {
1410                                         err = PTR_ERR(obj);
1411                                         i915_gem_context_unlock_engines(ctx);
1412                                         goto out_file;
1413                                 }
1414
1415                                 if (prandom_u32_state(&prng) & 1)
1416                                         i915_gem_object_set_readonly(obj);
1417                         }
1418
1419                         err = gpu_fill(ce, obj, dw);
1420                         if (err) {
1421                                 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1422                                        ndwords, dw, max_dwords(obj),
1423                                        ce->engine->name,
1424                                        yesno(!!ctx_vm(ctx)),
1425                                        err);
1426                                 i915_gem_context_unlock_engines(ctx);
1427                                 goto out_file;
1428                         }
1429
1430                         err = throttle(ce, tq, ARRAY_SIZE(tq));
1431                         if (err) {
1432                                 i915_gem_context_unlock_engines(ctx);
1433                                 goto out_file;
1434                         }
1435
1436                         if (++dw == max_dwords(obj)) {
1437                                 obj = NULL;
1438                                 dw = 0;
1439                         }
1440                         ndwords++;
1441                 }
1442                 i915_gem_context_unlock_engines(ctx);
1443         }
1444         pr_info("Submitted %lu dwords (across %lu engines)\n",
1445                 ndwords, num_engines);
1446
1447         dw = 0;
1448         idx = 0;
1449         list_for_each_entry(obj, &objects, st_link) {
1450                 unsigned int rem =
1451                         min_t(unsigned int, ndwords - dw, max_dwords(obj));
1452                 unsigned int num_writes;
1453
1454                 num_writes = rem;
1455                 if (i915_gem_object_is_readonly(obj))
1456                         num_writes = 0;
1457
1458                 err = cpu_check(obj, idx++, num_writes);
1459                 if (err)
1460                         break;
1461
1462                 dw += rem;
1463         }
1464
1465 out_file:
1466         throttle_release(tq, ARRAY_SIZE(tq));
1467         if (igt_live_test_end(&t))
1468                 err = -EIO;
1469
1470         fput(file);
1471         return err;
1472 }
1473
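/* Reject the test offset if anything is already allocated there in @vm. */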
1474 static int check_scratch(struct i915_address_space *vm, u64 offset)
1475 {
1476         struct drm_mm_node *node;
1477
1478         mutex_lock(&vm->mutex);
1479         node = __drm_mm_interval_first(&vm->mm,
1480                                        offset, offset + sizeof(u32) - 1);
1481         mutex_unlock(&vm->mutex);
1482         if (!node || node->start > offset)
1483                 return 0;
1484
1485         GEM_BUG_ON(offset >= node->start + node->size);
1486
1487         pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1488                upper_32_bits(offset), lower_32_bits(offset));
1489         return -EINVAL;
1490 }
1491
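/*
 * Emit a batch on @engine in @ctx that writes @value to @offset within
 * the context's address space using MI_STORE_DWORD_IMM.
 */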
1492 static int write_to_scratch(struct i915_gem_context *ctx,
1493                             struct intel_engine_cs *engine,
1494                             u64 offset, u32 value)
1495 {
1496         struct drm_i915_private *i915 = ctx->i915;
1497         struct drm_i915_gem_object *obj;
1498         struct i915_address_space *vm;
1499         struct i915_request *rq;
1500         struct i915_vma *vma;
1501         u32 *cmd;
1502         int err;
1503
1504         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1505
1506         err = check_scratch(ctx_vm(ctx), offset);
1507         if (err)
1508                 return err;
1509
1510         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1511         if (IS_ERR(obj))
1512                 return PTR_ERR(obj);
1513
1514         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
1515         if (IS_ERR(cmd)) {
1516                 err = PTR_ERR(cmd);
1517                 goto out;
1518         }
1519
1520         *cmd++ = MI_STORE_DWORD_IMM_GEN4;
1521         if (INTEL_GEN(i915) >= 8) {
1522                 *cmd++ = lower_32_bits(offset);
1523                 *cmd++ = upper_32_bits(offset);
1524         } else {
1525                 *cmd++ = 0;
1526                 *cmd++ = offset;
1527         }
1528         *cmd++ = value;
1529         *cmd = MI_BATCH_BUFFER_END;
1530         __i915_gem_object_flush_map(obj, 0, 64);
1531         i915_gem_object_unpin_map(obj);
1532
1533         intel_gt_chipset_flush(engine->gt);
1534
1535         vm = i915_gem_context_get_vm_rcu(ctx);
1536         vma = i915_vma_instance(obj, vm, NULL);
1537         if (IS_ERR(vma)) {
1538                 err = PTR_ERR(vma);
1539                 goto out_vm;
1540         }
1541
1542         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1543         if (err)
1544                 goto out_vm;
1545
1546         rq = igt_request_alloc(ctx, engine);
1547         if (IS_ERR(rq)) {
1548                 err = PTR_ERR(rq);
1549                 goto err_unpin;
1550         }
1551
1552         i915_vma_lock(vma);
1553         err = i915_request_await_object(rq, vma->obj, false);
1554         if (err == 0)
1555                 err = i915_vma_move_to_active(vma, rq, 0);
1556         i915_vma_unlock(vma);
1557         if (err)
1558                 goto skip_request;
1559
1560         if (rq->engine->emit_init_breadcrumb) {
1561                 err = rq->engine->emit_init_breadcrumb(rq);
1562                 if (err)
1563                         goto skip_request;
1564         }
1565
1566         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
1567         if (err)
1568                 goto skip_request;
1569
1570         i915_vma_unpin(vma);
1571
1572         i915_request_add(rq);
1573
1574         goto out_vm;
1575 skip_request:
1576         i915_request_set_error_once(rq, err);
1577         i915_request_add(rq);
1578 err_unpin:
1579         i915_vma_unpin(vma);
1580 out_vm:
1581         i915_vm_put(vm);
1582 out:
1583         i915_gem_object_put(obj);
1584         return err;
1585 }
1586
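/*
 * read_from_scratch() - read back the dword at @offset through the GPU.
 * On gen8+ the batch loads the value into an engine GPR with
 * MI_LOAD_REGISTER_MEM and then stores that register back into the batch
 * object (at byte offset @result), where the CPU can inspect it. Earlier
 * gens bounce the value through an MMIO scratch register instead, which
 * requires a privileged batch executed from the GGTT (hence PIN_GLOBAL
 * and I915_DISPATCH_SECURE below).
 */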
1587 static int read_from_scratch(struct i915_gem_context *ctx,
1588                              struct intel_engine_cs *engine,
1589                              u64 offset, u32 *value)
1590 {
1591         struct drm_i915_private *i915 = ctx->i915;
1592         struct drm_i915_gem_object *obj;
1593         struct i915_address_space *vm;
1594         const u32 result = 0x100;
1595         struct i915_request *rq;
1596         struct i915_vma *vma;
1597         unsigned int flags;
1598         u32 *cmd;
1599         int err;
1600
1601         GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1602
1603         err = check_scratch(ctx_vm(ctx), offset);
1604         if (err)
1605                 return err;
1606
1607         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1608         if (IS_ERR(obj))
1609                 return PTR_ERR(obj);
1610
1611         if (INTEL_GEN(i915) >= 8) {
1612                 const u32 GPR0 = engine->mmio_base + 0x600;
1613
1614                 vm = i915_gem_context_get_vm_rcu(ctx);
1615                 vma = i915_vma_instance(obj, vm, NULL);
1616                 if (IS_ERR(vma)) {
1617                         err = PTR_ERR(vma);
1618                         goto out_vm;
1619                 }
1620
1621                 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1622                 if (err)
1623                         goto out_vm;
1624
1625                 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
1626                 if (IS_ERR(cmd)) {
1627                         err = PTR_ERR(cmd);
1628                         goto err_unpin;
1629                 }
1630
1631                 memset(cmd, POISON_INUSE, PAGE_SIZE);
1632                 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1633                 *cmd++ = GPR0;
1634                 *cmd++ = lower_32_bits(offset);
1635                 *cmd++ = upper_32_bits(offset);
1636                 *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1637                 *cmd++ = GPR0;
1638                 *cmd++ = result;
1639                 *cmd++ = 0;
1640                 *cmd = MI_BATCH_BUFFER_END;
1641
1642                 i915_gem_object_flush_map(obj);
1643                 i915_gem_object_unpin_map(obj);
1644
1645                 flags = 0;
1646         } else {
1647                 const u32 reg = engine->mmio_base + 0x420;
1648
1649                 /* hsw: register access, even to 3DPRIM!, is protected */
1650                 vm = i915_vm_get(&engine->gt->ggtt->vm);
1651                 vma = i915_vma_instance(obj, vm, NULL);
1652                 if (IS_ERR(vma)) {
1653                         err = PTR_ERR(vma);
1654                         goto out_vm;
1655                 }
1656
1657                 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1658                 if (err)
1659                         goto out_vm;
1660
1661                 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
1662                 if (IS_ERR(cmd)) {
1663                         err = PTR_ERR(cmd);
1664                         goto err_unpin;
1665                 }
1666
1667                 memset(cmd, POISON_INUSE, PAGE_SIZE);
1668                 *cmd++ = MI_LOAD_REGISTER_MEM;
1669                 *cmd++ = reg;
1670                 *cmd++ = offset;
1671                 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1672                 *cmd++ = reg;
1673                 *cmd++ = vma->node.start + result;
1674                 *cmd = MI_BATCH_BUFFER_END;
1675
1676                 i915_gem_object_flush_map(obj);
1677                 i915_gem_object_unpin_map(obj);
1678
1679                 flags = I915_DISPATCH_SECURE;
1680         }
1681
1682         intel_gt_chipset_flush(engine->gt);
1683
1684         rq = igt_request_alloc(ctx, engine);
1685         if (IS_ERR(rq)) {
1686                 err = PTR_ERR(rq);
1687                 goto err_unpin;
1688         }
1689
1690         i915_vma_lock(vma);
1691         err = i915_request_await_object(rq, vma->obj, true);
1692         if (err == 0)
1693                 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1694         i915_vma_unlock(vma);
1695         if (err)
1696                 goto skip_request;
1697
1698         if (rq->engine->emit_init_breadcrumb) {
1699                 err = rq->engine->emit_init_breadcrumb(rq);
1700                 if (err)
1701                         goto skip_request;
1702         }
1703
1704         err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
1705         if (err)
1706                 goto skip_request;
1707
1708         i915_vma_unpin(vma);
1709
1710         i915_request_add(rq);
1711
1712         i915_gem_object_lock(obj, NULL);
1713         err = i915_gem_object_set_to_cpu_domain(obj, false);
1714         i915_gem_object_unlock(obj);
1715         if (err)
1716                 goto out_vm;
1717
1718         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
1719         if (IS_ERR(cmd)) {
1720                 err = PTR_ERR(cmd);
1721                 goto out_vm;
1722         }
1723
1724         *value = cmd[result / sizeof(*cmd)];
1725         i915_gem_object_unpin_map(obj);
1726
1727         goto out_vm;
1728 skip_request:
1729         i915_request_set_error_once(rq, err);
1730         i915_request_add(rq);
1731 err_unpin:
1732         i915_vma_unpin(vma);
1733 out_vm:
1734         i915_vm_put(vm);
1735 out:
1736         i915_gem_object_put(obj);
1737         return err;
1738 }
1739
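/*
 * check_scratch_page() - sample the VM's scratch page. Reads through an
 * unpopulated PTE land here, so this value is what read_from_scratch() is
 * expected to return when the contexts are properly isolated. The page is
 * also checked for uniform contents so that a single sample is
 * representative.
 */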
1740 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1741 {
1742         struct i915_address_space *vm;
1743         struct page *page;
1744         u32 *vaddr;
1745         int err = 0;
1746
1747         vm = ctx_vm(ctx);
1748         if (!vm)
1749                 return -ENODEV;
1750
1751         page = __px_page(vm->scratch[0]);
1752         if (!page) {
1753                 pr_err("No scratch page!\n");
1754                 return -EINVAL;
1755         }
1756
1757         vaddr = kmap(page);
1758         if (!vaddr) {
1759                 pr_err("No (mappable) scratch page!\n");
1760                 return -EINVAL;
1761         }
1762
1763         memcpy(out, vaddr, sizeof(*out));
1764         if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1765                 pr_err("Inconsistent initial state of scratch page!\n");
1766                 err = -EINVAL;
1767         }
1768         kunmap(page);
1769
1770         return err;
1771 }
1772
1773 static int igt_vm_isolation(void *arg)
1774 {
1775         struct drm_i915_private *i915 = arg;
1776         struct i915_gem_context *ctx_a, *ctx_b;
1777         unsigned long num_engines, count;
1778         struct intel_engine_cs *engine;
1779         struct igt_live_test t;
1780         I915_RND_STATE(prng);
1781         struct file *file;
1782         u64 vm_total;
1783         u32 expected;
1784         int err;
1785
1786         if (INTEL_GEN(i915) < 7)
1787                 return 0;
1788
1789         /*
1790          * The simple goal here is that a write into one context is not
1791          * observed in a second (separate page tables and scratch).
1792          */
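        /*
         * Each pass writes 0xdeadbeef into a random, unpopulated offset of
         * ctx_a's address space and then reads the same offset back through
         * ctx_b. With distinct page tables the read must hit ctx_b's scratch
         * page and return the value sampled by check_scratch_page(), never
         * the poison written via ctx_a.
         */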
1793
1794         file = mock_file(i915);
1795         if (IS_ERR(file))
1796                 return PTR_ERR(file);
1797
1798         err = igt_live_test_begin(&t, i915, __func__, "");
1799         if (err)
1800                 goto out_file;
1801
1802         ctx_a = live_context(i915, file);
1803         if (IS_ERR(ctx_a)) {
1804                 err = PTR_ERR(ctx_a);
1805                 goto out_file;
1806         }
1807
1808         ctx_b = live_context(i915, file);
1809         if (IS_ERR(ctx_b)) {
1810                 err = PTR_ERR(ctx_b);
1811                 goto out_file;
1812         }
1813
1814         /* We can only test VM isolation if the VMs are distinct */
1815         if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
1816                 goto out_file;
1817
1818         /* Read the initial state of the scratch page */
1819         err = check_scratch_page(ctx_a, &expected);
1820         if (err)
1821                 goto out_file;
1822
1823         err = check_scratch_page(ctx_b, &expected);
1824         if (err)
1825                 goto out_file;
1826
1827         vm_total = ctx_vm(ctx_a)->total;
1828         GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total);
1829
1830         count = 0;
1831         num_engines = 0;
1832         for_each_uabi_engine(engine, i915) {
1833                 IGT_TIMEOUT(end_time);
1834                 unsigned long this = 0;
1835
1836                 if (!intel_engine_can_store_dword(engine))
1837                         continue;
1838
1839                 /* Not all engines have their own GPR! */
1840                 if (INTEL_GEN(i915) < 8 && engine->class != RENDER_CLASS)
1841                         continue;
1842
1843                 while (!__igt_timeout(end_time, NULL)) {
1844                         u32 value = 0xc5c5c5c5;
1845                         u64 offset;
1846
1847                         /* Leave enough space at offset 0 for the batch */
1848                         offset = igt_random_offset(&prng,
1849                                                    I915_GTT_PAGE_SIZE, vm_total,
1850                                                    sizeof(u32), alignof_dword);
1851
1852                         err = write_to_scratch(ctx_a, engine,
1853                                                offset, 0xdeadbeef);
1854                         if (err == 0)
1855                                 err = read_from_scratch(ctx_b, engine,
1856                                                         offset, &value);
1857                         if (err)
1858                                 goto out_file;
1859
1860                         if (value != expected) {
1861                                 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1862                                        engine->name, value,
1863                                        upper_32_bits(offset),
1864                                        lower_32_bits(offset),
1865                                        this);
1866                                 err = -EINVAL;
1867                                 goto out_file;
1868                         }
1869
1870                         this++;
1871                 }
1872                 count += this;
1873                 num_engines++;
1874         }
1875         pr_info("Checked %lu scratch offsets across %lu engines\n",
1876                 count, num_engines);
1877
1878 out_file:
1879         if (igt_live_test_end(&t))
1880                 err = -EIO;
1881         fput(file);
1882         return err;
1883 }
1884
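/*
 * context_barrier_task() skip callback: engines whose context state was
 * never allocated have nothing to retire, so they are skipped.
 */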
1885 static bool skip_unused_engines(struct intel_context *ce, void *data)
1886 {
1887         return !ce->state;
1888 }
1889
1890 static void mock_barrier_task(void *data)
1891 {
1892         unsigned int *counter = data;
1893
1894         ++*counter;
1895 }
1896
1897 static int mock_context_barrier(void *arg)
1898 {
1899 #undef pr_fmt
1900 #define pr_fmt(x) "context_barrier_task():" # x
1901         struct drm_i915_private *i915 = arg;
1902         struct i915_gem_context *ctx;
1903         struct i915_request *rq;
1904         unsigned int counter;
1905         int err;
1906
1907         /*
1908          * The context barrier provides us with a callback after it emits
1909          * a request; useful for retiring old state after loading new.
1910          */
1911
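        /*
         * Four cases are exercised below: a barrier with no engines (must
         * fire at once), a barrier where every engine is skipped (also
         * fires at once), fault injection on a busy engine (the callback
         * must not run), and finally a barrier issued after a request has
         * been submitted, which must still have invoked its callback once
         * the mock device is flushed.
         */
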
1912         ctx = mock_context(i915, "mock");
1913         if (!ctx)
1914                 return -ENOMEM;
1915
1916         counter = 0;
1917         err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
1918                                    mock_barrier_task, &counter);
1919         if (err) {
1920                 pr_err("Failed at line %d, err=%d\n", __LINE__, err);
1921                 goto out;
1922         }
1923         if (counter == 0) {
1924                 pr_err("Did not retire immediately with 0 engines\n");
1925                 err = -EINVAL;
1926                 goto out;
1927         }
1928
1929         counter = 0;
1930         err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
1931                                    NULL, NULL, mock_barrier_task, &counter);
1932         if (err) {
1933                 pr_err("Failed at line %d, err=%d\n", __LINE__, err);
1934                 goto out;
1935         }
1936         if (counter == 0) {
1937                 pr_err("Did not retire immediately for all unused engines\n");
1938                 err = -EINVAL;
1939                 goto out;
1940         }
1941
1942         rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]);
1943         if (IS_ERR(rq)) {
                     err = PTR_ERR(rq);
1944                 pr_err("Request allocation failed!\n");
1945                 goto out;
1946         }
1947         i915_request_add(rq);
1948
1949         counter = 0;
1950         context_barrier_inject_fault = BIT(RCS0);
1951         err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
1952                                    mock_barrier_task, &counter);
1953         context_barrier_inject_fault = 0;
1954         if (err == -ENXIO)
1955                 err = 0;
1956         else
1957                 pr_err("Did not hit fault injection!\n");
1958         if (counter != 0) {
1959                 pr_err("Invoked callback on error!\n");
1960                 err = -EIO;
1961         }
1962         if (err)
1963                 goto out;
1964
1965         counter = 0;
1966         err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
1967                                    NULL, NULL, mock_barrier_task, &counter);
1968         if (err) {
1969                 pr_err("Failed at line %d, err=%d\n", __LINE__, err);
1970                 goto out;
1971         }
1972         mock_device_flush(i915);
1973         if (counter == 0) {
1974                 pr_err("Did not retire on each active engine\n");
1975                 err = -EINVAL;
1976                 goto out;
1977         }
1978
1979 out:
1980         mock_context_close(ctx);
1981         return err;
1982 #undef pr_fmt
1983 #define pr_fmt(x) x
1984 }
1985
1986 int i915_gem_context_mock_selftests(void)
1987 {
1988         static const struct i915_subtest tests[] = {
1989                 SUBTEST(mock_context_barrier),
1990         };
1991         struct drm_i915_private *i915;
1992         int err;
1993
1994         i915 = mock_gem_device();
1995         if (!i915)
1996                 return -ENOMEM;
1997
1998         err = i915_subtests(tests, i915);
1999
2000         mock_destroy_device(i915);
2001         return err;
2002 }
2003
2004 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
2005 {
2006         static const struct i915_subtest tests[] = {
2007                 SUBTEST(live_nop_switch),
2008                 SUBTEST(live_parallel_switch),
2009                 SUBTEST(igt_ctx_exec),
2010                 SUBTEST(igt_ctx_readonly),
2011                 SUBTEST(igt_ctx_sseu),
2012                 SUBTEST(igt_shared_ctx_exec),
2013                 SUBTEST(igt_vm_isolation),
2014         };
2015
2016         if (intel_gt_is_wedged(&i915->gt))
2017                 return 0;
2018
2019         return i915_live_subtests(tests, i915);
2020 }