GNU Linux-libre 5.10.215-gnu1
drivers/gpu/drm/i915/gt/selftest_lrc.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2018 Intel Corporation
5  */
6
7 #include <linux/prime_numbers.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
13
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
20
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
23
24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
25 #define NUM_GPR 16
26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
27
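/*
 * Allocate a single page of internal memory, request CPU caching and pin
 * it into the GGTT, giving the GPU a globally visible scratch page.
 */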
28 static struct i915_vma *create_scratch(struct intel_gt *gt)
29 {
30         struct drm_i915_gem_object *obj;
31         struct i915_vma *vma;
32         int err;
33
34         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
35         if (IS_ERR(obj))
36                 return ERR_CAST(obj);
37
38         i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
39
40         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
41         if (IS_ERR(vma)) {
42                 i915_gem_object_put(obj);
43                 return vma;
44         }
45
46         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
47         if (err) {
48                 i915_gem_object_put(obj);
49                 return ERR_PTR(err);
50         }
51
52         return vma;
53 }
54
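/*
 * Treat a request as "active" once the backend owns it: it is in the
 * ELSP, parked on the hold list, or has already started executing on the
 * HW (its initial breadcrumb has been passed).
 */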
55 static bool is_active(struct i915_request *rq)
56 {
57         if (i915_request_is_active(rq))
58                 return true;
59
60         if (i915_request_on_hold(rq))
61                 return true;
62
63         if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
64                 return true;
65
66         return false;
67 }
68
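/*
 * Busy-wait (with cond_resched) until the HW has acknowledged the
 * submission of @rq, i.e. nothing is left pending in the ELSP and the
 * request is active, or until @timeout jiffies have elapsed (-ETIME).
 */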
69 static int wait_for_submit(struct intel_engine_cs *engine,
70                            struct i915_request *rq,
71                            unsigned long timeout)
72 {
73         timeout += jiffies;
74         do {
75                 bool done = time_after(jiffies, timeout);
76
77                 if (i915_request_completed(rq)) /* that was quick! */
78                         return 0;
79
80                 /* Wait until the HW has acknowledged the submission (or err) */
81                 intel_engine_flush_submission(engine);
82                 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
83                         return 0;
84
85                 if (done)
86                         return -ETIME;
87
88                 cond_resched();
89         } while (1);
90 }
91
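/*
 * Wait for the engine reset to be processed: the hanging request should
 * be cancelled with -EIO and then allowed to complete. Returns -EINVAL if
 * the request was never cancelled, -ETIME if it never completed.
 */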
92 static int wait_for_reset(struct intel_engine_cs *engine,
93                           struct i915_request *rq,
94                           unsigned long timeout)
95 {
96         timeout += jiffies;
97
98         do {
99                 cond_resched();
100                 intel_engine_flush_submission(engine);
101
102                 if (READ_ONCE(engine->execlists.pending[0]))
103                         continue;
104
105                 if (i915_request_completed(rq))
106                         break;
107
108                 if (READ_ONCE(rq->fence.error))
109                         break;
110         } while (time_before(jiffies, timeout));
111
112         flush_scheduled_work();
113
114         if (rq->fence.error != -EIO) {
115                 pr_err("%s: hanging request %llx:%lld not reset\n",
116                        engine->name,
117                        rq->fence.context,
118                        rq->fence.seqno);
119                 return -EINVAL;
120         }
121
122         /* Give the request a jiffie to complete after flushing the worker */
123         if (i915_request_wait(rq, 0,
124                               max(0l, (long)(timeout - jiffies)) + 1) < 0) {
125                 pr_err("%s: hanging request %llx:%lld did not complete\n",
126                        engine->name,
127                        rq->fence.context,
128                        rq->fence.seqno);
129                 return -ETIME;
130         }
131
132         return 0;
133 }
134
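/*
 * Smoke test: on every engine, submit a spinning batch on a fresh
 * context, check that it starts executing, then terminate it and flush.
 */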
135 static int live_sanitycheck(void *arg)
136 {
137         struct intel_gt *gt = arg;
138         struct intel_engine_cs *engine;
139         enum intel_engine_id id;
140         struct igt_spinner spin;
141         int err = 0;
142
143         if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
144                 return 0;
145
146         if (igt_spinner_init(&spin, gt))
147                 return -ENOMEM;
148
149         for_each_engine(engine, gt, id) {
150                 struct intel_context *ce;
151                 struct i915_request *rq;
152
153                 ce = intel_context_create(engine);
154                 if (IS_ERR(ce)) {
155                         err = PTR_ERR(ce);
156                         break;
157                 }
158
159                 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
160                 if (IS_ERR(rq)) {
161                         err = PTR_ERR(rq);
162                         goto out_ctx;
163                 }
164
165                 i915_request_add(rq);
166                 if (!igt_wait_for_spinner(&spin, rq)) {
167                         GEM_TRACE("spinner failed to start\n");
168                         GEM_TRACE_DUMP();
169                         intel_gt_set_wedged(gt);
170                         err = -EIO;
171                         goto out_ctx;
172                 }
173
174                 igt_spinner_end(&spin);
175                 if (igt_flush_test(gt->i915)) {
176                         err = -EIO;
177                         goto out_ctx;
178                 }
179
180 out_ctx:
181                 intel_context_put(ce);
182                 if (err)
183                         break;
184         }
185
186         igt_spinner_fini(&spin);
187         return err;
188 }
189
190 static int live_unlite_restore(struct intel_gt *gt, int prio)
191 {
192         struct intel_engine_cs *engine;
193         enum intel_engine_id id;
194         struct igt_spinner spin;
195         int err = -ENOMEM;
196
197         /*
198          * Check that we can correctly context switch between 2 instances
199          * on the same engine from the same parent context.
200          */
201
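        /*
         * A "lite restore" is when the HW merely resamples RING_TAIL for
         * the context already loaded in ELSP instead of performing a full
         * context restore. If the wrong RING_TAIL is paired with the wrong
         * ring (see the poisoned rings below), the HW executes stale
         * garbage.
         */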
202         if (igt_spinner_init(&spin, gt))
203                 return err;
204
205         err = 0;
206         for_each_engine(engine, gt, id) {
207                 struct intel_context *ce[2] = {};
208                 struct i915_request *rq[2];
209                 struct igt_live_test t;
210                 int n;
211
212                 if (prio && !intel_engine_has_preemption(engine))
213                         continue;
214
215                 if (!intel_engine_can_store_dword(engine))
216                         continue;
217
218                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
219                         err = -EIO;
220                         break;
221                 }
222                 st_engine_heartbeat_disable(engine);
223
224                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
225                         struct intel_context *tmp;
226
227                         tmp = intel_context_create(engine);
228                         if (IS_ERR(tmp)) {
229                                 err = PTR_ERR(tmp);
230                                 goto err_ce;
231                         }
232
233                         err = intel_context_pin(tmp);
234                         if (err) {
235                                 intel_context_put(tmp);
236                                 goto err_ce;
237                         }
238
239                         /*
240          * Set up the pair of contexts such that if we
241                          * lite-restore using the RING_TAIL from ce[1] it
242                          * will execute garbage from ce[0]->ring.
243                          */
244                         memset(tmp->ring->vaddr,
245                                POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
246                                tmp->ring->vma->size);
247
248                         ce[n] = tmp;
249                 }
250                 GEM_BUG_ON(!ce[1]->ring->size);
251                 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
252                 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
253
254                 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
255                 if (IS_ERR(rq[0])) {
256                         err = PTR_ERR(rq[0]);
257                         goto err_ce;
258                 }
259
260                 i915_request_get(rq[0]);
261                 i915_request_add(rq[0]);
262                 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
263
264                 if (!igt_wait_for_spinner(&spin, rq[0])) {
265                         i915_request_put(rq[0]);
266                         goto err_ce;
267                 }
268
269                 rq[1] = i915_request_create(ce[1]);
270                 if (IS_ERR(rq[1])) {
271                         err = PTR_ERR(rq[1]);
272                         i915_request_put(rq[0]);
273                         goto err_ce;
274                 }
275
276                 if (!prio) {
277                         /*
278                          * Ensure we do the switch to ce[1] on completion.
279                          *
280                          * rq[0] is already submitted, so this should reduce
281                          * to a no-op (a wait on a request on the same engine
282                          * uses the submit fence, not the completion fence),
283          * but it will install a dependency of rq[1] on rq[0]
284                          * that will prevent the pair being reordered by
285                          * timeslicing.
286                          */
287                         i915_request_await_dma_fence(rq[1], &rq[0]->fence);
288                 }
289
290                 i915_request_get(rq[1]);
291                 i915_request_add(rq[1]);
292                 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
293                 i915_request_put(rq[0]);
294
295                 if (prio) {
296                         struct i915_sched_attr attr = {
297                                 .priority = prio,
298                         };
299
300                         /* Alternatively preempt the spinner with ce[1] */
301                         engine->schedule(rq[1], &attr);
302                 }
303
304                 /* And switch back to ce[0] for good measure */
305                 rq[0] = i915_request_create(ce[0]);
306                 if (IS_ERR(rq[0])) {
307                         err = PTR_ERR(rq[0]);
308                         i915_request_put(rq[1]);
309                         goto err_ce;
310                 }
311
312                 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
313                 i915_request_get(rq[0]);
314                 i915_request_add(rq[0]);
315                 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
316                 i915_request_put(rq[1]);
317                 i915_request_put(rq[0]);
318
319 err_ce:
320                 intel_engine_flush_submission(engine);
321                 igt_spinner_end(&spin);
322                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
323                         if (IS_ERR_OR_NULL(ce[n]))
324                                 break;
325
326                         intel_context_unpin(ce[n]);
327                         intel_context_put(ce[n]);
328                 }
329
330                 st_engine_heartbeat_enable(engine);
331                 if (igt_live_test_end(&t))
332                         err = -EIO;
333                 if (err)
334                         break;
335         }
336
337         igt_spinner_fini(&spin);
338         return err;
339 }
340
341 static int live_unlite_switch(void *arg)
342 {
343         return live_unlite_restore(arg, 0);
344 }
345
346 static int live_unlite_preempt(void *arg)
347 {
348         return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
349 }
350
351 static int live_unlite_ring(void *arg)
352 {
353         struct intel_gt *gt = arg;
354         struct intel_engine_cs *engine;
355         struct igt_spinner spin;
356         enum intel_engine_id id;
357         int err = 0;
358
359         /*
360          * Set up a preemption event that will cause almost the entire ring
361          * to be unwound, potentially fooling our intel_ring_direction()
362          * into emitting a forward lite-restore instead of the rollback.
363          */
364
365         if (igt_spinner_init(&spin, gt))
366                 return -ENOMEM;
367
368         for_each_engine(engine, gt, id) {
369                 struct intel_context *ce[2] = {};
370                 struct i915_request *rq;
371                 struct igt_live_test t;
372                 int n;
373
374                 if (!intel_engine_has_preemption(engine))
375                         continue;
376
377                 if (!intel_engine_can_store_dword(engine))
378                         continue;
379
380                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
381                         err = -EIO;
382                         break;
383                 }
384                 st_engine_heartbeat_disable(engine);
385
386                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
387                         struct intel_context *tmp;
388
389                         tmp = intel_context_create(engine);
390                         if (IS_ERR(tmp)) {
391                                 err = PTR_ERR(tmp);
392                                 goto err_ce;
393                         }
394
395                         err = intel_context_pin(tmp);
396                         if (err) {
397                                 intel_context_put(tmp);
398                                 goto err_ce;
399                         }
400
401                         memset32(tmp->ring->vaddr,
402                                  0xdeadbeef, /* trigger a hang if executed */
403                                  tmp->ring->vma->size / sizeof(u32));
404
405                         ce[n] = tmp;
406                 }
407
408                 /* Create max prio spinner, followed by N low prio nops */
409                 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
410                 if (IS_ERR(rq)) {
411                         err = PTR_ERR(rq);
412                         goto err_ce;
413                 }
414
415                 i915_request_get(rq);
416                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
417                 i915_request_add(rq);
418
419                 if (!igt_wait_for_spinner(&spin, rq)) {
420                         intel_gt_set_wedged(gt);
421                         i915_request_put(rq);
422                         err = -ETIME;
423                         goto err_ce;
424                 }
425
426                 /* Fill the ring until we cause a wrap */
427                 n = 0;
428                 while (intel_ring_direction(ce[0]->ring,
429                                             rq->wa_tail,
430                                             ce[0]->ring->tail) <= 0) {
431                         struct i915_request *tmp;
432
433                         tmp = intel_context_create_request(ce[0]);
434                         if (IS_ERR(tmp)) {
435                                 err = PTR_ERR(tmp);
436                                 i915_request_put(rq);
437                                 goto err_ce;
438                         }
439
440                         i915_request_add(tmp);
441                         intel_engine_flush_submission(engine);
442                         n++;
443                 }
444                 intel_engine_flush_submission(engine);
445                 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
446                          engine->name, n,
447                          ce[0]->ring->size,
448                          ce[0]->ring->tail,
449                          ce[0]->ring->emit,
450                          rq->tail);
451                 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
452                                                 rq->tail,
453                                                 ce[0]->ring->tail) <= 0);
454                 i915_request_put(rq);
455
456                 /* Create a request on a second ring to preempt the first ring after rq[0] */
457                 rq = intel_context_create_request(ce[1]);
458                 if (IS_ERR(rq)) {
459                         err = PTR_ERR(rq);
460                         goto err_ce;
461                 }
462
463                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
464                 i915_request_get(rq);
465                 i915_request_add(rq);
466
467                 err = wait_for_submit(engine, rq, HZ / 2);
468                 i915_request_put(rq);
469                 if (err) {
470                         pr_err("%s: preemption request was not submitted\n",
471                                engine->name);
472                         err = -ETIME;
473                 }
474
475                 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
476                          engine->name,
477                          ce[0]->ring->tail, ce[0]->ring->emit,
478                          ce[1]->ring->tail, ce[1]->ring->emit);
479
480 err_ce:
481                 intel_engine_flush_submission(engine);
482                 igt_spinner_end(&spin);
483                 for (n = 0; n < ARRAY_SIZE(ce); n++) {
484                         if (IS_ERR_OR_NULL(ce[n]))
485                                 break;
486
487                         intel_context_unpin(ce[n]);
488                         intel_context_put(ce[n]);
489                 }
490                 st_engine_heartbeat_enable(engine);
491                 if (igt_live_test_end(&t))
492                         err = -EIO;
493                 if (err)
494                         break;
495         }
496
497         igt_spinner_fini(&spin);
498         return err;
499 }
500
501 static int live_pin_rewind(void *arg)
502 {
503         struct intel_gt *gt = arg;
504         struct intel_engine_cs *engine;
505         enum intel_engine_id id;
506         int err = 0;
507
508         /*
509          * We have to be careful not to trust intel_ring too much, for example
510          * ring->head is updated upon retire which is out of sync with pinning
511          * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
512          * or else we risk writing an older, stale value.
513          *
514          * To simulate this, let's apply a bit of deliberate sabotage.
515          */
516
517         for_each_engine(engine, gt, id) {
518                 struct intel_context *ce;
519                 struct i915_request *rq;
520                 struct intel_ring *ring;
521                 struct igt_live_test t;
522
523                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
524                         err = -EIO;
525                         break;
526                 }
527
528                 ce = intel_context_create(engine);
529                 if (IS_ERR(ce)) {
530                         err = PTR_ERR(ce);
531                         break;
532                 }
533
534                 err = intel_context_pin(ce);
535                 if (err) {
536                         intel_context_put(ce);
537                         break;
538                 }
539
540                 /* Keep the context awake while we play games */
541                 err = i915_active_acquire(&ce->active);
542                 if (err) {
543                         intel_context_unpin(ce);
544                         intel_context_put(ce);
545                         break;
546                 }
547                 ring = ce->ring;
548
549                 /* Poison the ring, and offset the next request from HEAD */
550                 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
551                 ring->emit = ring->size / 2;
552                 ring->tail = ring->emit;
553                 GEM_BUG_ON(ring->head);
554
555                 intel_context_unpin(ce);
556
557                 /* Submit a simple nop request */
558                 GEM_BUG_ON(intel_context_is_pinned(ce));
559                 rq = intel_context_create_request(ce);
560                 i915_active_release(&ce->active); /* e.g. async retire */
561                 intel_context_put(ce);
562                 if (IS_ERR(rq)) {
563                         err = PTR_ERR(rq);
564                         break;
565                 }
566                 GEM_BUG_ON(!rq->head);
567                 i915_request_add(rq);
568
569                 /* Expect not to hang! */
570                 if (igt_live_test_end(&t)) {
571                         err = -EIO;
572                         break;
573                 }
574         }
575
576         return err;
577 }
578
579 static int live_hold_reset(void *arg)
580 {
581         struct intel_gt *gt = arg;
582         struct intel_engine_cs *engine;
583         enum intel_engine_id id;
584         struct igt_spinner spin;
585         int err = 0;
586
587         /*
588          * In order to support offline error capture for fast preempt reset,
589          * we need to decouple the guilty request and ensure that it and its
590          * descendants are not executed while the capture is in progress.
591          */
592
593         if (!intel_has_reset_engine(gt))
594                 return 0;
595
596         if (igt_spinner_init(&spin, gt))
597                 return -ENOMEM;
598
599         for_each_engine(engine, gt, id) {
600                 struct intel_context *ce;
601                 struct i915_request *rq;
602
603                 ce = intel_context_create(engine);
604                 if (IS_ERR(ce)) {
605                         err = PTR_ERR(ce);
606                         break;
607                 }
608
609                 st_engine_heartbeat_disable(engine);
610
611                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
612                 if (IS_ERR(rq)) {
613                         err = PTR_ERR(rq);
614                         goto out;
615                 }
616                 i915_request_add(rq);
617
618                 if (!igt_wait_for_spinner(&spin, rq)) {
619                         intel_gt_set_wedged(gt);
620                         err = -ETIME;
621                         goto out;
622                 }
623
624                 /* We have our request executing, now remove it and reset */
625
626                 if (test_and_set_bit(I915_RESET_ENGINE + id,
627                                      &gt->reset.flags)) {
628                         intel_gt_set_wedged(gt);
629                         err = -EBUSY;
630                         goto out;
631                 }
632                 tasklet_disable(&engine->execlists.tasklet);
633
634                 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
635                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
636
637                 i915_request_get(rq);
638                 execlists_hold(engine, rq);
639                 GEM_BUG_ON(!i915_request_on_hold(rq));
640
641                 intel_engine_reset(engine, NULL);
642                 GEM_BUG_ON(rq->fence.error != -EIO);
643
644                 tasklet_enable(&engine->execlists.tasklet);
645                 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
646                                       &gt->reset.flags);
647
648                 /* Check that we do not resubmit the held request */
649                 if (!i915_request_wait(rq, 0, HZ / 5)) {
650                         pr_err("%s: on hold request completed!\n",
651                                engine->name);
652                         i915_request_put(rq);
653                         err = -EIO;
654                         goto out;
655                 }
656                 GEM_BUG_ON(!i915_request_on_hold(rq));
657
658                 /* But is resubmitted on release */
659                 execlists_unhold(engine, rq);
660                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
661                         pr_err("%s: held request did not complete!\n",
662                                engine->name);
663                         intel_gt_set_wedged(gt);
664                         err = -ETIME;
665                 }
666                 i915_request_put(rq);
667
668 out:
669                 st_engine_heartbeat_enable(engine);
670                 intel_context_put(ce);
671                 if (err)
672                         break;
673         }
674
675         igt_spinner_fini(&spin);
676         return err;
677 }
678
679 static const char *error_repr(int err)
680 {
681         return err ? "bad" : "good";
682 }
683
684 static int live_error_interrupt(void *arg)
685 {
686         static const struct error_phase {
687                 enum { GOOD = 0, BAD = -EIO } error[2];
688         } phases[] = {
689                 { { BAD,  GOOD } },
690                 { { BAD,  BAD  } },
691                 { { BAD,  GOOD } },
692                 { { GOOD, GOOD } }, /* sentinel */
693         };
694         struct intel_gt *gt = arg;
695         struct intel_engine_cs *engine;
696         enum intel_engine_id id;
697
698         /*
699          * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
700          * of invalid commands in user batches that will cause a GPU hang.
701          * This is a faster mechanism than using hangcheck/heartbeats, but
702          * only detects problems the HW knows about -- it will not warn when
703          * we kill the HW!
704          *
705          * To verify our detection and reset, we throw some invalid commands
706          * at the HW and wait for the interrupt.
707          */
708
709         if (!intel_has_reset_engine(gt))
710                 return 0;
711
712         for_each_engine(engine, gt, id) {
713                 const struct error_phase *p;
714                 int err = 0;
715
716                 st_engine_heartbeat_disable(engine);
717
718                 for (p = phases; p->error[0] != GOOD; p++) {
719                         struct i915_request *client[ARRAY_SIZE(phases->error)];
720                         u32 *cs;
721                         int i;
722
723                         memset(client, 0, sizeof(client)); /* NULL all slots for the cleanup path */
724                         for (i = 0; i < ARRAY_SIZE(client); i++) {
725                                 struct intel_context *ce;
726                                 struct i915_request *rq;
727
728                                 ce = intel_context_create(engine);
729                                 if (IS_ERR(ce)) {
730                                         err = PTR_ERR(ce);
731                                         goto out;
732                                 }
733
734                                 rq = intel_context_create_request(ce);
735                                 intel_context_put(ce);
736                                 if (IS_ERR(rq)) {
737                                         err = PTR_ERR(rq);
738                                         goto out;
739                                 }
740
741                                 if (rq->engine->emit_init_breadcrumb) {
742                                         err = rq->engine->emit_init_breadcrumb(rq);
743                                         if (err) {
744                                                 i915_request_add(rq);
745                                                 goto out;
746                                         }
747                                 }
748
749                                 cs = intel_ring_begin(rq, 2);
750                                 if (IS_ERR(cs)) {
751                                         i915_request_add(rq);
752                                         err = PTR_ERR(cs);
753                                         goto out;
754                                 }
755
756                                 if (p->error[i]) {
757                                         *cs++ = 0xdeadbeef;
758                                         *cs++ = 0xdeadbeef;
759                                 } else {
760                                         *cs++ = MI_NOOP;
761                                         *cs++ = MI_NOOP;
762                                 }
763
764                                 client[i] = i915_request_get(rq);
765                                 i915_request_add(rq);
766                         }
767
768                         err = wait_for_submit(engine, client[0], HZ / 2);
769                         if (err) {
770                                 pr_err("%s: first request did not start within time!\n",
771                                        engine->name);
772                                 err = -ETIME;
773                                 goto out;
774                         }
775
776                         for (i = 0; i < ARRAY_SIZE(client); i++) {
777                                 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
778                                         pr_debug("%s: %s request incomplete!\n",
779                                                  engine->name,
780                                                  error_repr(p->error[i]));
781
782                                 if (!i915_request_started(client[i])) {
783                                         pr_err("%s: %s request not started!\n",
784                                                engine->name,
785                                                error_repr(p->error[i]));
786                                         err = -ETIME;
787                                         goto out;
788                                 }
789
790                                 /* Kick the tasklet to process the error */
791                                 intel_engine_flush_submission(engine);
792                                 if (client[i]->fence.error != p->error[i]) {
793                                         pr_err("%s: %s request (%s) with wrong error code: %d\n",
794                                                engine->name,
795                                                error_repr(p->error[i]),
796                                                i915_request_completed(client[i]) ? "completed" : "running",
797                                                client[i]->fence.error);
798                                         err = -EINVAL;
799                                         goto out;
800                                 }
801                         }
802
803 out:
804                         for (i = 0; i < ARRAY_SIZE(client); i++)
805                                 if (client[i])
806                                         i915_request_put(client[i]);
807                         if (err) {
808                                 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
809                                        engine->name, p - phases,
810                                        p->error[0], p->error[1]);
811                                 break;
812                         }
813                 }
814
815                 st_engine_heartbeat_enable(engine);
816                 if (err) {
817                         intel_gt_set_wedged(gt);
818                         return err;
819                 }
820         }
821
822         return 0;
823 }
824
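/*
 * Emit a busywait on dword @idx of @vma (MI_SEMAPHORE_WAIT until the value
 * becomes non-zero) and, once released, write 1 into dword @idx - 1. Each
 * link in the chain thereby releases its predecessor, so releasing the
 * last element unwinds the whole chain back to the head.
 */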
825 static int
826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
827 {
828         u32 *cs;
829
830         cs = intel_ring_begin(rq, 10);
831         if (IS_ERR(cs))
832                 return PTR_ERR(cs);
833
834         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
835
836         *cs++ = MI_SEMAPHORE_WAIT |
837                 MI_SEMAPHORE_GLOBAL_GTT |
838                 MI_SEMAPHORE_POLL |
839                 MI_SEMAPHORE_SAD_NEQ_SDD;
840         *cs++ = 0;
841         *cs++ = i915_ggtt_offset(vma) + 4 * idx;
842         *cs++ = 0;
843
844         if (idx > 0) {
845                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
846                 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
847                 *cs++ = 0;
848                 *cs++ = 1;
849         } else {
850                 *cs++ = MI_NOOP;
851                 *cs++ = MI_NOOP;
852                 *cs++ = MI_NOOP;
853                 *cs++ = MI_NOOP;
854         }
855
856         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
857
858         intel_ring_advance(rq, cs);
859         return 0;
860 }
861
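/*
 * Submit one link of the semaphore chain on a throwaway context for
 * @engine, returning a reference to the request (or an ERR_PTR).
 */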
862 static struct i915_request *
863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
864 {
865         struct intel_context *ce;
866         struct i915_request *rq;
867         int err;
868
869         ce = intel_context_create(engine);
870         if (IS_ERR(ce))
871                 return ERR_CAST(ce);
872
873         rq = intel_context_create_request(ce);
874         if (IS_ERR(rq))
875                 goto out_ce;
876
877         err = 0;
878         if (rq->engine->emit_init_breadcrumb)
879                 err = rq->engine->emit_init_breadcrumb(rq);
880         if (err == 0)
881                 err = emit_semaphore_chain(rq, vma, idx);
882         if (err == 0)
883                 i915_request_get(rq);
884         i915_request_add(rq);
885         if (err)
886                 rq = ERR_PTR(err);
887
888 out_ce:
889         intel_context_put(ce);
890         return rq;
891 }
892
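/*
 * From the kernel context, write 1 into dword @idx - 1 of @vma and bump
 * the write to @prio, kicking the tasklet so the scheduler reconsiders
 * the queue.
 */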
893 static int
894 release_queue(struct intel_engine_cs *engine,
895               struct i915_vma *vma,
896               int idx, int prio)
897 {
898         struct i915_sched_attr attr = {
899                 .priority = prio,
900         };
901         struct i915_request *rq;
902         u32 *cs;
903
904         rq = intel_engine_create_kernel_request(engine);
905         if (IS_ERR(rq))
906                 return PTR_ERR(rq);
907
908         cs = intel_ring_begin(rq, 4);
909         if (IS_ERR(cs)) {
910                 i915_request_add(rq);
911                 return PTR_ERR(cs);
912         }
913
914         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
915         *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
916         *cs++ = 0;
917         *cs++ = 1;
918
919         intel_ring_advance(rq, cs);
920
921         i915_request_get(rq);
922         i915_request_add(rq);
923
924         local_bh_disable();
925         engine->schedule(rq, &attr);
926         local_bh_enable(); /* kick tasklet */
927
928         i915_request_put(rq);
929
930         return 0;
931 }
932
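/*
 * Build a chain of semaphore busywaits across every engine, with the head
 * of the chain on @outer, then release the tail at maximum priority. The
 * head can only complete if the scheduler timeslices along the chain, so
 * a timeout here means timeslicing is broken.
 */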
933 static int
934 slice_semaphore_queue(struct intel_engine_cs *outer,
935                       struct i915_vma *vma,
936                       int count)
937 {
938         struct intel_engine_cs *engine;
939         struct i915_request *head;
940         enum intel_engine_id id;
941         int err, i, n = 0;
942
943         head = semaphore_queue(outer, vma, n++);
944         if (IS_ERR(head))
945                 return PTR_ERR(head);
946
947         for_each_engine(engine, outer->gt, id) {
948                 for (i = 0; i < count; i++) {
949                         struct i915_request *rq;
950
951                         rq = semaphore_queue(engine, vma, n++);
952                         if (IS_ERR(rq)) {
953                                 err = PTR_ERR(rq);
954                                 goto out;
955                         }
956
957                         i915_request_put(rq);
958                 }
959         }
960
961         err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
962         if (err)
963                 goto out;
964
965         if (i915_request_wait(head, 0,
966                               2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
967                 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
968                        count, n);
969                 GEM_TRACE_DUMP();
970                 intel_gt_set_wedged(outer->gt);
971                 err = -EIO;
972         }
973
974 out:
975         i915_request_put(head);
976         return err;
977 }
978
979 static int live_timeslice_preempt(void *arg)
980 {
981         struct intel_gt *gt = arg;
982         struct drm_i915_gem_object *obj;
983         struct intel_engine_cs *engine;
984         enum intel_engine_id id;
985         struct i915_vma *vma;
986         void *vaddr;
987         int err = 0;
988
989         /*
990          * If a request takes too long, we would like to give other users
991          * a fair go on the GPU. In particular, users may create batches
992          * that wait upon external input, where that input may even be
993          * supplied by another GPU job. To avoid blocking forever, we
994          * need to preempt the current task and replace it with another
995          * ready task.
996          */
997         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
998                 return 0;
999
1000         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1001         if (IS_ERR(obj))
1002                 return PTR_ERR(obj);
1003
1004         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1005         if (IS_ERR(vma)) {
1006                 err = PTR_ERR(vma);
1007                 goto err_obj;
1008         }
1009
1010         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1011         if (IS_ERR(vaddr)) {
1012                 err = PTR_ERR(vaddr);
1013                 goto err_obj;
1014         }
1015
1016         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1017         if (err)
1018                 goto err_map;
1019
1020         err = i915_vma_sync(vma);
1021         if (err)
1022                 goto err_pin;
1023
1024         for_each_engine(engine, gt, id) {
1025                 if (!intel_engine_has_preemption(engine))
1026                         continue;
1027
1028                 memset(vaddr, 0, PAGE_SIZE);
1029
1030                 st_engine_heartbeat_disable(engine);
1031                 err = slice_semaphore_queue(engine, vma, 5);
1032                 st_engine_heartbeat_enable(engine);
1033                 if (err)
1034                         goto err_pin;
1035
1036                 if (igt_flush_test(gt->i915)) {
1037                         err = -EIO;
1038                         goto err_pin;
1039                 }
1040         }
1041
1042 err_pin:
1043         i915_vma_unpin(vma);
1044 err_map:
1045         i915_gem_object_unpin_map(obj);
1046 err_obj:
1047         i915_gem_object_put(obj);
1048         return err;
1049 }
1050
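/*
 * Emit a request on @ce that (optionally after awaiting @wait) busywaits
 * until the semaphore at @slot reaches @idx, records RING_TIMESTAMP into
 * slot[idx] and then bumps the semaphore to @idx + 1. Comparing the
 * recorded timestamps afterwards reveals the order in which the requests
 * actually executed on the HW.
 */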
1051 static struct i915_request *
1052 create_rewinder(struct intel_context *ce,
1053                 struct i915_request *wait,
1054                 void *slot, int idx)
1055 {
1056         const u32 offset =
1057                 i915_ggtt_offset(ce->engine->status_page.vma) +
1058                 offset_in_page(slot);
1059         struct i915_request *rq;
1060         u32 *cs;
1061         int err;
1062
1063         rq = intel_context_create_request(ce);
1064         if (IS_ERR(rq))
1065                 return rq;
1066
1067         if (wait) {
1068                 err = i915_request_await_dma_fence(rq, &wait->fence);
1069                 if (err)
1070                         goto err;
1071         }
1072
1073         cs = intel_ring_begin(rq, 14);
1074         if (IS_ERR(cs)) {
1075                 err = PTR_ERR(cs);
1076                 goto err;
1077         }
1078
1079         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1080         *cs++ = MI_NOOP;
1081
1082         *cs++ = MI_SEMAPHORE_WAIT |
1083                 MI_SEMAPHORE_GLOBAL_GTT |
1084                 MI_SEMAPHORE_POLL |
1085                 MI_SEMAPHORE_SAD_GTE_SDD;
1086         *cs++ = idx;
1087         *cs++ = offset;
1088         *cs++ = 0;
1089
1090         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091         *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092         *cs++ = offset + idx * sizeof(u32);
1093         *cs++ = 0;
1094
1095         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1096         *cs++ = offset;
1097         *cs++ = 0;
1098         *cs++ = idx + 1;
1099
1100         intel_ring_advance(rq, cs);
1101
1102         rq->sched.attr.priority = I915_PRIORITY_MASK;
1103         err = 0;
1104 err:
1105         i915_request_get(rq);
1106         i915_request_add(rq);
1107         if (err) {
1108                 i915_request_put(rq);
1109                 return ERR_PTR(err);
1110         }
1111
1112         return rq;
1113 }
1114
1115 static int live_timeslice_rewind(void *arg)
1116 {
1117         struct intel_gt *gt = arg;
1118         struct intel_engine_cs *engine;
1119         enum intel_engine_id id;
1120
1121         /*
1122          * The usual presumption on timeslice expiration is that we replace
1123          * the active context with another. However, given a chain of
1124          * dependencies we may end up replacing the context with itself,
1125          * but only a few of those requests, forcing us to rewind the
1126          * RING_TAIL of the original request.
1127          */
1128         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1129                 return 0;
1130
1131         for_each_engine(engine, gt, id) {
1132                 enum { A1, A2, B1 };
1133                 enum { X = 1, Z, Y };
1134                 struct i915_request *rq[3] = {};
1135                 struct intel_context *ce;
1136                 unsigned long timeslice;
1137                 int i, err = 0;
1138                 u32 *slot;
1139
1140                 if (!intel_engine_has_timeslices(engine))
1141                         continue;
1142
1143                 /*
1144                  * A:rq1 -- semaphore wait, timestamp X
1145                  * A:rq2 -- write timestamp Y
1146                  *
1147                  * B:rq1 [await A:rq1] -- write timestamp Z
1148                  *
1149                  * Force timeslice, release semaphore.
1150                  *
1151                  * Expect execution/evaluation order XZY
1152                  */
1153
1154                 st_engine_heartbeat_disable(engine);
1155                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1156
1157                 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1158
1159                 ce = intel_context_create(engine);
1160                 if (IS_ERR(ce)) {
1161                         err = PTR_ERR(ce);
1162                         goto err;
1163                 }
1164
1165                 rq[A1] = create_rewinder(ce, NULL, slot, X);
1166                 if (IS_ERR(rq[A1])) {
1167                         intel_context_put(ce);
1168                         goto err;
1169                 }
1170
1171                 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1172                 intel_context_put(ce);
1173                 if (IS_ERR(rq[A2]))
1174                         goto err;
1175
1176                 err = wait_for_submit(engine, rq[A2], HZ / 2);
1177                 if (err) {
1178                         pr_err("%s: failed to submit first context\n",
1179                                engine->name);
1180                         goto err;
1181                 }
1182
1183                 ce = intel_context_create(engine);
1184                 if (IS_ERR(ce)) {
1185                         err = PTR_ERR(ce);
1186                         goto err;
1187                 }
1188
1189                 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1190                 intel_context_put(ce);
1191                 if (IS_ERR(rq[B1]))
1192                         goto err;
1193
1194                 err = wait_for_submit(engine, rq[B1], HZ / 2);
1195                 if (err) {
1196                         pr_err("%s: failed to submit second context\n",
1197                                engine->name);
1198                         goto err;
1199                 }
1200
1201                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202                 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1203                 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1204                         /* Wait for the timeslice to kick in */
1205                         del_timer(&engine->execlists.timer);
1206                         tasklet_hi_schedule(&engine->execlists.tasklet);
1207                         intel_engine_flush_submission(engine);
1208                 }
1209                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1211                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1212                 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1213
1214                 /* Release the hounds! */
1215                 slot[0] = 1;
1216                 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1217
1218                 for (i = 1; i <= 3; i++) {
1219                         unsigned long timeout = jiffies + HZ / 2;
1220
1221                         while (!READ_ONCE(slot[i]) &&
1222                                time_before(jiffies, timeout))
1223                                 ;
1224
1225                         if (!time_before(jiffies, timeout)) {
1226                                 pr_err("%s: rq[%d] timed out\n",
1227                                        engine->name, i - 1);
1228                                 err = -ETIME;
1229                                 goto err;
1230                         }
1231
1232                         pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1233                 }
1234
1235                 /* XZY: XZ < XY */
1236                 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1237                         pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1238                                engine->name,
1239                                slot[Z] - slot[X],
1240                                slot[Y] - slot[X]);
1241                         err = -EINVAL;
1242                 }
1243
1244 err:
1245                 memset32(&slot[0], -1, 4);
1246                 wmb();
1247
1248                 engine->props.timeslice_duration_ms = timeslice;
1249                 st_engine_heartbeat_enable(engine);
1250                 for (i = 0; i < 3; i++)
1251                         i915_request_put(rq[i]);
1252                 if (igt_flush_test(gt->i915))
1253                         err = -EIO;
1254                 if (err)
1255                         return err;
1256         }
1257
1258         return 0;
1259 }
1260
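/*
 * Submit an empty request on the engine's kernel context; used below to
 * occupy the second ELSP port.
 */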
1261 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1262 {
1263         struct i915_request *rq;
1264
1265         rq = intel_engine_create_kernel_request(engine);
1266         if (IS_ERR(rq))
1267                 return rq;
1268
1269         i915_request_get(rq);
1270         i915_request_add(rq);
1271
1272         return rq;
1273 }
1274
1275 static long slice_timeout(struct intel_engine_cs *engine)
1276 {
1277         long timeout;
1278
1279         /* Enough time for a timeslice to kick in, and kick out */
1280         timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1281
1282         /* Enough time for the nop request to complete */
1283         timeout += HZ / 5;
1284
1285         return timeout + 1;
1286 }
1287
1288 static int live_timeslice_queue(void *arg)
1289 {
1290         struct intel_gt *gt = arg;
1291         struct drm_i915_gem_object *obj;
1292         struct intel_engine_cs *engine;
1293         enum intel_engine_id id;
1294         struct i915_vma *vma;
1295         void *vaddr;
1296         int err = 0;
1297
1298         /*
1299          * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1300          * timeslicing between them disabled, we *do* enable timeslicing
1301          * if the queue demands it. (Normally, we do not submit if
1302          * ELSP[1] is already occupied, so must rely on timeslicing to
1303          * eject ELSP[0] in favour of the queue.)
1304          */
1305         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1306                 return 0;
1307
1308         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1309         if (IS_ERR(obj))
1310                 return PTR_ERR(obj);
1311
1312         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1313         if (IS_ERR(vma)) {
1314                 err = PTR_ERR(vma);
1315                 goto err_obj;
1316         }
1317
1318         vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1319         if (IS_ERR(vaddr)) {
1320                 err = PTR_ERR(vaddr);
1321                 goto err_obj;
1322         }
1323
1324         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1325         if (err)
1326                 goto err_map;
1327
1328         err = i915_vma_sync(vma);
1329         if (err)
1330                 goto err_pin;
1331
1332         for_each_engine(engine, gt, id) {
1333                 struct i915_sched_attr attr = {
1334                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1335                 };
1336                 struct i915_request *rq, *nop;
1337
1338                 if (!intel_engine_has_preemption(engine))
1339                         continue;
1340
1341                 st_engine_heartbeat_disable(engine);
1342                 memset(vaddr, 0, PAGE_SIZE);
1343
1344                 /* ELSP[0]: semaphore wait */
1345                 rq = semaphore_queue(engine, vma, 0);
1346                 if (IS_ERR(rq)) {
1347                         err = PTR_ERR(rq);
1348                         goto err_heartbeat;
1349                 }
1350                 engine->schedule(rq, &attr);
1351                 err = wait_for_submit(engine, rq, HZ / 2);
1352                 if (err) {
1353                         pr_err("%s: Timed out trying to submit semaphores\n",
1354                                engine->name);
1355                         goto err_rq;
1356                 }
1357
1358                 /* ELSP[1]: nop request */
1359                 nop = nop_request(engine);
1360                 if (IS_ERR(nop)) {
1361                         err = PTR_ERR(nop);
1362                         goto err_rq;
1363                 }
1364                 err = wait_for_submit(engine, nop, HZ / 2);
1365                 i915_request_put(nop);
1366                 if (err) {
1367                         pr_err("%s: Timed out trying to submit nop\n",
1368                                engine->name);
1369                         goto err_rq;
1370                 }
1371
1372                 GEM_BUG_ON(i915_request_completed(rq));
1373                 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1374
1375                 /* Queue: semaphore signal, matching priority as semaphore */
1376                 err = release_queue(engine, vma, 1, effective_prio(rq));
1377                 if (err)
1378                         goto err_rq;
1379
1380                 /* Wait until we ack the release_queue and start timeslicing */
1381                 do {
1382                         cond_resched();
1383                         intel_engine_flush_submission(engine);
1384                 } while (READ_ONCE(engine->execlists.pending[0]));
1385
1386                 /* Timeslice every jiffy, so within 2 we should signal */
1387                 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1388                         struct drm_printer p =
1389                                 drm_info_printer(gt->i915->drm.dev);
1390
1391                         pr_err("%s: Failed to timeslice into queue\n",
1392                                engine->name);
1393                         intel_engine_dump(engine, &p,
1394                                           "%s\n", engine->name);
1395
1396                         memset(vaddr, 0xff, PAGE_SIZE);
1397                         err = -EIO;
1398                 }
1399 err_rq:
1400                 i915_request_put(rq);
1401 err_heartbeat:
1402                 st_engine_heartbeat_enable(engine);
1403                 if (err)
1404                         break;
1405         }
1406
1407 err_pin:
1408         i915_vma_unpin(vma);
1409 err_map:
1410         i915_gem_object_unpin_map(obj);
1411 err_obj:
1412         i915_gem_object_put(obj);
1413         return err;
1414 }
1415
1416 static int live_timeslice_nopreempt(void *arg)
1417 {
1418         struct intel_gt *gt = arg;
1419         struct intel_engine_cs *engine;
1420         enum intel_engine_id id;
1421         struct igt_spinner spin;
1422         int err = 0;
1423
1424         /*
1425          * We should not timeslice into a request that is marked with
1426          * I915_REQUEST_NOPREEMPT.
1427          */
1428         if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1429                 return 0;
1430
1431         if (igt_spinner_init(&spin, gt))
1432                 return -ENOMEM;
1433
1434         for_each_engine(engine, gt, id) {
1435                 struct intel_context *ce;
1436                 struct i915_request *rq;
1437                 unsigned long timeslice;
1438
1439                 if (!intel_engine_has_preemption(engine))
1440                         continue;
1441
1442                 ce = intel_context_create(engine);
1443                 if (IS_ERR(ce)) {
1444                         err = PTR_ERR(ce);
1445                         break;
1446                 }
1447
1448                 st_engine_heartbeat_disable(engine);
1449                 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1450
1451                 /* Create an unpreemptible spinner */
1452
1453                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1454                 intel_context_put(ce);
1455                 if (IS_ERR(rq)) {
1456                         err = PTR_ERR(rq);
1457                         goto out_heartbeat;
1458                 }
1459
1460                 i915_request_get(rq);
1461                 i915_request_add(rq);
1462
1463                 if (!igt_wait_for_spinner(&spin, rq)) {
1464                         i915_request_put(rq);
1465                         err = -ETIME;
1466                         goto out_spin;
1467                 }
1468
1469                 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1470                 i915_request_put(rq);
1471
1472                 /* Followed by a maximum priority barrier (heartbeat) */
1473
1474                 ce = intel_context_create(engine);
1475                 if (IS_ERR(ce)) {
1476                         err = PTR_ERR(ce);
1477                         goto out_spin;
1478                 }
1479
1480                 rq = intel_context_create_request(ce);
1481                 intel_context_put(ce);
1482                 if (IS_ERR(rq)) {
1483                         err = PTR_ERR(rq);
1484                         goto out_spin;
1485                 }
1486
1487                 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1488                 i915_request_get(rq);
1489                 i915_request_add(rq);
1490
1491                 /*
1492                  * Wait until the barrier is in ELSP, and we know timeslicing
1493                  * will have been activated.
1494                  */
1495                 if (wait_for_submit(engine, rq, HZ / 2)) {
1496                         i915_request_put(rq);
1497                         err = -ETIME;
1498                         goto out_spin;
1499                 }
1500
1501                 /*
1502                  * Since the ELSP[0] request is unpreemptible, it should not
1503                  * allow the maximum priority barrier through. Wait long
1504                  * enough to see if it is timesliced in by mistake.
1505                  */
1506                 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1507                         pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1508                                engine->name);
1509                         err = -EINVAL;
1510                 }
1511                 i915_request_put(rq);
1512
1513 out_spin:
1514                 igt_spinner_end(&spin);
1515 out_heartbeat:
1516                 xchg(&engine->props.timeslice_duration_ms, timeslice);
1517                 st_engine_heartbeat_enable(engine);
1518                 if (err)
1519                         break;
1520
1521                 if (igt_flush_test(gt->i915)) {
1522                         err = -EIO;
1523                         break;
1524                 }
1525         }
1526
1527         igt_spinner_fini(&spin);
1528         return err;
1529 }
1530
1531 static int live_busywait_preempt(void *arg)
1532 {
1533         struct intel_gt *gt = arg;
1534         struct i915_gem_context *ctx_hi, *ctx_lo;
1535         struct intel_engine_cs *engine;
1536         struct drm_i915_gem_object *obj;
1537         struct i915_vma *vma;
1538         enum intel_engine_id id;
1539         int err = -ENOMEM;
1540         u32 *map;
1541
1542         /*
1543          * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1544          * preempt the busywaits used to synchronise between rings.
1545          */
1546
1547         ctx_hi = kernel_context(gt->i915);
1548         if (!ctx_hi)
1549                 return -ENOMEM;
1550         ctx_hi->sched.priority =
1551                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1552
1553         ctx_lo = kernel_context(gt->i915);
1554         if (!ctx_lo)
1555                 goto err_ctx_hi;
1556         ctx_lo->sched.priority =
1557                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1558
1559         obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1560         if (IS_ERR(obj)) {
1561                 err = PTR_ERR(obj);
1562                 goto err_ctx_lo;
1563         }
1564
1565         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1566         if (IS_ERR(map)) {
1567                 err = PTR_ERR(map);
1568                 goto err_obj;
1569         }
1570
1571         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1572         if (IS_ERR(vma)) {
1573                 err = PTR_ERR(vma);
1574                 goto err_map;
1575         }
1576
1577         err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1578         if (err)
1579                 goto err_map;
1580
1581         err = i915_vma_sync(vma);
1582         if (err)
1583                 goto err_vma;
1584
1585         for_each_engine(engine, gt, id) {
1586                 struct i915_request *lo, *hi;
1587                 struct igt_live_test t;
1588                 u32 *cs;
1589
1590                 if (!intel_engine_has_preemption(engine))
1591                         continue;
1592
1593                 if (!intel_engine_can_store_dword(engine))
1594                         continue;
1595
1596                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1597                         err = -EIO;
1598                         goto err_vma;
1599                 }
1600
1601                 /*
1602                  * We create two requests. The low priority request
1603                  * busywaits on a semaphore (inside the ringbuffer where
1604                  * it should be preemptible) and the high priority request
1605                  * uses an MI_STORE_DWORD_IMM to update the semaphore value
1606                  * allowing the first request to complete. If preemption
1607                  * fails, we hang instead.
1608                  */
1609
1610                 lo = igt_request_alloc(ctx_lo, engine);
1611                 if (IS_ERR(lo)) {
1612                         err = PTR_ERR(lo);
1613                         goto err_vma;
1614                 }
1615
1616                 cs = intel_ring_begin(lo, 8);
1617                 if (IS_ERR(cs)) {
1618                         err = PTR_ERR(cs);
1619                         i915_request_add(lo);
1620                         goto err_vma;
1621                 }
1622
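                /* Low priority batch: set the scratch dword to 1 to show it has started */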
1623                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1624                 *cs++ = i915_ggtt_offset(vma);
1625                 *cs++ = 0;
1626                 *cs++ = 1;
1627
1628                 /* XXX Do we need a flush + invalidate here? */
1629
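                /* ... then busywait (polling semaphore) until the dword reads 0 again */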
1630                 *cs++ = MI_SEMAPHORE_WAIT |
1631                         MI_SEMAPHORE_GLOBAL_GTT |
1632                         MI_SEMAPHORE_POLL |
1633                         MI_SEMAPHORE_SAD_EQ_SDD;
1634                 *cs++ = 0;
1635                 *cs++ = i915_ggtt_offset(vma);
1636                 *cs++ = 0;
1637
1638                 intel_ring_advance(lo, cs);
1639
1640                 i915_request_get(lo);
1641                 i915_request_add(lo);
1642
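                /* Wait up to 10ms for the store above to land, i.e. for lo to start */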
1643                 if (wait_for(READ_ONCE(*map), 10)) {
1644                         i915_request_put(lo);
1645                         err = -ETIMEDOUT;
1646                         goto err_vma;
1647                 }
1648
1649                 /* Low priority request should be busywaiting now */
1650                 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1651                         i915_request_put(lo);
1652                         pr_err("%s: Busywaiting request did not busywait!\n",
1653                                engine->name);
1654                         err = -EIO;
1655                         goto err_vma;
1656                 }
1657
1658                 hi = igt_request_alloc(ctx_hi, engine);
1659                 if (IS_ERR(hi)) {
1660                         err = PTR_ERR(hi);
1661                         i915_request_put(lo);
1662                         goto err_vma;
1663                 }
1664
1665                 cs = intel_ring_begin(hi, 4);
1666                 if (IS_ERR(cs)) {
1667                         err = PTR_ERR(cs);
1668                         i915_request_add(hi);
1669                         i915_request_put(lo);
1670                         goto err_vma;
1671                 }
1672
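                /* High priority batch: clear the scratch dword, releasing the busywait */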
1673                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1674                 *cs++ = i915_ggtt_offset(vma);
1675                 *cs++ = 0;
1676                 *cs++ = 0;
1677
1678                 intel_ring_advance(hi, cs);
1679                 i915_request_add(hi);
1680
1681                 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1682                         struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1683
1684                         pr_err("%s: Failed to preempt semaphore busywait!\n",
1685                                engine->name);
1686
1687                         intel_engine_dump(engine, &p, "%s\n", engine->name);
1688                         GEM_TRACE_DUMP();
1689
1690                         i915_request_put(lo);
1691                         intel_gt_set_wedged(gt);
1692                         err = -EIO;
1693                         goto err_vma;
1694                 }
1695                 GEM_BUG_ON(READ_ONCE(*map));
1696                 i915_request_put(lo);
1697
1698                 if (igt_live_test_end(&t)) {
1699                         err = -EIO;
1700                         goto err_vma;
1701                 }
1702         }
1703
1704         err = 0;
1705 err_vma:
1706         i915_vma_unpin(vma);
1707 err_map:
1708         i915_gem_object_unpin_map(obj);
1709 err_obj:
1710         i915_gem_object_put(obj);
1711 err_ctx_lo:
1712         kernel_context_close(ctx_lo);
1713 err_ctx_hi:
1714         kernel_context_close(ctx_hi);
1715         return err;
1716 }
1717
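/*
 * Helper: build a spinner request for @engine using @ctx's legacy
 * intel_context; the caller adds the request and waits on it.
 */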
1718 static struct i915_request *
1719 spinner_create_request(struct igt_spinner *spin,
1720                        struct i915_gem_context *ctx,
1721                        struct intel_engine_cs *engine,
1722                        u32 arb)
1723 {
1724         struct intel_context *ce;
1725         struct i915_request *rq;
1726
1727         ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1728         if (IS_ERR(ce))
1729                 return ERR_CAST(ce);
1730
1731         rq = igt_spinner_create_request(spin, ce, arb);
1732         intel_context_put(ce);
1733         return rq;
1734 }
1735
1736 static int live_preempt(void *arg)
1737 {
1738         struct intel_gt *gt = arg;
1739         struct i915_gem_context *ctx_hi, *ctx_lo;
1740         struct igt_spinner spin_hi, spin_lo;
1741         struct intel_engine_cs *engine;
1742         enum intel_engine_id id;
1743         int err = -ENOMEM;
1744
1745         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1746                 return 0;
1747
1748         if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1749                 pr_err("Logical preemption supported, but not exposed\n");
1750
1751         if (igt_spinner_init(&spin_hi, gt))
1752                 return -ENOMEM;
1753
1754         if (igt_spinner_init(&spin_lo, gt))
1755                 goto err_spin_hi;
1756
1757         ctx_hi = kernel_context(gt->i915);
1758         if (!ctx_hi)
1759                 goto err_spin_lo;
1760         ctx_hi->sched.priority =
1761                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1762
1763         ctx_lo = kernel_context(gt->i915);
1764         if (!ctx_lo)
1765                 goto err_ctx_hi;
1766         ctx_lo->sched.priority =
1767                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1768
1769         for_each_engine(engine, gt, id) {
1770                 struct igt_live_test t;
1771                 struct i915_request *rq;
1772
1773                 if (!intel_engine_has_preemption(engine))
1774                         continue;
1775
1776                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1777                         err = -EIO;
1778                         goto err_ctx_lo;
1779                 }
1780
1781                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1782                                             MI_ARB_CHECK);
1783                 if (IS_ERR(rq)) {
1784                         err = PTR_ERR(rq);
1785                         goto err_ctx_lo;
1786                 }
1787
1788                 i915_request_add(rq);
1789                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1790                         GEM_TRACE("lo spinner failed to start\n");
1791                         GEM_TRACE_DUMP();
1792                         intel_gt_set_wedged(gt);
1793                         err = -EIO;
1794                         goto err_ctx_lo;
1795                 }
1796
1797                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1798                                             MI_ARB_CHECK);
1799                 if (IS_ERR(rq)) {
1800                         igt_spinner_end(&spin_lo);
1801                         err = PTR_ERR(rq);
1802                         goto err_ctx_lo;
1803                 }
1804
1805                 i915_request_add(rq);
1806                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1807                         GEM_TRACE("hi spinner failed to start\n");
1808                         GEM_TRACE_DUMP();
1809                         intel_gt_set_wedged(gt);
1810                         err = -EIO;
1811                         goto err_ctx_lo;
1812                 }
1813
1814                 igt_spinner_end(&spin_hi);
1815                 igt_spinner_end(&spin_lo);
1816
1817                 if (igt_live_test_end(&t)) {
1818                         err = -EIO;
1819                         goto err_ctx_lo;
1820                 }
1821         }
1822
1823         err = 0;
1824 err_ctx_lo:
1825         kernel_context_close(ctx_lo);
1826 err_ctx_hi:
1827         kernel_context_close(ctx_hi);
1828 err_spin_lo:
1829         igt_spinner_fini(&spin_lo);
1830 err_spin_hi:
1831         igt_spinner_fini(&spin_hi);
1832         return err;
1833 }
1834
1835 static int live_late_preempt(void *arg)
1836 {
1837         struct intel_gt *gt = arg;
1838         struct i915_gem_context *ctx_hi, *ctx_lo;
1839         struct igt_spinner spin_hi, spin_lo;
1840         struct intel_engine_cs *engine;
1841         struct i915_sched_attr attr = {};
1842         enum intel_engine_id id;
1843         int err = -ENOMEM;
1844
1845         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1846                 return 0;
1847
1848         if (igt_spinner_init(&spin_hi, gt))
1849                 return -ENOMEM;
1850
1851         if (igt_spinner_init(&spin_lo, gt))
1852                 goto err_spin_hi;
1853
1854         ctx_hi = kernel_context(gt->i915);
1855         if (!ctx_hi)
1856                 goto err_spin_lo;
1857
1858         ctx_lo = kernel_context(gt->i915);
1859         if (!ctx_lo)
1860                 goto err_ctx_hi;
1861
1862         /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1863         ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1864
1865         for_each_engine(engine, gt, id) {
1866                 struct igt_live_test t;
1867                 struct i915_request *rq;
1868
1869                 if (!intel_engine_has_preemption(engine))
1870                         continue;
1871
1872                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1873                         err = -EIO;
1874                         goto err_ctx_lo;
1875                 }
1876
1877                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1878                                             MI_ARB_CHECK);
1879                 if (IS_ERR(rq)) {
1880                         err = PTR_ERR(rq);
1881                         goto err_ctx_lo;
1882                 }
1883
1884                 i915_request_add(rq);
1885                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1886                         pr_err("First context failed to start\n");
1887                         goto err_wedged;
1888                 }
1889
1890                 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1891                                             MI_NOOP);
1892                 if (IS_ERR(rq)) {
1893                         igt_spinner_end(&spin_lo);
1894                         err = PTR_ERR(rq);
1895                         goto err_ctx_lo;
1896                 }
1897
1898                 i915_request_add(rq);
1899                 if (igt_wait_for_spinner(&spin_hi, rq)) {
1900                         pr_err("Second context overtook first?\n");
1901                         goto err_wedged;
1902                 }
1903
1904                 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1905                 engine->schedule(rq, &attr);
1906
1907                 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1908                         pr_err("High priority context failed to preempt the low priority context\n");
1909                         GEM_TRACE_DUMP();
1910                         goto err_wedged;
1911                 }
1912
1913                 igt_spinner_end(&spin_hi);
1914                 igt_spinner_end(&spin_lo);
1915
1916                 if (igt_live_test_end(&t)) {
1917                         err = -EIO;
1918                         goto err_ctx_lo;
1919                 }
1920         }
1921
1922         err = 0;
1923 err_ctx_lo:
1924         kernel_context_close(ctx_lo);
1925 err_ctx_hi:
1926         kernel_context_close(ctx_hi);
1927 err_spin_lo:
1928         igt_spinner_fini(&spin_lo);
1929 err_spin_hi:
1930         igt_spinner_fini(&spin_hi);
1931         return err;
1932
1933 err_wedged:
1934         igt_spinner_end(&spin_hi);
1935         igt_spinner_end(&spin_lo);
1936         intel_gt_set_wedged(gt);
1937         err = -EIO;
1938         goto err_ctx_lo;
1939 }
1940
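/* A kernel context paired with its own spinner, used to build preemption scenarios */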
1941 struct preempt_client {
1942         struct igt_spinner spin;
1943         struct i915_gem_context *ctx;
1944 };
1945
1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1947 {
1948         c->ctx = kernel_context(gt->i915);
1949         if (!c->ctx)
1950                 return -ENOMEM;
1951
1952         if (igt_spinner_init(&c->spin, gt))
1953                 goto err_ctx;
1954
1955         return 0;
1956
1957 err_ctx:
1958         kernel_context_close(c->ctx);
1959         return -ENOMEM;
1960 }
1961
1962 static void preempt_client_fini(struct preempt_client *c)
1963 {
1964         igt_spinner_fini(&c->spin);
1965         kernel_context_close(c->ctx);
1966 }
1967
1968 static int live_nopreempt(void *arg)
1969 {
1970         struct intel_gt *gt = arg;
1971         struct intel_engine_cs *engine;
1972         struct preempt_client a, b;
1973         enum intel_engine_id id;
1974         int err = -ENOMEM;
1975
1976         /*
1977          * Verify that we can disable preemption for an individual request
1978          * that may be being observed and does not want to be interrupted.
1979          */
1980
1981         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1982                 return 0;
1983
1984         if (preempt_client_init(gt, &a))
1985                 return -ENOMEM;
1986         if (preempt_client_init(gt, &b))
1987                 goto err_client_a;
1988         b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1989
1990         for_each_engine(engine, gt, id) {
1991                 struct i915_request *rq_a, *rq_b;
1992
1993                 if (!intel_engine_has_preemption(engine))
1994                         continue;
1995
1996                 engine->execlists.preempt_hang.count = 0;
1997
1998                 rq_a = spinner_create_request(&a.spin,
1999                                               a.ctx, engine,
2000                                               MI_ARB_CHECK);
2001                 if (IS_ERR(rq_a)) {
2002                         err = PTR_ERR(rq_a);
2003                         goto err_client_b;
2004                 }
2005
2006                 /* Low priority client, but unpreemptable! */
2007                 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
2008
2009                 i915_request_add(rq_a);
2010                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2011                         pr_err("First client failed to start\n");
2012                         goto err_wedged;
2013                 }
2014
2015                 rq_b = spinner_create_request(&b.spin,
2016                                               b.ctx, engine,
2017                                               MI_ARB_CHECK);
2018                 if (IS_ERR(rq_b)) {
2019                         err = PTR_ERR(rq_b);
2020                         goto err_client_b;
2021                 }
2022
2023                 i915_request_add(rq_b);
2024
2025                 /* B is much more important than A! (But A is unpreemptable.) */
2026                 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2027
2028                 /* Wait long enough for preemption and timeslicing */
2029                 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2030                         pr_err("Second client started too early!\n");
2031                         goto err_wedged;
2032                 }
2033
2034                 igt_spinner_end(&a.spin);
2035
2036                 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2037                         pr_err("Second client failed to start\n");
2038                         goto err_wedged;
2039                 }
2040
2041                 igt_spinner_end(&b.spin);
2042
2043                 if (engine->execlists.preempt_hang.count) {
2044                         pr_err("Preemption recorded x%d; should have been suppressed!\n",
2045                                engine->execlists.preempt_hang.count);
2046                         err = -EINVAL;
2047                         goto err_wedged;
2048                 }
2049
2050                 if (igt_flush_test(gt->i915))
2051                         goto err_wedged;
2052         }
2053
2054         err = 0;
2055 err_client_b:
2056         preempt_client_fini(&b);
2057 err_client_a:
2058         preempt_client_fini(&a);
2059         return err;
2060
2061 err_wedged:
2062         igt_spinner_end(&b.spin);
2063         igt_spinner_end(&a.spin);
2064         intel_gt_set_wedged(gt);
2065         err = -EIO;
2066         goto err_client_b;
2067 }
2068
2069 struct live_preempt_cancel {
2070         struct intel_engine_cs *engine;
2071         struct preempt_client a, b;
2072 };
2073
2074 static int __cancel_active0(struct live_preempt_cancel *arg)
2075 {
2076         struct i915_request *rq;
2077         struct igt_live_test t;
2078         int err;
2079
2080         /* Preempt cancel of ELSP0 */
2081         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2082         if (igt_live_test_begin(&t, arg->engine->i915,
2083                                 __func__, arg->engine->name))
2084                 return -EIO;
2085
2086         rq = spinner_create_request(&arg->a.spin,
2087                                     arg->a.ctx, arg->engine,
2088                                     MI_ARB_CHECK);
2089         if (IS_ERR(rq))
2090                 return PTR_ERR(rq);
2091
2092         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2093         i915_request_get(rq);
2094         i915_request_add(rq);
2095         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2096                 err = -EIO;
2097                 goto out;
2098         }
2099
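        /* Ban the spinner's context and send a pulse to kick it off the engine */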
2100         intel_context_set_banned(rq->context);
2101         err = intel_engine_pulse(arg->engine);
2102         if (err)
2103                 goto out;
2104
2105         err = wait_for_reset(arg->engine, rq, HZ / 2);
2106         if (err) {
2107                 pr_err("Cancelled inflight0 request did not reset\n");
2108                 goto out;
2109         }
2110
2111 out:
2112         i915_request_put(rq);
2113         if (igt_live_test_end(&t))
2114                 err = -EIO;
2115         return err;
2116 }
2117
2118 static int __cancel_active1(struct live_preempt_cancel *arg)
2119 {
2120         struct i915_request *rq[2] = {};
2121         struct igt_live_test t;
2122         int err;
2123
2124         /* Preempt cancel of ELSP1 */
2125         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2126         if (igt_live_test_begin(&t, arg->engine->i915,
2127                                 __func__, arg->engine->name))
2128                 return -EIO;
2129
2130         rq[0] = spinner_create_request(&arg->a.spin,
2131                                        arg->a.ctx, arg->engine,
2132                                        MI_NOOP); /* no preemption */
2133         if (IS_ERR(rq[0]))
2134                 return PTR_ERR(rq[0]);
2135
2136         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2137         i915_request_get(rq[0]);
2138         i915_request_add(rq[0]);
2139         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2140                 err = -EIO;
2141                 goto out;
2142         }
2143
2144         rq[1] = spinner_create_request(&arg->b.spin,
2145                                        arg->b.ctx, arg->engine,
2146                                        MI_ARB_CHECK);
2147         if (IS_ERR(rq[1])) {
2148                 err = PTR_ERR(rq[1]);
2149                 goto out;
2150         }
2151
2152         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2153         i915_request_get(rq[1]);
2154         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2155         i915_request_add(rq[1]);
2156         if (err)
2157                 goto out;
2158
2159         intel_context_set_banned(rq[1]->context);
2160         err = intel_engine_pulse(arg->engine);
2161         if (err)
2162                 goto out;
2163
2164         igt_spinner_end(&arg->a.spin);
2165         err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2166         if (err)
2167                 goto out;
2168
2169         if (rq[0]->fence.error != 0) {
2170                 pr_err("Normal inflight0 request did not complete\n");
2171                 err = -EINVAL;
2172                 goto out;
2173         }
2174
2175         if (rq[1]->fence.error != -EIO) {
2176                 pr_err("Cancelled inflight1 request did not report -EIO\n");
2177                 err = -EINVAL;
2178                 goto out;
2179         }
2180
2181 out:
2182         i915_request_put(rq[1]);
2183         i915_request_put(rq[0]);
2184         if (igt_live_test_end(&t))
2185                 err = -EIO;
2186         return err;
2187 }
2188
2189 static int __cancel_queued(struct live_preempt_cancel *arg)
2190 {
2191         struct i915_request *rq[3] = {};
2192         struct igt_live_test t;
2193         int err;
2194
2195         /* Full ELSP and one in the wings */
2196         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2197         if (igt_live_test_begin(&t, arg->engine->i915,
2198                                 __func__, arg->engine->name))
2199                 return -EIO;
2200
2201         rq[0] = spinner_create_request(&arg->a.spin,
2202                                        arg->a.ctx, arg->engine,
2203                                        MI_ARB_CHECK);
2204         if (IS_ERR(rq[0]))
2205                 return PTR_ERR(rq[0]);
2206
2207         clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2208         i915_request_get(rq[0]);
2209         i915_request_add(rq[0]);
2210         if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2211                 err = -EIO;
2212                 goto out;
2213         }
2214
2215         rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2216         if (IS_ERR(rq[1])) {
2217                 err = PTR_ERR(rq[1]);
2218                 goto out;
2219         }
2220
2221         clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2222         i915_request_get(rq[1]);
2223         err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2224         i915_request_add(rq[1]);
2225         if (err)
2226                 goto out;
2227
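        /*
         * The queued spinner reuses context A, so banning it below will
         * cancel both the inflight rq[0] and the queued rq[2].
         */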
2228         rq[2] = spinner_create_request(&arg->b.spin,
2229                                        arg->a.ctx, arg->engine,
2230                                        MI_ARB_CHECK);
2231         if (IS_ERR(rq[2])) {
2232                 err = PTR_ERR(rq[2]);
2233                 goto out;
2234         }
2235
2236         i915_request_get(rq[2]);
2237         err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2238         i915_request_add(rq[2]);
2239         if (err)
2240                 goto out;
2241
2242         intel_context_set_banned(rq[2]->context);
2243         err = intel_engine_pulse(arg->engine);
2244         if (err)
2245                 goto out;
2246
2247         err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2248         if (err)
2249                 goto out;
2250
2251         if (rq[0]->fence.error != -EIO) {
2252                 pr_err("Cancelled inflight0 request did not report -EIO\n");
2253                 err = -EINVAL;
2254                 goto out;
2255         }
2256
2257         if (rq[1]->fence.error != 0) {
2258                 pr_err("Normal inflight1 request did not complete\n");
2259                 err = -EINVAL;
2260                 goto out;
2261         }
2262
2263         if (rq[2]->fence.error != -EIO) {
2264                 pr_err("Cancelled queued request did not report -EIO\n");
2265                 err = -EINVAL;
2266                 goto out;
2267         }
2268
2269 out:
2270         i915_request_put(rq[2]);
2271         i915_request_put(rq[1]);
2272         i915_request_put(rq[0]);
2273         if (igt_live_test_end(&t))
2274                 err = -EIO;
2275         return err;
2276 }
2277
2278 static int __cancel_hostile(struct live_preempt_cancel *arg)
2279 {
2280         struct i915_request *rq;
2281         int err;
2282
2283         /* Preempt cancel non-preemptible spinner in ELSP0 */
2284         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2285                 return 0;
2286
2287         if (!intel_has_reset_engine(arg->engine->gt))
2288                 return 0;
2289
2290         GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2291         rq = spinner_create_request(&arg->a.spin,
2292                                     arg->a.ctx, arg->engine,
2293                                     MI_NOOP); /* preemption disabled */
2294         if (IS_ERR(rq))
2295                 return PTR_ERR(rq);
2296
2297         clear_bit(CONTEXT_BANNED, &rq->context->flags);
2298         i915_request_get(rq);
2299         i915_request_add(rq);
2300         if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2301                 err = -EIO;
2302                 goto out;
2303         }
2304
2305         intel_context_set_banned(rq->context);
2306         err = intel_engine_pulse(arg->engine); /* force reset */
2307         if (err)
2308                 goto out;
2309
2310         err = wait_for_reset(arg->engine, rq, HZ / 2);
2311         if (err) {
2312                 pr_err("Cancelled inflight0 request did not reset\n");
2313                 goto out;
2314         }
2315
2316 out:
2317         i915_request_put(rq);
2318         if (igt_flush_test(arg->engine->i915))
2319                 err = -EIO;
2320         return err;
2321 }
2322
2323 static int live_preempt_cancel(void *arg)
2324 {
2325         struct intel_gt *gt = arg;
2326         struct live_preempt_cancel data;
2327         enum intel_engine_id id;
2328         int err = -ENOMEM;
2329
2330         /*
2331          * To cancel an inflight context, we need to first remove it from the
2332          * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2333          */
2334
2335         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2336                 return 0;
2337
2338         if (preempt_client_init(gt, &data.a))
2339                 return -ENOMEM;
2340         if (preempt_client_init(gt, &data.b))
2341                 goto err_client_a;
2342
2343         for_each_engine(data.engine, gt, id) {
2344                 if (!intel_engine_has_preemption(data.engine))
2345                         continue;
2346
2347                 err = __cancel_active0(&data);
2348                 if (err)
2349                         goto err_wedged;
2350
2351                 err = __cancel_active1(&data);
2352                 if (err)
2353                         goto err_wedged;
2354
2355                 err = __cancel_queued(&data);
2356                 if (err)
2357                         goto err_wedged;
2358
2359                 err = __cancel_hostile(&data);
2360                 if (err)
2361                         goto err_wedged;
2362         }
2363
2364         err = 0;
2365 err_client_b:
2366         preempt_client_fini(&data.b);
2367 err_client_a:
2368         preempt_client_fini(&data.a);
2369         return err;
2370
2371 err_wedged:
2372         GEM_TRACE_DUMP();
2373         igt_spinner_end(&data.b.spin);
2374         igt_spinner_end(&data.a.spin);
2375         intel_gt_set_wedged(gt);
2376         goto err_client_b;
2377 }
2378
2379 static int live_suppress_self_preempt(void *arg)
2380 {
2381         struct intel_gt *gt = arg;
2382         struct intel_engine_cs *engine;
2383         struct i915_sched_attr attr = {
2384                 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2385         };
2386         struct preempt_client a, b;
2387         enum intel_engine_id id;
2388         int err = -ENOMEM;
2389
2390         /*
2391          * Verify that if a preemption request does not cause a change in
2392          * the current execution order, the preempt-to-idle injection is
2393          * skipped and that we do not accidentally apply it after the CS
2394          * completion event.
2395          */
2396
2397         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2398                 return 0;
2399
2400         if (intel_uc_uses_guc_submission(&gt->uc))
2401                 return 0; /* presume black box */
2402
2403         if (intel_vgpu_active(gt->i915))
2404                 return 0; /* GVT forces single port & request submission */
2405
2406         if (preempt_client_init(gt, &a))
2407                 return -ENOMEM;
2408         if (preempt_client_init(gt, &b))
2409                 goto err_client_a;
2410
2411         for_each_engine(engine, gt, id) {
2412                 struct i915_request *rq_a, *rq_b;
2413                 int depth;
2414
2415                 if (!intel_engine_has_preemption(engine))
2416                         continue;
2417
2418                 if (igt_flush_test(gt->i915))
2419                         goto err_wedged;
2420
2421                 st_engine_heartbeat_disable(engine);
2422                 engine->execlists.preempt_hang.count = 0;
2423
2424                 rq_a = spinner_create_request(&a.spin,
2425                                               a.ctx, engine,
2426                                               MI_NOOP);
2427                 if (IS_ERR(rq_a)) {
2428                         err = PTR_ERR(rq_a);
2429                         st_engine_heartbeat_enable(engine);
2430                         goto err_client_b;
2431                 }
2432
2433                 i915_request_add(rq_a);
2434                 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2435                         pr_err("First client failed to start\n");
2436                         st_engine_heartbeat_enable(engine);
2437                         goto err_wedged;
2438                 }
2439
2440                 /* Keep postponing the timer to avoid premature slicing */
2441                 mod_timer(&engine->execlists.timer, jiffies + HZ);
2442                 for (depth = 0; depth < 8; depth++) {
2443                         rq_b = spinner_create_request(&b.spin,
2444                                                       b.ctx, engine,
2445                                                       MI_NOOP);
2446                         if (IS_ERR(rq_b)) {
2447                                 err = PTR_ERR(rq_b);
2448                                 st_engine_heartbeat_enable(engine);
2449                                 goto err_client_b;
2450                         }
2451                         i915_request_add(rq_b);
2452
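                        /*
                         * Bump the running request to maximum priority; as the
                         * execution order does not change, no preempt-to-idle
                         * should be injected.
                         */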
2453                         GEM_BUG_ON(i915_request_completed(rq_a));
2454                         engine->schedule(rq_a, &attr);
2455                         igt_spinner_end(&a.spin);
2456
2457                         if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2458                                 pr_err("Second client failed to start\n");
2459                                 st_engine_heartbeat_enable(engine);
2460                                 goto err_wedged;
2461                         }
2462
2463                         swap(a, b);
2464                         rq_a = rq_b;
2465                 }
2466                 igt_spinner_end(&a.spin);
2467
2468                 if (engine->execlists.preempt_hang.count) {
2469                         pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2470                                engine->name,
2471                                engine->execlists.preempt_hang.count,
2472                                depth);
2473                         st_engine_heartbeat_enable(engine);
2474                         err = -EINVAL;
2475                         goto err_client_b;
2476                 }
2477
2478                 st_engine_heartbeat_enable(engine);
2479                 if (igt_flush_test(gt->i915))
2480                         goto err_wedged;
2481         }
2482
2483         err = 0;
2484 err_client_b:
2485         preempt_client_fini(&b);
2486 err_client_a:
2487         preempt_client_fini(&a);
2488         return err;
2489
2490 err_wedged:
2491         igt_spinner_end(&b.spin);
2492         igt_spinner_end(&a.spin);
2493         intel_gt_set_wedged(gt);
2494         err = -EIO;
2495         goto err_client_b;
2496 }
2497
2498 static int live_chain_preempt(void *arg)
2499 {
2500         struct intel_gt *gt = arg;
2501         struct intel_engine_cs *engine;
2502         struct preempt_client hi, lo;
2503         enum intel_engine_id id;
2504         int err = -ENOMEM;
2505
2506         /*
2507          * Build a chain AB...BA between two contexts (A, B) and request
2508          * preemption of the last request. It should then complete before
2509          * the previously submitted spinner in B.
2510          */
2511
2512         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2513                 return 0;
2514
2515         if (preempt_client_init(gt, &hi))
2516                 return -ENOMEM;
2517
2518         if (preempt_client_init(gt, &lo))
2519                 goto err_client_hi;
2520
2521         for_each_engine(engine, gt, id) {
2522                 struct i915_sched_attr attr = {
2523                         .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2524                 };
2525                 struct igt_live_test t;
2526                 struct i915_request *rq;
2527                 int ring_size, count, i;
2528
2529                 if (!intel_engine_has_preemption(engine))
2530                         continue;
2531
2532                 rq = spinner_create_request(&lo.spin,
2533                                             lo.ctx, engine,
2534                                             MI_ARB_CHECK);
2535                 if (IS_ERR(rq))
2536                         goto err_wedged;
2537
2538                 i915_request_get(rq);
2539                 i915_request_add(rq);
2540
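                /* Estimate how many requests fit in the ring from the size of the first */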
2541                 ring_size = rq->wa_tail - rq->head;
2542                 if (ring_size < 0)
2543                         ring_size += rq->ring->size;
2544                 ring_size = rq->ring->size / ring_size;
2545                 pr_debug("%s(%s): Using maximum of %d requests\n",
2546                          __func__, engine->name, ring_size);
2547
2548                 igt_spinner_end(&lo.spin);
2549                 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2550                         pr_err("Timed out waiting to flush %s\n", engine->name);
2551                         i915_request_put(rq);
2552                         goto err_wedged;
2553                 }
2554                 i915_request_put(rq);
2555
2556                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2557                         err = -EIO;
2558                         goto err_wedged;
2559                 }
2560
2561                 for_each_prime_number_from(count, 1, ring_size) {
2562                         rq = spinner_create_request(&hi.spin,
2563                                                     hi.ctx, engine,
2564                                                     MI_ARB_CHECK);
2565                         if (IS_ERR(rq))
2566                                 goto err_wedged;
2567                         i915_request_add(rq);
2568                         if (!igt_wait_for_spinner(&hi.spin, rq))
2569                                 goto err_wedged;
2570
2571                         rq = spinner_create_request(&lo.spin,
2572                                                     lo.ctx, engine,
2573                                                     MI_ARB_CHECK);
2574                         if (IS_ERR(rq))
2575                                 goto err_wedged;
2576                         i915_request_add(rq);
2577
2578                         for (i = 0; i < count; i++) {
2579                                 rq = igt_request_alloc(lo.ctx, engine);
2580                                 if (IS_ERR(rq))
2581                                         goto err_wedged;
2582                                 i915_request_add(rq);
2583                         }
2584
2585                         rq = igt_request_alloc(hi.ctx, engine);
2586                         if (IS_ERR(rq))
2587                                 goto err_wedged;
2588
2589                         i915_request_get(rq);
2590                         i915_request_add(rq);
2591                         engine->schedule(rq, &attr);
2592
2593                         igt_spinner_end(&hi.spin);
2594                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2595                                 struct drm_printer p =
2596                                         drm_info_printer(gt->i915->drm.dev);
2597
2598                                 pr_err("Failed to preempt over chain of %d\n",
2599                                        count);
2600                                 intel_engine_dump(engine, &p,
2601                                                   "%s\n", engine->name);
2602                                 i915_request_put(rq);
2603                                 goto err_wedged;
2604                         }
2605                         igt_spinner_end(&lo.spin);
2606                         i915_request_put(rq);
2607
2608                         rq = igt_request_alloc(lo.ctx, engine);
2609                         if (IS_ERR(rq))
2610                                 goto err_wedged;
2611
2612                         i915_request_get(rq);
2613                         i915_request_add(rq);
2614
2615                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2616                                 struct drm_printer p =
2617                                         drm_info_printer(gt->i915->drm.dev);
2618
2619                                 pr_err("Failed to flush low priority chain of %d requests\n",
2620                                        count);
2621                                 intel_engine_dump(engine, &p,
2622                                                   "%s\n", engine->name);
2623
2624                                 i915_request_put(rq);
2625                                 goto err_wedged;
2626                         }
2627                         i915_request_put(rq);
2628                 }
2629
2630                 if (igt_live_test_end(&t)) {
2631                         err = -EIO;
2632                         goto err_wedged;
2633                 }
2634         }
2635
2636         err = 0;
2637 err_client_lo:
2638         preempt_client_fini(&lo);
2639 err_client_hi:
2640         preempt_client_fini(&hi);
2641         return err;
2642
2643 err_wedged:
2644         igt_spinner_end(&hi.spin);
2645         igt_spinner_end(&lo.spin);
2646         intel_gt_set_wedged(gt);
2647         err = -EIO;
2648         goto err_client_lo;
2649 }
2650
2651 static int create_gang(struct intel_engine_cs *engine,
2652                        struct i915_request **prev)
2653 {
2654         struct drm_i915_gem_object *obj;
2655         struct intel_context *ce;
2656         struct i915_request *rq;
2657         struct i915_vma *vma;
2658         u32 *cs;
2659         int err;
2660
2661         ce = intel_context_create(engine);
2662         if (IS_ERR(ce))
2663                 return PTR_ERR(ce);
2664
2665         obj = i915_gem_object_create_internal(engine->i915, 4096);
2666         if (IS_ERR(obj)) {
2667                 err = PTR_ERR(obj);
2668                 goto err_ce;
2669         }
2670
2671         vma = i915_vma_instance(obj, ce->vm, NULL);
2672         if (IS_ERR(vma)) {
2673                 err = PTR_ERR(vma);
2674                 goto err_obj;
2675         }
2676
2677         err = i915_vma_pin(vma, 0, 0, PIN_USER);
2678         if (err)
2679                 goto err_obj;
2680
2681         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2682         if (IS_ERR(cs)) {
2683                 err = PTR_ERR(cs);
                     goto err_obj;
             }
2684
2685         /* Semaphore target: spin until zero */
2686         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2687
2688         *cs++ = MI_SEMAPHORE_WAIT |
2689                 MI_SEMAPHORE_POLL |
2690                 MI_SEMAPHORE_SAD_EQ_SDD;
2691         *cs++ = 0;
2692         *cs++ = lower_32_bits(vma->node.start);
2693         *cs++ = upper_32_bits(vma->node.start);
2694
2695         if (*prev) {
2696                 u64 offset = (*prev)->batch->node.start;
2697
2698                 /* Terminate the spinner in the next lower priority batch. */
2699                 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2700                 *cs++ = lower_32_bits(offset);
2701                 *cs++ = upper_32_bits(offset);
2702                 *cs++ = 0;
2703         }
2704
2705         *cs++ = MI_BATCH_BUFFER_END;
2706         i915_gem_object_flush_map(obj);
2707         i915_gem_object_unpin_map(obj);
2708
2709         rq = intel_context_create_request(ce);
2710         if (IS_ERR(rq)) {
2711                 err = PTR_ERR(rq);
                     goto err_obj;
             }
2712
2713         rq->batch = i915_vma_get(vma);
2714         i915_request_get(rq);
2715
2716         i915_vma_lock(vma);
2717         err = i915_request_await_object(rq, vma->obj, false);
2718         if (!err)
2719                 err = i915_vma_move_to_active(vma, rq, 0);
2720         if (!err)
2721                 err = rq->engine->emit_bb_start(rq,
2722                                                 vma->node.start,
2723                                                 PAGE_SIZE, 0);
2724         i915_vma_unlock(vma);
2725         i915_request_add(rq);
2726         if (err)
2727                 goto err_rq;
2728
2729         i915_gem_object_put(obj);
2730         intel_context_put(ce);
2731
2732         rq->mock.link.next = &(*prev)->mock.link;
2733         *prev = rq;
2734         return 0;
2735
2736 err_rq:
2737         i915_vma_put(rq->batch);
2738         i915_request_put(rq);
2739 err_obj:
2740         i915_gem_object_put(obj);
2741 err_ce:
2742         intel_context_put(ce);
2743         return err;
2744 }
2745
2746 static int __live_preempt_ring(struct intel_engine_cs *engine,
2747                                struct igt_spinner *spin,
2748                                int queue_sz, int ring_sz)
2749 {
2750         struct intel_context *ce[2] = {};
2751         struct i915_request *rq;
2752         struct igt_live_test t;
2753         int err = 0;
2754         int n;
2755
2756         if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2757                 return -EIO;
2758
2759         for (n = 0; n < ARRAY_SIZE(ce); n++) {
2760                 struct intel_context *tmp;
2761
2762                 tmp = intel_context_create(engine);
2763                 if (IS_ERR(tmp)) {
2764                         err = PTR_ERR(tmp);
2765                         goto err_ce;
2766                 }
2767
2768                 tmp->ring = __intel_context_ring_size(ring_sz);
2769
2770                 err = intel_context_pin(tmp);
2771                 if (err) {
2772                         intel_context_put(tmp);
2773                         goto err_ce;
2774                 }
2775
2776                 memset32(tmp->ring->vaddr,
2777                          0xdeadbeef, /* trigger a hang if executed */
2778                          tmp->ring->vma->size / sizeof(u32));
2779
2780                 ce[n] = tmp;
2781         }
2782
2783         rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2784         if (IS_ERR(rq)) {
2785                 err = PTR_ERR(rq);
2786                 goto err_ce;
2787         }
2788
2789         i915_request_get(rq);
2790         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2791         i915_request_add(rq);
2792
2793         if (!igt_wait_for_spinner(spin, rq)) {
2794                 intel_gt_set_wedged(engine->gt);
2795                 i915_request_put(rq);
2796                 err = -ETIME;
2797                 goto err_ce;
2798         }
2799
2800         /* Fill the ring until we cause a wrap */
2801         n = 0;
2802         while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2803                 struct i915_request *tmp;
2804
2805                 tmp = intel_context_create_request(ce[0]);
2806                 if (IS_ERR(tmp)) {
2807                         err = PTR_ERR(tmp);
2808                         i915_request_put(rq);
2809                         goto err_ce;
2810                 }
2811
2812                 i915_request_add(tmp);
2813                 intel_engine_flush_submission(engine);
2814                 n++;
2815         }
2816         intel_engine_flush_submission(engine);
2817         pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2818                  engine->name, queue_sz, n,
2819                  ce[0]->ring->size,
2820                  ce[0]->ring->tail,
2821                  ce[0]->ring->emit,
2822                  rq->tail);
2823         i915_request_put(rq);
2824
2825         /* Create a second request to preempt the first ring */
2826         rq = intel_context_create_request(ce[1]);
2827         if (IS_ERR(rq)) {
2828                 err = PTR_ERR(rq);
2829                 goto err_ce;
2830         }
2831
2832         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2833         i915_request_get(rq);
2834         i915_request_add(rq);
2835
2836         err = wait_for_submit(engine, rq, HZ / 2);
2837         i915_request_put(rq);
2838         if (err) {
2839                 pr_err("%s: preemption request was not submitted\n",
2840                        engine->name);
2841                 err = -ETIME;
2842         }
2843
2844         pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2845                  engine->name,
2846                  ce[0]->ring->tail, ce[0]->ring->emit,
2847                  ce[1]->ring->tail, ce[1]->ring->emit);
2848
2849 err_ce:
2850         intel_engine_flush_submission(engine);
2851         igt_spinner_end(spin);
2852         for (n = 0; n < ARRAY_SIZE(ce); n++) {
2853                 if (IS_ERR_OR_NULL(ce[n]))
2854                         break;
2855
2856                 intel_context_unpin(ce[n]);
2857                 intel_context_put(ce[n]);
2858         }
2859         if (igt_live_test_end(&t))
2860                 err = -EIO;
2861         return err;
2862 }
2863
2864 static int live_preempt_ring(void *arg)
2865 {
2866         struct intel_gt *gt = arg;
2867         struct intel_engine_cs *engine;
2868         struct igt_spinner spin;
2869         enum intel_engine_id id;
2870         int err = 0;
2871
2872         /*
2873          * Check that we roll back large chunks of a ring in order to do a
2874          * preemption event. Similar to live_unlite_ring, but looking at
2875          * ring size rather than the impact of intel_ring_direction().
2876          */
2877
2878         if (igt_spinner_init(&spin, gt))
2879                 return -ENOMEM;
2880
2881         for_each_engine(engine, gt, id) {
2882                 int n;
2883
2884                 if (!intel_engine_has_preemption(engine))
2885                         continue;
2886
2887                 if (!intel_engine_can_store_dword(engine))
2888                         continue;
2889
2890                 st_engine_heartbeat_disable(engine);
2891
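                /* Pre-fill the 4K ring by 0, 1/4, 1/2 and 3/4 before forcing the preemption */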
2892                 for (n = 0; n <= 3; n++) {
2893                         err = __live_preempt_ring(engine, &spin,
2894                                                   n * SZ_4K / 4, SZ_4K);
2895                         if (err)
2896                                 break;
2897                 }
2898
2899                 st_engine_heartbeat_enable(engine);
2900                 if (err)
2901                         break;
2902         }
2903
2904         igt_spinner_fini(&spin);
2905         return err;
2906 }
2907
2908 static int live_preempt_gang(void *arg)
2909 {
2910         struct intel_gt *gt = arg;
2911         struct intel_engine_cs *engine;
2912         enum intel_engine_id id;
2913
2914         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2915                 return 0;
2916
2917         /*
2918          * Build as long a chain of preempters as we can, with each
2919          * request higher priority than the last. Once we are ready, we release
2920          * the last batch which then percolates down the chain, each releasing
2921          * the next oldest in turn. The intent is to simply push as hard as we
2922          * can with the number of preemptions, trying to exceed narrow HW
2923          * limits. At a minimum, we insist that we can sort all the user
2924          * high priority levels into execution order.
2925          */
2926
2927         for_each_engine(engine, gt, id) {
2928                 struct i915_request *rq = NULL;
2929                 struct igt_live_test t;
2930                 IGT_TIMEOUT(end_time);
2931                 int prio = 0;
2932                 int err = 0;
2933                 u32 *cs;
2934
2935                 if (!intel_engine_has_preemption(engine))
2936                         continue;
2937
2938                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2939                         return -EIO;
2940
2941                 do {
2942                         struct i915_sched_attr attr = {
2943                                 .priority = I915_USER_PRIORITY(prio++),
2944                         };
2945
2946                         err = create_gang(engine, &rq);
2947                         if (err)
2948                                 break;
2949
2950                         /* Submit each spinner at increasing priority */
2951                         engine->schedule(rq, &attr);
2952                 } while (prio <= I915_PRIORITY_MAX &&
2953                          !__igt_timeout(end_time, NULL));
2954                 pr_debug("%s: Preempt chain of %d requests\n",
2955                          engine->name, prio);
2956
2957                 /*
2958          * The last spinner submitted is thus the highest priority and
2959                  * should execute first. When that spinner completes,
2960                  * it will terminate the next lowest spinner until there
2961                  * are no more spinners and the gang is complete.
2962                  */
2963                 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2964                 if (!IS_ERR(cs)) {
2965                         *cs = 0;
2966                         i915_gem_object_unpin_map(rq->batch->obj);
2967                 } else {
2968                         err = PTR_ERR(cs);
2969                         intel_gt_set_wedged(gt);
2970                 }
2971
2972                 while (rq) { /* wait for each rq from highest to lowest prio */
2973                         struct i915_request *n = list_next_entry(rq, mock.link);
2974
2975                         if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2976                                 struct drm_printer p =
2977                                         drm_info_printer(engine->i915->drm.dev);
2978
2979                                 pr_err("Failed to flush chain of %d requests, at %d\n",
2980                                        prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2981                                 intel_engine_dump(engine, &p,
2982                                                   "%s\n", engine->name);
2983
2984                                 err = -ETIME;
2985                         }
2986
2987                         i915_vma_put(rq->batch);
2988                         i915_request_put(rq);
2989                         rq = n;
2990                 }
2991
2992                 if (igt_live_test_end(&t))
2993                         err = -EIO;
2994                 if (err)
2995                         return err;
2996         }
2997
2998         return 0;
2999 }
3000
3001 static struct i915_vma *
3002 create_gpr_user(struct intel_engine_cs *engine,
3003                 struct i915_vma *result,
3004                 unsigned int offset)
3005 {
3006         struct drm_i915_gem_object *obj;
3007         struct i915_vma *vma;
3008         u32 *cs;
3009         int err;
3010         int i;
3011
3012         obj = i915_gem_object_create_internal(engine->i915, 4096);
3013         if (IS_ERR(obj))
3014                 return ERR_CAST(obj);
3015
3016         vma = i915_vma_instance(obj, result->vm, NULL);
3017         if (IS_ERR(vma)) {
3018                 i915_gem_object_put(obj);
3019                 return vma;
3020         }
3021
3022         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3023         if (err) {
3024                 i915_vma_put(vma);
3025                 return ERR_PTR(err);
3026         }
3027
3028         cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3029         if (IS_ERR(cs)) {
3030                 i915_vma_put(vma);
3031                 return ERR_CAST(cs);
3032         }
3033
3034         /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3035         *cs++ = MI_LOAD_REGISTER_IMM(1);
3036         *cs++ = CS_GPR(engine, 0);
3037         *cs++ = 1;
3038
3039         for (i = 1; i < NUM_GPR; i++) {
3040                 u64 addr;
3041
3042                 /*
3043                  * Perform: GPR[i]++
3044                  *
3045                  * As we read and write into the context saved GPR[i], if
3046                  * we restart this batch buffer from an earlier point, we
3047                  * will repeat the increment and store a value > 1.
3048                  */
3049                 *cs++ = MI_MATH(4);
3050                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3051                 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3052                 *cs++ = MI_MATH_ADD;
3053                 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3054
3055                 addr = result->node.start + offset + i * sizeof(*cs);
3056                 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3057                 *cs++ = CS_GPR(engine, 2 * i);
3058                 *cs++ = lower_32_bits(addr);
3059                 *cs++ = upper_32_bits(addr);
3060
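                     /*
                      * Wait for the semaphore at the start of the result buffer
                      * to reach i. It is advanced by the maximum priority writes
                      * issued by preempt_user(), so each increment is expected
                      * to execute across a preemption event.
                      */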
3061                 *cs++ = MI_SEMAPHORE_WAIT |
3062                         MI_SEMAPHORE_POLL |
3063                         MI_SEMAPHORE_SAD_GTE_SDD;
3064                 *cs++ = i;
3065                 *cs++ = lower_32_bits(result->node.start);
3066                 *cs++ = upper_32_bits(result->node.start);
3067         }
3068
3069         *cs++ = MI_BATCH_BUFFER_END;
3070         i915_gem_object_flush_map(obj);
3071         i915_gem_object_unpin_map(obj);
3072
3073         return vma;
3074 }
3075
3076 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3077 {
3078         struct drm_i915_gem_object *obj;
3079         struct i915_vma *vma;
3080         int err;
3081
3082         obj = i915_gem_object_create_internal(gt->i915, sz);
3083         if (IS_ERR(obj))
3084                 return ERR_CAST(obj);
3085
3086         vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3087         if (IS_ERR(vma)) {
3088                 i915_gem_object_put(obj);
3089                 return vma;
3090         }
3091
3092         err = i915_ggtt_pin(vma, NULL, 0, 0);
3093         if (err) {
3094                 i915_vma_put(vma);
3095                 return ERR_PTR(err);
3096         }
3097
3098         return vma;
3099 }
3100
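     /*
      * Create a fresh user context, bind the shared result buffer into its
      * address space and submit the self-incrementing GPR batch built by
      * create_gpr_user() at the given offset within that buffer.
      */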
3101 static struct i915_request *
3102 create_gpr_client(struct intel_engine_cs *engine,
3103                   struct i915_vma *global,
3104                   unsigned int offset)
3105 {
3106         struct i915_vma *batch, *vma;
3107         struct intel_context *ce;
3108         struct i915_request *rq;
3109         int err;
3110
3111         ce = intel_context_create(engine);
3112         if (IS_ERR(ce))
3113                 return ERR_CAST(ce);
3114
3115         vma = i915_vma_instance(global->obj, ce->vm, NULL);
3116         if (IS_ERR(vma)) {
3117                 err = PTR_ERR(vma);
3118                 goto out_ce;
3119         }
3120
3121         err = i915_vma_pin(vma, 0, 0, PIN_USER);
3122         if (err)
3123                 goto out_ce;
3124
3125         batch = create_gpr_user(engine, vma, offset);
3126         if (IS_ERR(batch)) {
3127                 err = PTR_ERR(batch);
3128                 goto out_vma;
3129         }
3130
3131         rq = intel_context_create_request(ce);
3132         if (IS_ERR(rq)) {
3133                 err = PTR_ERR(rq);
3134                 goto out_batch;
3135         }
3136
3137         i915_vma_lock(vma);
3138         err = i915_request_await_object(rq, vma->obj, false);
3139         if (!err)
3140                 err = i915_vma_move_to_active(vma, rq, 0);
3141         i915_vma_unlock(vma);
3142
3143         i915_vma_lock(batch);
3144         if (!err)
3145                 err = i915_request_await_object(rq, batch->obj, false);
3146         if (!err)
3147                 err = i915_vma_move_to_active(batch, rq, 0);
3148         if (!err)
3149                 err = rq->engine->emit_bb_start(rq,
3150                                                 batch->node.start,
3151                                                 PAGE_SIZE, 0);
3152         i915_vma_unlock(batch);
3153         i915_vma_unpin(batch);
3154
3155         if (!err)
3156                 i915_request_get(rq);
3157         i915_request_add(rq);
3158
3159 out_batch:
3160         i915_vma_put(batch);
3161 out_vma:
3162         i915_vma_unpin(vma);
3163 out_ce:
3164         intel_context_put(ce);
3165         return err ? ERR_PTR(err) : rq;
3166 }
3167
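     /*
      * Submit a maximum priority request that writes @id into the first dword
      * of the global result buffer, preempting whichever user batch is
      * currently executing and releasing the semaphore waits emitted by
      * create_gpr_user().
      */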
3168 static int preempt_user(struct intel_engine_cs *engine,
3169                         struct i915_vma *global,
3170                         int id)
3171 {
3172         struct i915_sched_attr attr = {
3173                 .priority = I915_PRIORITY_MAX
3174         };
3175         struct i915_request *rq;
3176         int err = 0;
3177         u32 *cs;
3178
3179         rq = intel_engine_create_kernel_request(engine);
3180         if (IS_ERR(rq))
3181                 return PTR_ERR(rq);
3182
3183         cs = intel_ring_begin(rq, 4);
3184         if (IS_ERR(cs)) {
3185                 i915_request_add(rq);
3186                 return PTR_ERR(cs);
3187         }
3188
3189         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3190         *cs++ = i915_ggtt_offset(global);
3191         *cs++ = 0;
3192         *cs++ = id;
3193
3194         intel_ring_advance(rq, cs);
3195
3196         i915_request_get(rq);
3197         i915_request_add(rq);
3198
3199         engine->schedule(rq, &attr);
3200
3201         if (i915_request_wait(rq, 0, HZ / 2) < 0)
3202                 err = -ETIME;
3203         i915_request_put(rq);
3204
3205         return err;
3206 }
3207
3208 static int live_preempt_user(void *arg)
3209 {
3210         struct intel_gt *gt = arg;
3211         struct intel_engine_cs *engine;
3212         struct i915_vma *global;
3213         enum intel_engine_id id;
3214         u32 *result;
3215         int err = 0;
3216
3217         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3218                 return 0;
3219
3220         /*
3221          * In our other tests, we look at preemption in carefully
3222          * controlled conditions in the ringbuffer. Since most of the
3223          * time is spent in user batches, most of our preemptions naturally
3224          * occur there. We want to verify that when we preempt inside a batch
3225          * we continue on from the current instruction and do not roll back
3226          * to the start, or another earlier arbitration point.
3227          *
3228          * To verify this, we create a batch which is a mixture of
3229          * MI_MATH (gpr++), MI_SRM (gpr), and preemption points. Then with
3230          * a few preempting contexts thrown into the mix, we look for any
3231          * repeated instructions (which show up as incorrect values).
3232          */
3233
3234         global = create_global(gt, 4096);
3235         if (IS_ERR(global))
3236                 return PTR_ERR(global);
3237
3238         result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3239         if (IS_ERR(result)) {
3240                 i915_vma_unpin_and_release(&global, 0);
3241                 return PTR_ERR(result);
3242         }
3243
3244         for_each_engine(engine, gt, id) {
3245                 struct i915_request *client[3] = {};
3246                 struct igt_live_test t;
3247                 int i;
3248
3249                 if (!intel_engine_has_preemption(engine))
3250                         continue;
3251
3252                 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3253                         continue; /* we need per-context GPR */
3254
3255                 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3256                         err = -EIO;
3257                         break;
3258                 }
3259
3260                 memset(result, 0, 4096);
3261
3262                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3263                         struct i915_request *rq;
3264
3265                         rq = create_gpr_client(engine, global,
3266                                                NUM_GPR * i * sizeof(u32));
3267                         if (IS_ERR(rq)) {
3268                                 err = PTR_ERR(rq);
                                     goto end_test;
                             }
3269
3270                         client[i] = rq;
3271                 }
3272
3273                 /* Continuously preempt the set of 3 running contexts */
3274                 for (i = 1; i <= NUM_GPR; i++) {
3275                         err = preempt_user(engine, global, i);
3276                         if (err)
3277                                 goto end_test;
3278                 }
3279
3280                 if (READ_ONCE(result[0]) != NUM_GPR) {
3281                         pr_err("%s: Failed to release semaphore\n",
3282                                engine->name);
3283                         err = -EIO;
3284                         goto end_test;
3285                 }
3286
3287                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3288                         int gpr;
3289
3290                         if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3291                                 err = -ETIME;
3292                                 goto end_test;
3293                         }
3294
3295                         for (gpr = 1; gpr < NUM_GPR; gpr++) {
3296                                 if (result[NUM_GPR * i + gpr] != 1) {
3297                                         pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3298                                                engine->name,
3299                                                i, gpr, result[NUM_GPR * i + gpr]);
3300                                         err = -EINVAL;
3301                                         goto end_test;
3302                                 }
3303                         }
3304                 }
3305
3306 end_test:
3307                 for (i = 0; i < ARRAY_SIZE(client); i++) {
3308                         if (!client[i])
3309                                 break;
3310
3311                         i915_request_put(client[i]);
3312                 }
3313
3314                 /* Flush the semaphores on error */
3315                 smp_store_mb(result[0], -1);
3316                 if (igt_live_test_end(&t))
3317                         err = -EIO;
3318                 if (err)
3319                         break;
3320         }
3321
3322         i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3323         return err;
3324 }
3325
3326 static int live_preempt_timeout(void *arg)
3327 {
3328         struct intel_gt *gt = arg;
3329         struct i915_gem_context *ctx_hi, *ctx_lo;
3330         struct igt_spinner spin_lo;
3331         struct intel_engine_cs *engine;
3332         enum intel_engine_id id;
3333         int err = -ENOMEM;
3334
3335         /*
3336          * Check that we force preemption to occur by cancelling the previous
3337          * context if it refuses to yield the GPU.
3338          */
3339         if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3340                 return 0;
3341
3342         if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3343                 return 0;
3344
3345         if (!intel_has_reset_engine(gt))
3346                 return 0;
3347
3348         if (igt_spinner_init(&spin_lo, gt))
3349                 return -ENOMEM;
3350
3351         ctx_hi = kernel_context(gt->i915);
3352         if (!ctx_hi)
3353                 goto err_spin_lo;
3354         ctx_hi->sched.priority =
3355                 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3356
3357         ctx_lo = kernel_context(gt->i915);
3358         if (!ctx_lo)
3359                 goto err_ctx_hi;
3360         ctx_lo->sched.priority =
3361                 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3362
3363         for_each_engine(engine, gt, id) {
3364                 unsigned long saved_timeout;
3365                 struct i915_request *rq;
3366
3367                 if (!intel_engine_has_preemption(engine))
3368                         continue;
3369
3370                 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3371                                             MI_NOOP); /* preemption disabled */
3372                 if (IS_ERR(rq)) {
3373                         err = PTR_ERR(rq);
3374                         goto err_ctx_lo;
3375                 }
3376
3377                 i915_request_add(rq);
3378                 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3379                         intel_gt_set_wedged(gt);
3380                         err = -EIO;
3381                         goto err_ctx_lo;
3382                 }
3383
3384                 rq = igt_request_alloc(ctx_hi, engine);
3385                 if (IS_ERR(rq)) {
3386                         igt_spinner_end(&spin_lo);
3387                         err = PTR_ERR(rq);
3388                         goto err_ctx_lo;
3389                 }
3390
3391                 /* Flush the previous CS ack before changing timeouts */
3392                 while (READ_ONCE(engine->execlists.pending[0]))
3393                         cpu_relax();
3394
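                     /*
                      * Shrink the preemption timeout so that the non-preemptible
                      * spinner is shot down (via engine reset) almost as soon as
                      * the high priority request is submitted.
                      */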
3395                 saved_timeout = engine->props.preempt_timeout_ms;
3396                 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3397
3398                 i915_request_get(rq);
3399                 i915_request_add(rq);
3400
3401                 intel_engine_flush_submission(engine);
3402                 engine->props.preempt_timeout_ms = saved_timeout;
3403
3404                 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3405                         intel_gt_set_wedged(gt);
3406                         i915_request_put(rq);
3407                         err = -ETIME;
3408                         goto err_ctx_lo;
3409                 }
3410
3411                 igt_spinner_end(&spin_lo);
3412                 i915_request_put(rq);
3413         }
3414
3415         err = 0;
3416 err_ctx_lo:
3417         kernel_context_close(ctx_lo);
3418 err_ctx_hi:
3419         kernel_context_close(ctx_hi);
3420 err_spin_lo:
3421         igt_spinner_fini(&spin_lo);
3422         return err;
3423 }
3424
3425 static int random_range(struct rnd_state *rnd, int min, int max)
3426 {
3427         return i915_prandom_u32_max_state(max - min, rnd) + min;
3428 }
3429
3430 static int random_priority(struct rnd_state *rnd)
3431 {
3432         return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3433 }
3434
3435 struct preempt_smoke {
3436         struct intel_gt *gt;
3437         struct i915_gem_context **contexts;
3438         struct intel_engine_cs *engine;
3439         struct drm_i915_gem_object *batch;
3440         unsigned int ncontext;
3441         struct rnd_state prng;
3442         unsigned long count;
3443 };
3444
3445 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3446 {
3447         return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3448                                                           &smoke->prng)];
3449 }
3450
3451 static int smoke_submit(struct preempt_smoke *smoke,
3452                         struct i915_gem_context *ctx, int prio,
3453                         struct drm_i915_gem_object *batch)
3454 {
3455         struct i915_request *rq;
3456         struct i915_vma *vma = NULL;
3457         int err = 0;
3458
3459         if (batch) {
3460                 struct i915_address_space *vm;
3461
3462                 vm = i915_gem_context_get_vm_rcu(ctx);
3463                 vma = i915_vma_instance(batch, vm, NULL);
3464                 i915_vm_put(vm);
3465                 if (IS_ERR(vma))
3466                         return PTR_ERR(vma);
3467
3468                 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3469                 if (err)
3470                         return err;
3471         }
3472
3473         ctx->sched.priority = prio;
3474
3475         rq = igt_request_alloc(ctx, smoke->engine);
3476         if (IS_ERR(rq)) {
3477                 err = PTR_ERR(rq);
3478                 goto unpin;
3479         }
3480
3481         if (vma) {
3482                 i915_vma_lock(vma);
3483                 err = i915_request_await_object(rq, vma->obj, false);
3484                 if (!err)
3485                         err = i915_vma_move_to_active(vma, rq, 0);
3486                 if (!err)
3487                         err = rq->engine->emit_bb_start(rq,
3488                                                         vma->node.start,
3489                                                         PAGE_SIZE, 0);
3490                 i915_vma_unlock(vma);
3491         }
3492
3493         i915_request_add(rq);
3494
3495 unpin:
3496         if (vma)
3497                 i915_vma_unpin(vma);
3498
3499         return err;
3500 }
3501
3502 static int smoke_crescendo_thread(void *arg)
3503 {
3504         struct preempt_smoke *smoke = arg;
3505         IGT_TIMEOUT(end_time);
3506         unsigned long count;
3507
3508         count = 0;
3509         do {
3510                 struct i915_gem_context *ctx = smoke_context(smoke);
3511                 int err;
3512
3513                 err = smoke_submit(smoke,
3514                                    ctx, count % I915_PRIORITY_MAX,
3515                                    smoke->batch);
3516                 if (err)
3517                         return err;
3518
3519                 count++;
3520         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3521
3522         smoke->count = count;
3523         return 0;
3524 }
3525
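     /*
      * Run one submission thread per engine, each submitting requests with
      * climbing priority so that later requests tend to preempt earlier ones.
      */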
3526 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3527 #define BATCH BIT(0)
3528 {
3529         struct task_struct *tsk[I915_NUM_ENGINES] = {};
3530         struct preempt_smoke arg[I915_NUM_ENGINES];
3531         struct intel_engine_cs *engine;
3532         enum intel_engine_id id;
3533         unsigned long count;
3534         int err = 0;
3535
3536         for_each_engine(engine, smoke->gt, id) {
3537                 arg[id] = *smoke;
3538                 arg[id].engine = engine;
3539                 if (!(flags & BATCH))
3540                         arg[id].batch = NULL;
3541                 arg[id].count = 0;
3542
3543                 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3544                                       "igt/smoke:%d", id);
3545                 if (IS_ERR(tsk[id])) {
3546                         err = PTR_ERR(tsk[id]);
3547                         break;
3548                 }
3549                 get_task_struct(tsk[id]);
3550         }
3551
3552         yield(); /* start all threads before we kthread_stop() */
3553
3554         count = 0;
3555         for_each_engine(engine, smoke->gt, id) {
3556                 int status;
3557
3558                 if (IS_ERR_OR_NULL(tsk[id]))
3559                         continue;
3560
3561                 status = kthread_stop(tsk[id]);
3562                 if (status && !err)
3563                         err = status;
3564
3565                 count += arg[id].count;
3566
3567                 put_task_struct(tsk[id]);
3568         }
3569
3570         pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3572         return err;
3573 }
3574
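     /*
      * Submit requests with randomly chosen priorities across all engines
      * from a single thread, exercising arbitrary preemption orderings.
      */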
3575 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3576 {
3577         enum intel_engine_id id;
3578         IGT_TIMEOUT(end_time);
3579         unsigned long count;
3580
3581         count = 0;
3582         do {
3583                 for_each_engine(smoke->engine, smoke->gt, id) {
3584                         struct i915_gem_context *ctx = smoke_context(smoke);
3585                         int err;
3586
3587                         err = smoke_submit(smoke,
3588                                            ctx, random_priority(&smoke->prng),
3589                                            flags & BATCH ? smoke->batch : NULL);
3590                         if (err)
3591                                 return err;
3592
3593                         count++;
3594                 }
3595         } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3596
3597         pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598                 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3599         return 0;
3600 }
3601
3602 static int live_preempt_smoke(void *arg)
3603 {
3604         struct preempt_smoke smoke = {
3605                 .gt = arg,
3606                 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3607                 .ncontext = 256,
3608         };
3609         const unsigned int phase[] = { 0, BATCH };
3610         struct igt_live_test t;
3611         int err = -ENOMEM;
3612         u32 *cs;
3613         int n;
3614
3615         if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3616                 return 0;
3617
3618         smoke.contexts = kmalloc_array(smoke.ncontext,
3619                                        sizeof(*smoke.contexts),
3620                                        GFP_KERNEL);
3621         if (!smoke.contexts)
3622                 return -ENOMEM;
3623
3624         smoke.batch =
3625                 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3626         if (IS_ERR(smoke.batch)) {
3627                 err = PTR_ERR(smoke.batch);
3628                 goto err_free;
3629         }
3630
3631         cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3632         if (IS_ERR(cs)) {
3633                 err = PTR_ERR(cs);
3634                 goto err_batch;
3635         }
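             /* Fill the batch with arbitration points so it may be preempted anywhere */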
3636         for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3637                 cs[n] = MI_ARB_CHECK;
3638         cs[n] = MI_BATCH_BUFFER_END;
3639         i915_gem_object_flush_map(smoke.batch);
3640         i915_gem_object_unpin_map(smoke.batch);
3641
3642         if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3643                 err = -EIO;
3644                 goto err_batch;
3645         }
3646
3647         for (n = 0; n < smoke.ncontext; n++) {
3648                 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3649                 if (!smoke.contexts[n])
3650                         goto err_ctx;
3651         }
3652
3653         for (n = 0; n < ARRAY_SIZE(phase); n++) {
3654                 err = smoke_crescendo(&smoke, phase[n]);
3655                 if (err)
3656                         goto err_ctx;
3657
3658                 err = smoke_random(&smoke, phase[n]);
3659                 if (err)
3660                         goto err_ctx;
3661         }
3662
3663 err_ctx:
3664         if (igt_live_test_end(&t))
3665                 err = -EIO;
3666
3667         for (n = 0; n < smoke.ncontext; n++) {
3668                 if (!smoke.contexts[n])
3669                         break;
3670                 kernel_context_close(smoke.contexts[n]);
3671         }
3672
3673 err_batch:
3674         i915_gem_object_put(smoke.batch);
3675 err_free:
3676         kfree(smoke.contexts);
3677
3678         return err;
3679 }
3680
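     /*
      * Measure the latency of submitting and retiring empty requests through
      * nctx virtual engines built from the same siblings, either batching the
      * requests per context (CHAIN) or interleaving the contexts.
      */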
3681 static int nop_virtual_engine(struct intel_gt *gt,
3682                               struct intel_engine_cs **siblings,
3683                               unsigned int nsibling,
3684                               unsigned int nctx,
3685                               unsigned int flags)
3686 #define CHAIN BIT(0)
3687 {
3688         IGT_TIMEOUT(end_time);
3689         struct i915_request *request[16] = {};
3690         struct intel_context *ve[16];
3691         unsigned long n, prime, nc;
3692         struct igt_live_test t;
3693         ktime_t times[2] = {};
3694         int err;
3695
3696         GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3697
3698         for (n = 0; n < nctx; n++) {
3699                 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3700                 if (IS_ERR(ve[n])) {
3701                         err = PTR_ERR(ve[n]);
3702                         nctx = n;
3703                         goto out;
3704                 }
3705
3706                 err = intel_context_pin(ve[n]);
3707                 if (err) {
3708                         intel_context_put(ve[n]);
3709                         nctx = n;
3710                         goto out;
3711                 }
3712         }
3713
3714         err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3715         if (err)
3716                 goto out;
3717
3718         for_each_prime_number_from(prime, 1, 8192) {
3719                 times[1] = ktime_get_raw();
3720
3721                 if (flags & CHAIN) {
3722                         for (nc = 0; nc < nctx; nc++) {
3723                                 for (n = 0; n < prime; n++) {
3724                                         struct i915_request *rq;
3725
3726                                         rq = i915_request_create(ve[nc]);
3727                                         if (IS_ERR(rq)) {
3728                                                 err = PTR_ERR(rq);
3729                                                 goto out;
3730                                         }
3731
3732                                         if (request[nc])
3733                                                 i915_request_put(request[nc]);
3734                                         request[nc] = i915_request_get(rq);
3735                                         i915_request_add(rq);
3736                                 }
3737                         }
3738                 } else {
3739                         for (n = 0; n < prime; n++) {
3740                                 for (nc = 0; nc < nctx; nc++) {
3741                                         struct i915_request *rq;
3742
3743                                         rq = i915_request_create(ve[nc]);
3744                                         if (IS_ERR(rq)) {
3745                                                 err = PTR_ERR(rq);
3746                                                 goto out;
3747                                         }
3748
3749                                         if (request[nc])
3750                                                 i915_request_put(request[nc]);
3751                                         request[nc] = i915_request_get(rq);
3752                                         i915_request_add(rq);
3753                                 }
3754                         }
3755                 }
3756
3757                 for (nc = 0; nc < nctx; nc++) {
3758                         if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3759                                 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3760                                        __func__, ve[0]->engine->name,
3761                                        request[nc]->fence.context,
3762                                        request[nc]->fence.seqno);
3763
3764                                 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3765                                           __func__, ve[0]->engine->name,
3766                                           request[nc]->fence.context,
3767                                           request[nc]->fence.seqno);
3768                                 GEM_TRACE_DUMP();
3769                                 intel_gt_set_wedged(gt);
3770                                 break;
3771                         }
3772                 }
3773
3774                 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3775                 if (prime == 1)
3776                         times[0] = times[1];
3777
3778                 for (nc = 0; nc < nctx; nc++) {
3779                         i915_request_put(request[nc]);
3780                         request[nc] = NULL;
3781                 }
3782
3783                 if (__igt_timeout(end_time, NULL))
3784                         break;
3785         }
3786
3787         err = igt_live_test_end(&t);
3788         if (err)
3789                 goto out;
3790
3791         pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3792                 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3793                 prime, div64_u64(ktime_to_ns(times[1]), prime));
3794
3795 out:
3796         if (igt_flush_test(gt->i915))
3797                 err = -EIO;
3798
3799         for (nc = 0; nc < nctx; nc++) {
3800                 i915_request_put(request[nc]);
3801                 intel_context_unpin(ve[nc]);
3802                 intel_context_put(ve[nc]);
3803         }
3804         return err;
3805 }
3806
3807 static unsigned int
3808 __select_siblings(struct intel_gt *gt,
3809                   unsigned int class,
3810                   struct intel_engine_cs **siblings,
3811                   bool (*filter)(const struct intel_engine_cs *))
3812 {
3813         unsigned int n = 0;
3814         unsigned int inst;
3815
3816         for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3817                 if (!gt->engine_class[class][inst])
3818                         continue;
3819
3820                 if (filter && !filter(gt->engine_class[class][inst]))
3821                         continue;
3822
3823                 siblings[n++] = gt->engine_class[class][inst];
3824         }
3825
3826         return n;
3827 }
3828
3829 static unsigned int
3830 select_siblings(struct intel_gt *gt,
3831                 unsigned int class,
3832                 struct intel_engine_cs **siblings)
3833 {
3834         return __select_siblings(gt, class, siblings, NULL);
3835 }
3836
3837 static int live_virtual_engine(void *arg)
3838 {
3839         struct intel_gt *gt = arg;
3840         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3841         struct intel_engine_cs *engine;
3842         enum intel_engine_id id;
3843         unsigned int class;
3844         int err;
3845
3846         if (intel_uc_uses_guc_submission(&gt->uc))
3847                 return 0;
3848
3849         for_each_engine(engine, gt, id) {
3850                 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3851                 if (err) {
3852                         pr_err("Failed to wrap engine %s: err=%d\n",
3853                                engine->name, err);
3854                         return err;
3855                 }
3856         }
3857
3858         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3859                 int nsibling, n;
3860
3861                 nsibling = select_siblings(gt, class, siblings);
3862                 if (nsibling < 2)
3863                         continue;
3864
3865                 for (n = 1; n <= nsibling + 1; n++) {
3866                         err = nop_virtual_engine(gt, siblings, nsibling,
3867                                                  n, 0);
3868                         if (err)
3869                                 return err;
3870                 }
3871
3872                 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3873                 if (err)
3874                         return err;
3875         }
3876
3877         return 0;
3878 }
3879
3880 static int mask_virtual_engine(struct intel_gt *gt,
3881                                struct intel_engine_cs **siblings,
3882                                unsigned int nsibling)
3883 {
3884         struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3885         struct intel_context *ve;
3886         struct igt_live_test t;
3887         unsigned int n;
3888         int err;
3889
3890         /*
3891          * Check that by setting the execution mask on a request, we can
3892          * restrict it to our desired engine within the virtual engine.
3893          */
3894
3895         ve = intel_execlists_create_virtual(siblings, nsibling);
3896         if (IS_ERR(ve)) {
3897                 err = PTR_ERR(ve);
3898                 goto out_close;
3899         }
3900
3901         err = intel_context_pin(ve);
3902         if (err)
3903                 goto out_put;
3904
3905         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3906         if (err)
3907                 goto out_unpin;
3908
3909         for (n = 0; n < nsibling; n++) {
3910                 request[n] = i915_request_create(ve);
3911                 if (IS_ERR(request[n])) {
3912                         err = PTR_ERR(request[n]);
3913                         nsibling = n;
3914                         goto out;
3915                 }
3916
3917                 /* Reverse order as it's more likely to be unnatural */
3918                 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3919
3920                 i915_request_get(request[n]);
3921                 i915_request_add(request[n]);
3922         }
3923
3924         for (n = 0; n < nsibling; n++) {
3925                 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3926                         pr_err("%s(%s): wait for %llx:%lld timed out\n",
3927                                __func__, ve->engine->name,
3928                                request[n]->fence.context,
3929                                request[n]->fence.seqno);
3930
3931                         GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3932                                   __func__, ve->engine->name,
3933                                   request[n]->fence.context,
3934                                   request[n]->fence.seqno);
3935                         GEM_TRACE_DUMP();
3936                         intel_gt_set_wedged(gt);
3937                         err = -EIO;
3938                         goto out;
3939                 }
3940
3941                 if (request[n]->engine != siblings[nsibling - n - 1]) {
3942                         pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3943                                request[n]->engine->name,
3944                                siblings[nsibling - n - 1]->name);
3945                         err = -EINVAL;
3946                         goto out;
3947                 }
3948         }
3949
3950         err = igt_live_test_end(&t);
3951 out:
3952         if (igt_flush_test(gt->i915))
3953                 err = -EIO;
3954
3955         for (n = 0; n < nsibling; n++)
3956                 i915_request_put(request[n]);
3957
3958 out_unpin:
3959         intel_context_unpin(ve);
3960 out_put:
3961         intel_context_put(ve);
3962 out_close:
3963         return err;
3964 }
3965
3966 static int live_virtual_mask(void *arg)
3967 {
3968         struct intel_gt *gt = arg;
3969         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3970         unsigned int class;
3971         int err;
3972
3973         if (intel_uc_uses_guc_submission(&gt->uc))
3974                 return 0;
3975
3976         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3977                 unsigned int nsibling;
3978
3979                 nsibling = select_siblings(gt, class, siblings);
3980                 if (nsibling < 2)
3981                         continue;
3982
3983                 err = mask_virtual_engine(gt, siblings, nsibling);
3984                 if (err)
3985                         return err;
3986         }
3987
3988         return 0;
3989 }
3990
3991 static int slicein_virtual_engine(struct intel_gt *gt,
3992                                   struct intel_engine_cs **siblings,
3993                                   unsigned int nsibling)
3994 {
3995         const long timeout = slice_timeout(siblings[0]);
3996         struct intel_context *ce;
3997         struct i915_request *rq;
3998         struct igt_spinner spin;
3999         unsigned int n;
4000         int err = 0;
4001
4002         /*
4003          * Virtual requests must take part in timeslicing on the target engines.
4004          */
4005
4006         if (igt_spinner_init(&spin, gt))
4007                 return -ENOMEM;
4008
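             /* Keep every sibling busy with a preemptible spinner. */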
4009         for (n = 0; n < nsibling; n++) {
4010                 ce = intel_context_create(siblings[n]);
4011                 if (IS_ERR(ce)) {
4012                         err = PTR_ERR(ce);
4013                         goto out;
4014                 }
4015
4016                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4017                 intel_context_put(ce);
4018                 if (IS_ERR(rq)) {
4019                         err = PTR_ERR(rq);
4020                         goto out;
4021                 }
4022
4023                 i915_request_add(rq);
4024         }
4025
4026         ce = intel_execlists_create_virtual(siblings, nsibling);
4027         if (IS_ERR(ce)) {
4028                 err = PTR_ERR(ce);
4029                 goto out;
4030         }
4031
4032         rq = intel_context_create_request(ce);
4033         intel_context_put(ce);
4034         if (IS_ERR(rq)) {
4035                 err = PTR_ERR(rq);
4036                 goto out;
4037         }
4038
4039         i915_request_get(rq);
4040         i915_request_add(rq);
4041         if (i915_request_wait(rq, 0, timeout) < 0) {
4042                 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4043                               __func__, rq->engine->name);
4044                 GEM_TRACE_DUMP();
4045                 intel_gt_set_wedged(gt);
4046                 err = -EIO;
4047         }
4048         i915_request_put(rq);
4049
4050 out:
4051         igt_spinner_end(&spin);
4052         if (igt_flush_test(gt->i915))
4053                 err = -EIO;
4054         igt_spinner_fini(&spin);
4055         return err;
4056 }
4057
4058 static int sliceout_virtual_engine(struct intel_gt *gt,
4059                                    struct intel_engine_cs **siblings,
4060                                    unsigned int nsibling)
4061 {
4062         const long timeout = slice_timeout(siblings[0]);
4063         struct intel_context *ce;
4064         struct i915_request *rq;
4065         struct igt_spinner spin;
4066         unsigned int n;
4067         int err = 0;
4068
4069         /*
4070          * Virtual requests must allow others a fair timeslice.
4071          */
4072
4073         if (igt_spinner_init(&spin, gt))
4074                 return -ENOMEM;
4075
4076         /* XXX We do not handle oversubscription and fairness with normal rq */
4077         for (n = 0; n < nsibling; n++) {
4078                 ce = intel_execlists_create_virtual(siblings, nsibling);
4079                 if (IS_ERR(ce)) {
4080                         err = PTR_ERR(ce);
4081                         goto out;
4082                 }
4083
4084                 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4085                 intel_context_put(ce);
4086                 if (IS_ERR(rq)) {
4087                         err = PTR_ERR(rq);
4088                         goto out;
4089                 }
4090
4091                 i915_request_add(rq);
4092         }
4093
4094         for (n = 0; !err && n < nsibling; n++) {
4095                 ce = intel_context_create(siblings[n]);
4096                 if (IS_ERR(ce)) {
4097                         err = PTR_ERR(ce);
4098                         goto out;
4099                 }
4100
4101                 rq = intel_context_create_request(ce);
4102                 intel_context_put(ce);
4103                 if (IS_ERR(rq)) {
4104                         err = PTR_ERR(rq);
4105                         goto out;
4106                 }
4107
4108                 i915_request_get(rq);
4109                 i915_request_add(rq);
4110                 if (i915_request_wait(rq, 0, timeout) < 0) {
4111                         GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4112                                       __func__, siblings[n]->name);
4113                         GEM_TRACE_DUMP();
4114                         intel_gt_set_wedged(gt);
4115                         err = -EIO;
4116                 }
4117                 i915_request_put(rq);
4118         }
4119
4120 out:
4121         igt_spinner_end(&spin);
4122         if (igt_flush_test(gt->i915))
4123                 err = -EIO;
4124         igt_spinner_fini(&spin);
4125         return err;
4126 }
4127
4128 static int live_virtual_slice(void *arg)
4129 {
4130         struct intel_gt *gt = arg;
4131         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4132         unsigned int class;
4133         int err;
4134
4135         if (intel_uc_uses_guc_submission(&gt->uc))
4136                 return 0;
4137
4138         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4139                 unsigned int nsibling;
4140
4141                 nsibling = __select_siblings(gt, class, siblings,
4142                                              intel_engine_has_timeslices);
4143                 if (nsibling < 2)
4144                         continue;
4145
4146                 err = slicein_virtual_engine(gt, siblings, nsibling);
4147                 if (err)
4148                         return err;
4149
4150                 err = sliceout_virtual_engine(gt, siblings, nsibling);
4151                 if (err)
4152                         return err;
4153         }
4154
4155         return 0;
4156 }
4157
4158 static int preserved_virtual_engine(struct intel_gt *gt,
4159                                     struct intel_engine_cs **siblings,
4160                                     unsigned int nsibling)
4161 {
4162         struct i915_request *last = NULL;
4163         struct intel_context *ve;
4164         struct i915_vma *scratch;
4165         struct igt_live_test t;
4166         unsigned int n;
4167         int err = 0;
4168         u32 *cs;
4169
4170         scratch = create_scratch(siblings[0]->gt);
4171         if (IS_ERR(scratch))
4172                 return PTR_ERR(scratch);
4173
4174         err = i915_vma_sync(scratch);
4175         if (err)
4176                 goto out_scratch;
4177
4178         ve = intel_execlists_create_virtual(siblings, nsibling);
4179         if (IS_ERR(ve)) {
4180                 err = PTR_ERR(ve);
4181                 goto out_scratch;
4182         }
4183
4184         err = intel_context_pin(ve);
4185         if (err)
4186                 goto out_put;
4187
4188         err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4189         if (err)
4190                 goto out_unpin;
4191
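             /*
              * Build a chain of requests, each nominally on a different sibling,
              * where request n reads back GPR[n] (written as n by its
              * predecessor) and then writes GPR[n + 1] = n + 1. The readback is
              * only correct if the GPRs survive the hops between engines.
              */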
4192         for (n = 0; n < NUM_GPR_DW; n++) {
4193                 struct intel_engine_cs *engine = siblings[n % nsibling];
4194                 struct i915_request *rq;
4195
4196                 rq = i915_request_create(ve);
4197                 if (IS_ERR(rq)) {
4198                         err = PTR_ERR(rq);
4199                         goto out_end;
4200                 }
4201
4202                 i915_request_put(last);
4203                 last = i915_request_get(rq);
4204
4205                 cs = intel_ring_begin(rq, 8);
4206                 if (IS_ERR(cs)) {
4207                         i915_request_add(rq);
4208                         err = PTR_ERR(cs);
4209                         goto out_end;
4210                 }
4211
4212                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4213                 *cs++ = CS_GPR(engine, n);
4214                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4215                 *cs++ = 0;
4216
4217                 *cs++ = MI_LOAD_REGISTER_IMM(1);
4218                 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4219                 *cs++ = n + 1;
4220
4221                 *cs++ = MI_NOOP;
4222                 intel_ring_advance(rq, cs);
4223
4224                 /* Restrict this request to run on a particular engine */
4225                 rq->execution_mask = engine->mask;
4226                 i915_request_add(rq);
4227         }
4228
4229         if (i915_request_wait(last, 0, HZ / 5) < 0) {
4230                 err = -ETIME;
4231                 goto out_end;
4232         }
4233
4234         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4235         if (IS_ERR(cs)) {
4236                 err = PTR_ERR(cs);
4237                 goto out_end;
4238         }
4239
4240         for (n = 0; n < NUM_GPR_DW; n++) {
4241                 if (cs[n] != n) {
4242                         pr_err("Incorrect value[%d] found for GPR[%d]\n",
4243                                cs[n], n);
4244                         err = -EINVAL;
4245                         break;
4246                 }
4247         }
4248
4249         i915_gem_object_unpin_map(scratch->obj);
4250
4251 out_end:
4252         if (igt_live_test_end(&t))
4253                 err = -EIO;
4254         i915_request_put(last);
4255 out_unpin:
4256         intel_context_unpin(ve);
4257 out_put:
4258         intel_context_put(ve);
4259 out_scratch:
4260         i915_vma_unpin_and_release(&scratch, 0);
4261         return err;
4262 }
4263
4264 static int live_virtual_preserved(void *arg)
4265 {
4266         struct intel_gt *gt = arg;
4267         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4268         unsigned int class;
4269
4270         /*
4271          * Check that the context image retains non-privileged (user) registers
4272          * from one engine to the next. For this we check that the CS_GPR
4273          * are preserved.
4274          */
4275
4276         if (intel_uc_uses_guc_submission(&gt->uc))
4277                 return 0;
4278
4279         /* As we use CS_GPR, we cannot run on gens before they existed on all engines. */
4280         if (INTEL_GEN(gt->i915) < 9)
4281                 return 0;
4282
4283         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4284                 int nsibling, err;
4285
4286                 nsibling = select_siblings(gt, class, siblings);
4287                 if (nsibling < 2)
4288                         continue;
4289
4290                 err = preserved_virtual_engine(gt, siblings, nsibling);
4291                 if (err)
4292                         return err;
4293         }
4294
4295         return 0;
4296 }
4297
4298 static int bond_virtual_engine(struct intel_gt *gt,
4299                                unsigned int class,
4300                                struct intel_engine_cs **siblings,
4301                                unsigned int nsibling,
4302                                unsigned int flags)
4303 #define BOND_SCHEDULE BIT(0)
4304 {
4305         struct intel_engine_cs *master;
4306         struct i915_request *rq[16];
4307         enum intel_engine_id id;
4308         struct igt_spinner spin;
4309         unsigned long n;
4310         int err;
4311
4312         /*
4313          * A set of bonded requests is intended to be run concurrently
4314          * across a number of engines. We use one request per engine
4315          * and a magic fence to schedule each of the bonded requests
4316          * at the same time. A consequence of our current scheduler is that
4317          * we only move requests to the HW ready queue when the request
4318          * becomes ready, that is when all of its prerequisite fences have
4319          * been signaled. As one of those fences is the master submit fence,
4320          * there is a delay on all secondary fences as the HW may be
4321          * currently busy. Equally, as all the requests are independent,
4322          * they may have other fences that delay individual request
4323          * submission to HW. Ergo, we do not guarantee that all requests are
4324          * immediately submitted to HW at the same time, just that if the
4325          * rules are abided by, they are ready at the same time as the
4326          * first is submitted. Userspace can embed semaphores in its batch
4327          * to ensure parallel execution of its phases as it requires.
4328          * Naturally, it has been suggested that the scheduler should
4329          * take care of parallel execution, even across preemption events on
4330          * different HW. (The proper answer is of course "lalalala".)
4331          *
4332          * With the submit-fence, we have identified three possible phases
4333          * of synchronisation depending on the master fence: queued (not
4334          * ready), executing, and signaled. The first two are quite simple
4335          * and checked below. However, the signaled master fence handling is
4336          * contentious. Currently we do not distinguish between a signaled
4337          * fence and an expired fence, as once signaled it does not convey
4338          * any information about the previous execution. It may even be freed
4339          * and hence, by the time we check, it may not exist at all. Ergo we currently
4340          * do not apply the bonding constraint for an already signaled fence,
4341          * as our expectation is that it should not constrain the secondaries
4342          * and is outside of the scope of the bonded request API (i.e. all
4343          * userspace requests are meant to be running in parallel). As
4344          * it imposes no constraint, and is effectively a no-op, we do not
4345          * check below as normal execution flows are checked extensively above.
4346          *
4347          * XXX Is the degenerate handling of signaled submit fences the
4348          * expected behaviour for userspace?
4349          */
4350
4351         GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4352
4353         if (igt_spinner_init(&spin, gt))
4354                 return -ENOMEM;
4355
4356         err = 0;
4357         rq[0] = ERR_PTR(-ENOMEM);
4358         for_each_engine(master, gt, id) {
4359                 struct i915_sw_fence fence = {};
4360                 struct intel_context *ce;
4361
4362                 if (master->class == class)
4363                         continue;
4364
4365                 ce = intel_context_create(master);
4366                 if (IS_ERR(ce)) {
4367                         err = PTR_ERR(ce);
4368                         goto out;
4369                 }
4370
4371                 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4372
4373                 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4374                 intel_context_put(ce);
4375                 if (IS_ERR(rq[0])) {
4376                         err = PTR_ERR(rq[0]);
4377                         goto out;
4378                 }
4379                 i915_request_get(rq[0]);
4380
4381                 if (flags & BOND_SCHEDULE) {
4382                         onstack_fence_init(&fence);
4383                         err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4384                                                                &fence,
4385                                                                GFP_KERNEL);
4386                 }
4387
4388                 i915_request_add(rq[0]);
4389                 if (err < 0)
4390                         goto out;
4391
4392                 if (!(flags & BOND_SCHEDULE) &&
4393                     !igt_wait_for_spinner(&spin, rq[0])) {
4394                         err = -EIO;
4395                         goto out;
4396                 }
4397
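                     /*
                      * Create one bonded request per sibling: each runs on its
                      * own virtual engine, is bonded to a specific sibling for
                      * this master, and is coupled to the master's submit fence
                      * so that all of them are scheduled together.
                      */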
4398                 for (n = 0; n < nsibling; n++) {
4399                         struct intel_context *ve;
4400
4401                         ve = intel_execlists_create_virtual(siblings, nsibling);
4402                         if (IS_ERR(ve)) {
4403                                 err = PTR_ERR(ve);
4404                                 onstack_fence_fini(&fence);
4405                                 goto out;
4406                         }
4407
4408                         err = intel_virtual_engine_attach_bond(ve->engine,
4409                                                                master,
4410                                                                siblings[n]);
4411                         if (err) {
4412                                 intel_context_put(ve);
4413                                 onstack_fence_fini(&fence);
4414                                 goto out;
4415                         }
4416
4417                         err = intel_context_pin(ve);
4418                         intel_context_put(ve);
4419                         if (err) {
4420                                 onstack_fence_fini(&fence);
4421                                 goto out;
4422                         }
4423
4424                         rq[n + 1] = i915_request_create(ve);
4425                         intel_context_unpin(ve);
4426                         if (IS_ERR(rq[n + 1])) {
4427                                 err = PTR_ERR(rq[n + 1]);
4428                                 onstack_fence_fini(&fence);
4429                                 goto out;
4430                         }
4431                         i915_request_get(rq[n + 1]);
4432
4433                         err = i915_request_await_execution(rq[n + 1],
4434                                                            &rq[0]->fence,
4435                                                            ve->engine->bond_execute);
4436                         i915_request_add(rq[n + 1]);
4437                         if (err < 0) {
4438                                 onstack_fence_fini(&fence);
4439                                 goto out;
4440                         }
4441                 }
4442                 onstack_fence_fini(&fence);
4443                 intel_engine_flush_submission(master);
4444                 igt_spinner_end(&spin);
4445
4446                 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4447                         pr_err("Master request did not execute (on %s)!\n",
4448                                rq[0]->engine->name);
4449                         err = -EIO;
4450                         goto out;
4451                 }
4452
4453                 for (n = 0; n < nsibling; n++) {
4454                         if (i915_request_wait(rq[n + 1], 0,
4455                                               MAX_SCHEDULE_TIMEOUT) < 0) {
4456                                 err = -EIO;
4457                                 goto out;
4458                         }
4459
4460                         if (rq[n + 1]->engine != siblings[n]) {
4461                                 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4462                                        siblings[n]->name,
4463                                        rq[n + 1]->engine->name,
4464                                        rq[0]->engine->name);
4465                                 err = -EINVAL;
4466                                 goto out;
4467                         }
4468                 }
4469
4470                 for (n = 0; !IS_ERR(rq[n]); n++)
4471                         i915_request_put(rq[n]);
4472                 rq[0] = ERR_PTR(-ENOMEM);
4473         }
4474
4475 out:
4476         for (n = 0; !IS_ERR(rq[n]); n++)
4477                 i915_request_put(rq[n]);
4478         if (igt_flush_test(gt->i915))
4479                 err = -EIO;
4480
4481         igt_spinner_fini(&spin);
4482         return err;
4483 }
4484
4485 static int live_virtual_bond(void *arg)
4486 {
4487         static const struct phase {
4488                 const char *name;
4489                 unsigned int flags;
4490         } phases[] = {
4491                 { "", 0 },
4492                 { "schedule", BOND_SCHEDULE },
4493                 { },
4494         };
4495         struct intel_gt *gt = arg;
4496         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4497         unsigned int class;
4498         int err;
4499
4500         if (intel_uc_uses_guc_submission(&gt->uc))
4501                 return 0;
4502
4503         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4504                 const struct phase *p;
4505                 int nsibling;
4506
4507                 nsibling = select_siblings(gt, class, siblings);
4508                 if (nsibling < 2)
4509                         continue;
4510
4511                 for (p = phases; p->name; p++) {
4512                         err = bond_virtual_engine(gt,
4513                                                   class, siblings, nsibling,
4514                                                   p->flags);
4515                         if (err) {
4516                                 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517                                        __func__, p->name, class, nsibling, err);
4518                                 return err;
4519                         }
4520                 }
4521         }
4522
4523         return 0;
4524 }
4525
4526 static int reset_virtual_engine(struct intel_gt *gt,
4527                                 struct intel_engine_cs **siblings,
4528                                 unsigned int nsibling)
4529 {
4530         struct intel_engine_cs *engine;
4531         struct intel_context *ve;
4532         struct igt_spinner spin;
4533         struct i915_request *rq;
4534         unsigned int n;
4535         int err = 0;
4536
4537         /*
4538          * In order to support offline error capture for fast preempt reset,
4539          * we need to decouple the guilty request and ensure that it and its
4540          * descendants are not executed while the capture is in progress.
4541          */
4542
4543         if (igt_spinner_init(&spin, gt))
4544                 return -ENOMEM;
4545
4546         ve = intel_execlists_create_virtual(siblings, nsibling);
4547         if (IS_ERR(ve)) {
4548                 err = PTR_ERR(ve);
4549                 goto out_spin;
4550         }
4551
4552         for (n = 0; n < nsibling; n++)
4553                 st_engine_heartbeat_disable(siblings[n]);
4554
4555         rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4556         if (IS_ERR(rq)) {
4557                 err = PTR_ERR(rq);
4558                 goto out_heartbeat;
4559         }
4560         i915_request_add(rq);
4561
4562         if (!igt_wait_for_spinner(&spin, rq)) {
4563                 intel_gt_set_wedged(gt);
4564                 err = -ETIME;
4565                 goto out_heartbeat;
4566         }
4567
4568         engine = rq->engine;
4569         GEM_BUG_ON(engine == ve->engine);
4570
4571         /* Take ownership of the reset and tasklet */
4572         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4573                              &gt->reset.flags)) {
4574                 intel_gt_set_wedged(gt);
4575                 err = -EBUSY;
4576                 goto out_heartbeat;
4577         }
4578         tasklet_disable(&engine->execlists.tasklet);
4579
4580         engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4581         GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4582
4583         /* Fake a preemption event; a failed one, of course */
4584         spin_lock_irq(&engine->active.lock);
4585         __unwind_incomplete_requests(engine);
4586         spin_unlock_irq(&engine->active.lock);
4587         GEM_BUG_ON(rq->engine != ve->engine);
4588
4589         /* Reset the engine while keeping our active request on hold */
4590         execlists_hold(engine, rq);
4591         GEM_BUG_ON(!i915_request_on_hold(rq));
4592
4593         intel_engine_reset(engine, NULL);
4594         GEM_BUG_ON(rq->fence.error != -EIO);
4595
4596         /* Release our grasp on the engine, letting CS flow again */
4597         tasklet_enable(&engine->execlists.tasklet);
4598         clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
4599
4600         /* Check that we do not resubmit the held request */
4601         i915_request_get(rq);
4602         if (!i915_request_wait(rq, 0, HZ / 5)) {
4603                 pr_err("%s: on hold request completed!\n",
4604                        engine->name);
4605                 intel_gt_set_wedged(gt);
4606                 err = -EIO;
4607                 goto out_rq;
4608         }
4609         GEM_BUG_ON(!i915_request_on_hold(rq));
4610
4611         /* But it is resubmitted on release */
4612         execlists_unhold(engine, rq);
4613         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4614                 pr_err("%s: held request did not complete!\n",
4615                        engine->name);
4616                 intel_gt_set_wedged(gt);
4617                 err = -ETIME;
4618         }
4619
4620 out_rq:
4621         i915_request_put(rq);
4622 out_heartbeat:
4623         for (n = 0; n < nsibling; n++)
4624                 st_engine_heartbeat_enable(siblings[n]);
4625
4626         intel_context_put(ve);
4627 out_spin:
4628         igt_spinner_fini(&spin);
4629         return err;
4630 }
4631
4632 static int live_virtual_reset(void *arg)
4633 {
4634         struct intel_gt *gt = arg;
4635         struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4636         unsigned int class;
4637
4638         /*
4639          * Check that we handle a reset event within a virtual engine.
4640          * Only the physical engine is reset, but we have to check the flow
4641          * of the virtual requests around the reset, and make sure no request
4642          * is forgotten.
4643          */
4644
4645         if (intel_uc_uses_guc_submission(&gt->uc))
4646                 return 0;
4647
4648         if (!intel_has_reset_engine(gt))
4649                 return 0;
4650
4651         for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4652                 int nsibling, err;
4653
4654                 nsibling = select_siblings(gt, class, siblings);
4655                 if (nsibling < 2)
4656                         continue;
4657
4658                 err = reset_virtual_engine(gt, siblings, nsibling);
4659                 if (err)
4660                         return err;
4661         }
4662
4663         return 0;
4664 }
4665
4666 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4667 {
4668         static const struct i915_subtest tests[] = {
4669                 SUBTEST(live_sanitycheck),
4670                 SUBTEST(live_unlite_switch),
4671                 SUBTEST(live_unlite_preempt),
4672                 SUBTEST(live_unlite_ring),
4673                 SUBTEST(live_pin_rewind),
4674                 SUBTEST(live_hold_reset),
4675                 SUBTEST(live_error_interrupt),
4676                 SUBTEST(live_timeslice_preempt),
4677                 SUBTEST(live_timeslice_rewind),
4678                 SUBTEST(live_timeslice_queue),
4679                 SUBTEST(live_timeslice_nopreempt),
4680                 SUBTEST(live_busywait_preempt),
4681                 SUBTEST(live_preempt),
4682                 SUBTEST(live_late_preempt),
4683                 SUBTEST(live_nopreempt),
4684                 SUBTEST(live_preempt_cancel),
4685                 SUBTEST(live_suppress_self_preempt),
4686                 SUBTEST(live_chain_preempt),
4687                 SUBTEST(live_preempt_ring),
4688                 SUBTEST(live_preempt_gang),
4689                 SUBTEST(live_preempt_timeout),
4690                 SUBTEST(live_preempt_user),
4691                 SUBTEST(live_preempt_smoke),
4692                 SUBTEST(live_virtual_engine),
4693                 SUBTEST(live_virtual_mask),
4694                 SUBTEST(live_virtual_preserved),
4695                 SUBTEST(live_virtual_slice),
4696                 SUBTEST(live_virtual_bond),
4697                 SUBTEST(live_virtual_reset),
4698         };
4699
4700         if (!HAS_EXECLISTS(i915))
4701                 return 0;
4702
4703         if (intel_gt_is_wedged(&i915->gt))
4704                 return 0;
4705
4706         return intel_gt_live_subtests(tests, &i915->gt);
4707 }
4708
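/*
 * Submit a request on @ce that writes 1 into @slot of the engine's status
 * page (via the GGTT), releasing any MI_SEMAPHORE_WAIT polling on that
 * dword. Queued at barrier priority so it is not starved by other work.
 */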
4709 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4710 {
4711         const u32 offset =
4712                 i915_ggtt_offset(ce->engine->status_page.vma) +
4713                 offset_in_page(slot);
4714         struct i915_request *rq;
4715         u32 *cs;
4716
4717         rq = intel_context_create_request(ce);
4718         if (IS_ERR(rq))
4719                 return PTR_ERR(rq);
4720
4721         cs = intel_ring_begin(rq, 4);
4722         if (IS_ERR(cs)) {
4723                 i915_request_add(rq);
4724                 return PTR_ERR(cs);
4725         }
4726
4727         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4728         *cs++ = offset;
4729         *cs++ = 0;
4730         *cs++ = 1;
4731
4732         intel_ring_advance(rq, cs);
4733
4734         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4735         i915_request_add(rq);
4736         return 0;
4737 }
4738
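/*
 * Queue a kernel-context request behind the last request on @ce's timeline
 * and wait up to @timeout for it to complete, forcing a switch away from
 * @ce so that its context image is written back to memory.
 */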
4739 static int context_flush(struct intel_context *ce, long timeout)
4740 {
4741         struct i915_request *rq;
4742         struct dma_fence *fence;
4743         int err = 0;
4744
4745         rq = intel_engine_create_kernel_request(ce->engine);
4746         if (IS_ERR(rq))
4747                 return PTR_ERR(rq);
4748
4749         fence = i915_active_fence_get(&ce->timeline->last_request);
4750         if (fence) {
4751                 i915_request_await_dma_fence(rq, fence);
4752                 dma_fence_put(fence);
4753         }
4754
4755         rq = i915_request_get(rq);
4756         i915_request_add(rq);
4757         if (i915_request_wait(rq, 0, timeout) < 0)
4758                 err = -ETIME;
4759         i915_request_put(rq);
4760
4761         rmb(); /* We know the request is written, make sure all state is too! */
4762         return err;
4763 }
4764
4765 static int live_lrc_layout(void *arg)
4766 {
4767         struct intel_gt *gt = arg;
4768         struct intel_engine_cs *engine;
4769         enum intel_engine_id id;
4770         u32 *lrc;
4771         int err;
4772
4773         /*
4774          * Check that the register offsets we use to create the initial reg
4775          * state match the layout saved by the HW.
4776          */
4777
4778         lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4779         if (!lrc)
4780                 return -ENOMEM;
4781
4782         err = 0;
4783         for_each_engine(engine, gt, id) {
4784                 u32 *hw;
4785                 int dw;
4786
4787                 if (!engine->default_state)
4788                         continue;
4789
4790                 hw = shmem_pin_map(engine->default_state);
4791                 if (!hw) {
4792                         err = -ENOMEM;
4793                         break;
4794                 }
4795                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4796
4797                 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4798                                          engine->kernel_context,
4799                                          engine,
4800                                          engine->kernel_context->ring,
4801                                          true);
4802
4803                 dw = 0;
4804                 do {
4805                         u32 lri = hw[dw];
4806
4807                         if (lri == 0) {
4808                                 dw++;
4809                                 continue;
4810                         }
4811
4812                         if (lrc[dw] == 0) {
4813                                 pr_debug("%s: skipped instruction %x at dword %d\n",
4814                                          engine->name, lri, dw);
4815                                 dw++;
4816                                 continue;
4817                         }
4818
4819                         if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820                                 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821                                        engine->name, dw, lri);
4822                                 err = -EINVAL;
4823                                 break;
4824                         }
4825
4826                         if (lrc[dw] != lri) {
4827                                 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828                                        engine->name, dw, lri, lrc[dw]);
4829                                 err = -EINVAL;
4830                                 break;
4831                         }
4832
4833                         lri &= 0x7f;
4834                         lri++;
4835                         dw++;
4836
4837                         while (lri) {
4838                                 if (hw[dw] != lrc[dw]) {
4839                                         pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840                                                engine->name, dw, hw[dw], lrc[dw]);
4841                                         err = -EINVAL;
4842                                         break;
4843                                 }
4844
4845                                 /*
4846                                  * Skip over the actual register value as we
4847                                  * expect that to differ.
4848                                  */
4849                                 dw += 2;
4850                                 lri -= 2;
4851                         }
4852                 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4853
4854                 if (err) {
4855                         pr_info("%s: HW register image:\n", engine->name);
4856                         igt_hexdump(hw, PAGE_SIZE);
4857
4858                         pr_info("%s: SW register image:\n", engine->name);
4859                         igt_hexdump(lrc, PAGE_SIZE);
4860                 }
4861
4862                 shmem_unpin_map(engine->default_state, hw);
4863                 if (err)
4864                         break;
4865         }
4866
4867         kfree(lrc);
4868         return err;
4869 }
4870
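/* Return the dword index at which @offset appears in @lri, or -1 if absent */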
4871 static int find_offset(const u32 *lri, u32 offset)
4872 {
4873         int i;
4874
4875         for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4876                 if (lri[i] == offset)
4877                         return i;
4878
4879         return -1;
4880 }
4881
4882 static int live_lrc_fixed(void *arg)
4883 {
4884         struct intel_gt *gt = arg;
4885         struct intel_engine_cs *engine;
4886         enum intel_engine_id id;
4887         int err = 0;
4888
4889         /*
4890          * Check the assumed register offsets match the actual locations in
4891          * the context image.
4892          */
4893
4894         for_each_engine(engine, gt, id) {
4895                 const struct {
4896                         u32 reg;
4897                         u32 offset;
4898                         const char *name;
4899                 } tbl[] = {
4900                         {
4901                                 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4902                                 CTX_RING_START - 1,
4903                                 "RING_START"
4904                         },
4905                         {
4906                                 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4907                                 CTX_RING_CTL - 1,
4908                                 "RING_CTL"
4909                         },
4910                         {
4911                                 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4912                                 CTX_RING_HEAD - 1,
4913                                 "RING_HEAD"
4914                         },
4915                         {
4916                                 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4917                                 CTX_RING_TAIL - 1,
4918                                 "RING_TAIL"
4919                         },
4920                         {
4921                                 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4922                                 lrc_ring_mi_mode(engine),
4923                                 "RING_MI_MODE"
4924                         },
4925                         {
4926                                 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4927                                 CTX_BB_STATE - 1,
4928                                 "BB_STATE"
4929                         },
4930                         {
4931                                 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4932                                 lrc_ring_wa_bb_per_ctx(engine),
4933                                 "RING_BB_PER_CTX_PTR"
4934                         },
4935                         {
4936                                 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4937                                 lrc_ring_indirect_ptr(engine),
4938                                 "RING_INDIRECT_CTX_PTR"
4939                         },
4940                         {
4941                                 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4942                                 lrc_ring_indirect_offset(engine),
4943                                 "RING_INDIRECT_CTX_OFFSET"
4944                         },
4945                         {
4946                                 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4947                                 CTX_TIMESTAMP - 1,
4948                                 "RING_CTX_TIMESTAMP"
4949                         },
4950                         {
4951                                 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4952                                 lrc_ring_gpr0(engine),
4953                                 "RING_CS_GPR0"
4954                         },
4955                         {
4956                                 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4957                                 lrc_ring_cmd_buf_cctl(engine),
4958                                 "RING_CMD_BUF_CCTL"
4959                         },
4960                         { },
4961                 }, *t;
4962                 u32 *hw;
4963
4964                 if (!engine->default_state)
4965                         continue;
4966
4967                 hw = shmem_pin_map(engine->default_state);
4968                 if (!hw) {
4969                         err = -ENOMEM;
4970                         break;
4971                 }
4972                 hw += LRC_STATE_OFFSET / sizeof(*hw);
4973
4974                 for (t = tbl; t->name; t++) {
4975                         int dw = find_offset(hw, t->reg);
4976
4977                         if (dw != t->offset) {
4978                                 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4979                                        engine->name,
4980                                        t->name,
4981                                        t->reg,
4982                                        dw,
4983                                        t->offset);
4984                                 err = -EINVAL;
4985                         }
4986                 }
4987
4988                 shmem_unpin_map(engine->default_state, hw);
4989         }
4990
4991         return err;
4992 }
4993
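/*
 * On a fresh context, emit SRMs for RING_START and RING_TAIL into @scratch
 * and check that the values the HW stored match what we expect from the
 * intel_context (the ring's GGTT offset and submitted tail).
 */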
4994 static int __live_lrc_state(struct intel_engine_cs *engine,
4995                             struct i915_vma *scratch)
4996 {
4997         struct intel_context *ce;
4998         struct i915_request *rq;
4999         struct i915_gem_ww_ctx ww;
5000         enum {
5001                 RING_START_IDX = 0,
5002                 RING_TAIL_IDX,
5003                 MAX_IDX
5004         };
5005         u32 expected[MAX_IDX];
5006         u32 *cs;
5007         int err;
5008         int n;
5009
5010         ce = intel_context_create(engine);
5011         if (IS_ERR(ce))
5012                 return PTR_ERR(ce);
5013
5014         i915_gem_ww_ctx_init(&ww, false);
5015 retry:
5016         err = i915_gem_object_lock(scratch->obj, &ww);
5017         if (!err)
5018                 err = intel_context_pin_ww(ce, &ww);
5019         if (err)
5020                 goto err_put;
5021
5022         rq = i915_request_create(ce);
5023         if (IS_ERR(rq)) {
5024                 err = PTR_ERR(rq);
5025                 goto err_unpin;
5026         }
5027
5028         cs = intel_ring_begin(rq, 4 * MAX_IDX);
5029         if (IS_ERR(cs)) {
5030                 err = PTR_ERR(cs);
5031                 i915_request_add(rq);
5032                 goto err_unpin;
5033         }
5034
5035         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5036         *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
5037         *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
5038         *cs++ = 0;
5039
5040         expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
5041
5042         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5043         *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
5044         *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
5045         *cs++ = 0;
5046
5047         err = i915_request_await_object(rq, scratch->obj, true);
5048         if (!err)
5049                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5050
5051         i915_request_get(rq);
5052         i915_request_add(rq);
5053         if (err)
5054                 goto err_rq;
5055
5056         intel_engine_flush_submission(engine);
5057         expected[RING_TAIL_IDX] = ce->ring->tail;
5058
5059         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5060                 err = -ETIME;
5061                 goto err_rq;
5062         }
5063
5064         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5065         if (IS_ERR(cs)) {
5066                 err = PTR_ERR(cs);
5067                 goto err_rq;
5068         }
5069
5070         for (n = 0; n < MAX_IDX; n++) {
5071                 if (cs[n] != expected[n]) {
5072                         pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
5073                                engine->name, n, cs[n], expected[n]);
5074                         err = -EINVAL;
5075                         break;
5076                 }
5077         }
5078
5079         i915_gem_object_unpin_map(scratch->obj);
5080
5081 err_rq:
5082         i915_request_put(rq);
5083 err_unpin:
5084         intel_context_unpin(ce);
5085 err_put:
5086         if (err == -EDEADLK) {
5087                 err = i915_gem_ww_ctx_backoff(&ww);
5088                 if (!err)
5089                         goto retry;
5090         }
5091         i915_gem_ww_ctx_fini(&ww);
5092         intel_context_put(ce);
5093         return err;
5094 }
5095
5096 static int live_lrc_state(void *arg)
5097 {
5098         struct intel_gt *gt = arg;
5099         struct intel_engine_cs *engine;
5100         struct i915_vma *scratch;
5101         enum intel_engine_id id;
5102         int err = 0;
5103
5104         /*
5105          * Check the live register state matches what we expect for this
5106          * intel_context.
5107          */
5108
5109         scratch = create_scratch(gt);
5110         if (IS_ERR(scratch))
5111                 return PTR_ERR(scratch);
5112
5113         for_each_engine(engine, gt, id) {
5114                 err = __live_lrc_state(engine, scratch);
5115                 if (err)
5116                         break;
5117         }
5118
5119         if (igt_flush_test(gt->i915))
5120                 err = -EIO;
5121
5122         i915_vma_unpin_and_release(&scratch, 0);
5123         return err;
5124 }
5125
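/*
 * Write STACK_MAGIC into every CS_GPR on @ce so that a subsequent context
 * only sees zeroed GPRs if the HW (or context image) really clears them.
 */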
5126 static int gpr_make_dirty(struct intel_context *ce)
5127 {
5128         struct i915_request *rq;
5129         u32 *cs;
5130         int n;
5131
5132         rq = intel_context_create_request(ce);
5133         if (IS_ERR(rq))
5134                 return PTR_ERR(rq);
5135
5136         cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
5137         if (IS_ERR(cs)) {
5138                 i915_request_add(rq);
5139                 return PTR_ERR(cs);
5140         }
5141
5142         *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
5143         for (n = 0; n < NUM_GPR_DW; n++) {
5144                 *cs++ = CS_GPR(ce->engine, n);
5145                 *cs++ = STACK_MAGIC;
5146         }
5147         *cs++ = MI_NOOP;
5148
5149         intel_ring_advance(rq, cs);
5150
5151         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5152         i915_request_add(rq);
5153
5154         return 0;
5155 }
5156
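/*
 * Build a request on @ce that spins on a semaphore in the engine's status
 * page (@slot) until it becomes non-zero and then stores every CS_GPR
 * register into @scratch. Returned with a reference held by the caller.
 */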
5157 static struct i915_request *
5158 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
5159 {
5160         const u32 offset =
5161                 i915_ggtt_offset(ce->engine->status_page.vma) +
5162                 offset_in_page(slot);
5163         struct i915_request *rq;
5164         u32 *cs;
5165         int err;
5166         int n;
5167
5168         rq = intel_context_create_request(ce);
5169         if (IS_ERR(rq))
5170                 return rq;
5171
5172         cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
5173         if (IS_ERR(cs)) {
5174                 i915_request_add(rq);
5175                 return ERR_CAST(cs);
5176         }
5177
5178         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5179         *cs++ = MI_NOOP;
5180
5181         *cs++ = MI_SEMAPHORE_WAIT |
5182                 MI_SEMAPHORE_GLOBAL_GTT |
5183                 MI_SEMAPHORE_POLL |
5184                 MI_SEMAPHORE_SAD_NEQ_SDD;
5185         *cs++ = 0;
5186         *cs++ = offset;
5187         *cs++ = 0;
5188
5189         for (n = 0; n < NUM_GPR_DW; n++) {
5190                 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5191                 *cs++ = CS_GPR(ce->engine, n);
5192                 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
5193                 *cs++ = 0;
5194         }
5195
5196         i915_vma_lock(scratch);
5197         err = i915_request_await_object(rq, scratch->obj, true);
5198         if (!err)
5199                 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5200         i915_vma_unlock(scratch);
5201
5202         i915_request_get(rq);
5203         i915_request_add(rq);
5204         if (err) {
5205                 i915_request_put(rq);
5206                 rq = ERR_PTR(err);
5207         }
5208
5209         return rq;
5210 }
5211
5212 static int __live_lrc_gpr(struct intel_engine_cs *engine,
5213                           struct i915_vma *scratch,
5214                           bool preempt)
5215 {
5216         u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
5217         struct intel_context *ce;
5218         struct i915_request *rq;
5219         u32 *cs;
5220         int err;
5221         int n;
5222
5223         if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
5224                 return 0; /* GPR only on rcs0 for gen8 */
5225
5226         err = gpr_make_dirty(engine->kernel_context);
5227         if (err)
5228                 return err;
5229
5230         ce = intel_context_create(engine);
5231         if (IS_ERR(ce))
5232                 return PTR_ERR(ce);
5233
5234         rq = __gpr_read(ce, scratch, slot);
5235         if (IS_ERR(rq)) {
5236                 err = PTR_ERR(rq);
5237                 goto err_put;
5238         }
5239
5240         err = wait_for_submit(engine, rq, HZ / 2);
5241         if (err)
5242                 goto err_rq;
5243
5244         if (preempt) {
5245                 err = gpr_make_dirty(engine->kernel_context);
5246                 if (err)
5247                         goto err_rq;
5248
5249                 err = emit_semaphore_signal(engine->kernel_context, slot);
5250                 if (err)
5251                         goto err_rq;
5252         } else {
5253                 slot[0] = 1;
5254                 wmb();
5255         }
5256
5257         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5258                 err = -ETIME;
5259                 goto err_rq;
5260         }
5261
5262         cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5263         if (IS_ERR(cs)) {
5264                 err = PTR_ERR(cs);
5265                 goto err_rq;
5266         }
5267
5268         for (n = 0; n < NUM_GPR_DW; n++) {
5269                 if (cs[n]) {
5270                         pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
5271                                engine->name,
5272                                n / 2, n & 1 ? "udw" : "ldw",
5273                                cs[n]);
5274                         err = -EINVAL;
5275                         break;
5276                 }
5277         }
5278
5279         i915_gem_object_unpin_map(scratch->obj);
5280
5281 err_rq:
5282         memset32(&slot[0], -1, 4);
5283         wmb();
5284         i915_request_put(rq);
5285 err_put:
5286         intel_context_put(ce);
5287         return err;
5288 }
5289
5290 static int live_lrc_gpr(void *arg)
5291 {
5292         struct intel_gt *gt = arg;
5293         struct intel_engine_cs *engine;
5294         struct i915_vma *scratch;
5295         enum intel_engine_id id;
5296         int err = 0;
5297
5298         /*
5299          * Check that GPR registers are cleared in new contexts as we need
5300          * to avoid leaking any information from previous contexts.
5301          */
5302
5303         scratch = create_scratch(gt);
5304         if (IS_ERR(scratch))
5305                 return PTR_ERR(scratch);
5306
5307         for_each_engine(engine, gt, id) {
5308                 st_engine_heartbeat_disable(engine);
5309
5310                 err = __live_lrc_gpr(engine, scratch, false);
5311                 if (err)
5312                         goto err;
5313
5314                 err = __live_lrc_gpr(engine, scratch, true);
5315                 if (err)
5316                         goto err;
5317
5318 err:
5319                 st_engine_heartbeat_enable(engine);
5320                 if (igt_flush_test(gt->i915))
5321                         err = -EIO;
5322                 if (err)
5323                         break;
5324         }
5325
5326         i915_vma_unpin_and_release(&scratch, 0);
5327         return err;
5328 }
5329
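/*
 * Build a request on @ce that waits for the status-page semaphore at @slot
 * to become non-zero and then stores RING_CTX_TIMESTAMP into slot[@idx].
 * Returned with a reference held by the caller.
 */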
5330 static struct i915_request *
5331 create_timestamp(struct intel_context *ce, void *slot, int idx)
5332 {
5333         const u32 offset =
5334                 i915_ggtt_offset(ce->engine->status_page.vma) +
5335                 offset_in_page(slot);
5336         struct i915_request *rq;
5337         u32 *cs;
5338         int err;
5339
5340         rq = intel_context_create_request(ce);
5341         if (IS_ERR(rq))
5342                 return rq;
5343
5344         cs = intel_ring_begin(rq, 10);
5345         if (IS_ERR(cs)) {
5346                 err = PTR_ERR(cs);
5347                 goto err;
5348         }
5349
5350         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5351         *cs++ = MI_NOOP;
5352
5353         *cs++ = MI_SEMAPHORE_WAIT |
5354                 MI_SEMAPHORE_GLOBAL_GTT |
5355                 MI_SEMAPHORE_POLL |
5356                 MI_SEMAPHORE_SAD_NEQ_SDD;
5357         *cs++ = 0;
5358         *cs++ = offset;
5359         *cs++ = 0;
5360
5361         *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5362         *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5363         *cs++ = offset + idx * sizeof(u32);
5364         *cs++ = 0;
5365
5366         intel_ring_advance(rq, cs);
5367
5368         rq->sched.attr.priority = I915_PRIORITY_MASK;
5369         err = 0;
5370 err:
5371         i915_request_get(rq);
5372         i915_request_add(rq);
5373         if (err) {
5374                 i915_request_put(rq);
5375                 return ERR_PTR(err);
5376         }
5377
5378         return rq;
5379 }
5380
5381 struct lrc_timestamp {
5382         struct intel_engine_cs *engine;
5383         struct intel_context *ce[2];
5384         u32 poison;
5385 };
5386
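/* Compare as a signed delta so monotonic progress survives u32 wraparound */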
5387 static bool timestamp_advanced(u32 start, u32 end)
5388 {
5389         return (s32)(end - start) > 0;
5390 }
5391
5392 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5393 {
5394         u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5395         struct i915_request *rq;
5396         u32 timestamp;
5397         int err = 0;
5398
5399         arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5400         rq = create_timestamp(arg->ce[0], slot, 1);
5401         if (IS_ERR(rq))
5402                 return PTR_ERR(rq);
5403
5404         err = wait_for_submit(rq->engine, rq, HZ / 2);
5405         if (err)
5406                 goto err;
5407
5408         if (preempt) {
5409                 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5410                 err = emit_semaphore_signal(arg->ce[1], slot);
5411                 if (err)
5412                         goto err;
5413         } else {
5414                 slot[0] = 1;
5415                 wmb();
5416         }
5417
5418         /* And wait for switch to kernel (to save our context to memory) */
5419         err = context_flush(arg->ce[0], HZ / 2);
5420         if (err)
5421                 goto err;
5422
5423         if (!timestamp_advanced(arg->poison, slot[1])) {
5424                 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5425                        arg->engine->name, preempt ? "preempt" : "simple",
5426                        arg->poison, slot[1]);
5427                 err = -EINVAL;
5428         }
5429
5430         timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5431         if (!timestamp_advanced(slot[1], timestamp)) {
5432                 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5433                        arg->engine->name, preempt ? "preempt" : "simple",
5434                        slot[1], timestamp);
5435                 err = -EINVAL;
5436         }
5437
5438 err:
5439         memset32(slot, -1, 4);
5440         i915_request_put(rq);
5441         return err;
5442 }
5443
5444 static int live_lrc_timestamp(void *arg)
5445 {
5446         struct lrc_timestamp data = {};
5447         struct intel_gt *gt = arg;
5448         enum intel_engine_id id;
5449         const u32 poison[] = {
5450                 0,
5451                 S32_MAX,
5452                 (u32)S32_MAX + 1,
5453                 U32_MAX,
5454         };
5455
5456         /*
5457          * We want to verify that the timestamp is saved and restored across
5458          * context switches and is monotonic.
5459          *
5460          * So we do this with a little bit of LRC poisoning to check various
5461          * boundary conditions, and see what happens if we preempt the context
5462          * with a second request (carrying more poison into the timestamp).
5463          */
5464
5465         for_each_engine(data.engine, gt, id) {
5466                 int i, err = 0;
5467
5468                 st_engine_heartbeat_disable(data.engine);
5469
5470                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5471                         struct intel_context *tmp;
5472
5473                         tmp = intel_context_create(data.engine);
5474                         if (IS_ERR(tmp)) {
5475                                 err = PTR_ERR(tmp);
5476                                 goto err;
5477                         }
5478
5479                         err = intel_context_pin(tmp);
5480                         if (err) {
5481                                 intel_context_put(tmp);
5482                                 goto err;
5483                         }
5484
5485                         data.ce[i] = tmp;
5486                 }
5487
5488                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5489                         data.poison = poison[i];
5490
5491                         err = __lrc_timestamp(&data, false);
5492                         if (err)
5493                                 break;
5494
5495                         err = __lrc_timestamp(&data, true);
5496                         if (err)
5497                                 break;
5498                 }
5499
5500 err:
5501                 st_engine_heartbeat_enable(data.engine);
5502                 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5503                         if (!data.ce[i])
5504                                 break;
5505
5506                         intel_context_unpin(data.ce[i]);
5507                         intel_context_put(data.ce[i]);
5508                 }
5509
5510                 if (igt_flush_test(gt->i915))
5511                         err = -EIO;
5512                 if (err)
5513                         return err;
5514         }
5515
5516         return 0;
5517 }
5518
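/* Allocate an internal object of @size and pin it into @vm as a user vma */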
5519 static struct i915_vma *
5520 create_user_vma(struct i915_address_space *vm, unsigned long size)
5521 {
5522         struct drm_i915_gem_object *obj;
5523         struct i915_vma *vma;
5524         int err;
5525
5526         obj = i915_gem_object_create_internal(vm->i915, size);
5527         if (IS_ERR(obj))
5528                 return ERR_CAST(obj);
5529
5530         vma = i915_vma_instance(obj, vm, NULL);
5531         if (IS_ERR(vma)) {
5532                 i915_gem_object_put(obj);
5533                 return vma;
5534         }
5535
5536         err = i915_vma_pin(vma, 0, 0, PIN_USER);
5537         if (err) {
5538                 i915_gem_object_put(obj);
5539                 return ERR_PTR(err);
5540         }
5541
5542         return vma;
5543 }
5544
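/*
 * Build a user batch that walks the LRI list in the engine's default
 * context image and emits an SRM for each register named there, dumping
 * the current value of every context register into @scratch.
 */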
5545 static struct i915_vma *
5546 store_context(struct intel_context *ce, struct i915_vma *scratch)
5547 {
5548         struct i915_vma *batch;
5549         u32 dw, x, *cs, *hw;
5550         u32 *defaults;
5551
5552         batch = create_user_vma(ce->vm, SZ_64K);
5553         if (IS_ERR(batch))
5554                 return batch;
5555
5556         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5557         if (IS_ERR(cs)) {
5558                 i915_vma_put(batch);
5559                 return ERR_CAST(cs);
5560         }
5561
5562         defaults = shmem_pin_map(ce->engine->default_state);
5563         if (!defaults) {
5564                 i915_gem_object_unpin_map(batch->obj);
5565                 i915_vma_put(batch);
5566                 return ERR_PTR(-ENOMEM);
5567         }
5568
5569         x = 0;
5570         dw = 0;
5571         hw = defaults;
5572         hw += LRC_STATE_OFFSET / sizeof(*hw);
5573         do {
5574                 u32 len = hw[dw] & 0x7f;
5575
5576                 if (hw[dw] == 0) {
5577                         dw++;
5578                         continue;
5579                 }
5580
5581                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5582                         dw += len + 2;
5583                         continue;
5584                 }
5585
5586                 dw++;
5587                 len = (len + 1) / 2;
5588                 while (len--) {
5589                         *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5590                         *cs++ = hw[dw];
5591                         *cs++ = lower_32_bits(scratch->node.start + x);
5592                         *cs++ = upper_32_bits(scratch->node.start + x);
5593
5594                         dw += 2;
5595                         x += 4;
5596                 }
5597         } while (dw < PAGE_SIZE / sizeof(u32) &&
5598                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5599
5600         *cs++ = MI_BATCH_BUFFER_END;
5601
5602         shmem_unpin_map(ce->engine->default_state, defaults);
5603
5604         i915_gem_object_flush_map(batch->obj);
5605         i915_gem_object_unpin_map(batch->obj);
5606
5607         return batch;
5608 }
5609
5610 static int move_to_active(struct i915_request *rq,
5611                           struct i915_vma *vma,
5612                           unsigned int flags)
5613 {
5614         int err;
5615
5616         i915_vma_lock(vma);
5617         err = i915_request_await_object(rq, vma->obj, flags);
5618         if (!err)
5619                 err = i915_vma_move_to_active(vma, rq, flags);
5620         i915_vma_unlock(vma);
5621
5622         return err;
5623 }
5624
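/*
 * Submit a request on @ce that captures the context registers into @before,
 * blocks on the @sema semaphore in the status page (cleared on submission),
 * and then captures them again into @after. Returned with a reference held
 * by the caller.
 */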
5625 static struct i915_request *
5626 record_registers(struct intel_context *ce,
5627                  struct i915_vma *before,
5628                  struct i915_vma *after,
5629                  u32 *sema)
5630 {
5631         struct i915_vma *b_before, *b_after;
5632         struct i915_request *rq;
5633         u32 *cs;
5634         int err;
5635
5636         b_before = store_context(ce, before);
5637         if (IS_ERR(b_before))
5638                 return ERR_CAST(b_before);
5639
5640         b_after = store_context(ce, after);
5641         if (IS_ERR(b_after)) {
5642                 rq = ERR_CAST(b_after);
5643                 goto err_before;
5644         }
5645
5646         rq = intel_context_create_request(ce);
5647         if (IS_ERR(rq))
5648                 goto err_after;
5649
5650         err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5651         if (err)
5652                 goto err_rq;
5653
5654         err = move_to_active(rq, b_before, 0);
5655         if (err)
5656                 goto err_rq;
5657
5658         err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5659         if (err)
5660                 goto err_rq;
5661
5662         err = move_to_active(rq, b_after, 0);
5663         if (err)
5664                 goto err_rq;
5665
5666         cs = intel_ring_begin(rq, 14);
5667         if (IS_ERR(cs)) {
5668                 err = PTR_ERR(cs);
5669                 goto err_rq;
5670         }
5671
5672         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5673         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5674         *cs++ = lower_32_bits(b_before->node.start);
5675         *cs++ = upper_32_bits(b_before->node.start);
5676
5677         *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5678         *cs++ = MI_SEMAPHORE_WAIT |
5679                 MI_SEMAPHORE_GLOBAL_GTT |
5680                 MI_SEMAPHORE_POLL |
5681                 MI_SEMAPHORE_SAD_NEQ_SDD;
5682         *cs++ = 0;
5683         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5684                 offset_in_page(sema);
5685         *cs++ = 0;
5686         *cs++ = MI_NOOP;
5687
5688         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5689         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5690         *cs++ = lower_32_bits(b_after->node.start);
5691         *cs++ = upper_32_bits(b_after->node.start);
5692
5693         intel_ring_advance(rq, cs);
5694
5695         WRITE_ONCE(*sema, 0);
5696         i915_request_get(rq);
5697         i915_request_add(rq);
5698 err_after:
5699         i915_vma_put(b_after);
5700 err_before:
5701         i915_vma_put(b_before);
5702         return rq;
5703
5704 err_rq:
5705         i915_request_add(rq);
5706         rq = ERR_PTR(err);
5707         goto err_after;
5708 }
5709
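/*
 * Build a user batch of MI_LOAD_REGISTER_IMM commands that writes @poison
 * into every register named by the LRI list of the default context image.
 */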
5710 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5711 {
5712         struct i915_vma *batch;
5713         u32 dw, *cs, *hw;
5714         u32 *defaults;
5715
5716         batch = create_user_vma(ce->vm, SZ_64K);
5717         if (IS_ERR(batch))
5718                 return batch;
5719
5720         cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5721         if (IS_ERR(cs)) {
5722                 i915_vma_put(batch);
5723                 return ERR_CAST(cs);
5724         }
5725
5726         defaults = shmem_pin_map(ce->engine->default_state);
5727         if (!defaults) {
5728                 i915_gem_object_unpin_map(batch->obj);
5729                 i915_vma_put(batch);
5730                 return ERR_PTR(-ENOMEM);
5731         }
5732
5733         dw = 0;
5734         hw = defaults;
5735         hw += LRC_STATE_OFFSET / sizeof(*hw);
5736         do {
5737                 u32 len = hw[dw] & 0x7f;
5738
5739                 if (hw[dw] == 0) {
5740                         dw++;
5741                         continue;
5742                 }
5743
5744                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5745                         dw += len + 2;
5746                         continue;
5747                 }
5748
5749                 dw++;
5750                 len = (len + 1) / 2;
5751                 *cs++ = MI_LOAD_REGISTER_IMM(len);
5752                 while (len--) {
5753                         *cs++ = hw[dw];
5754                         *cs++ = poison;
5755                         dw += 2;
5756                 }
5757         } while (dw < PAGE_SIZE / sizeof(u32) &&
5758                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5759
5760         *cs++ = MI_BATCH_BUFFER_END;
5761
5762         shmem_unpin_map(ce->engine->default_state, defaults);
5763
5764         i915_gem_object_flush_map(batch->obj);
5765         i915_gem_object_unpin_map(batch->obj);
5766
5767         return batch;
5768 }
5769
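/*
 * Run the poison batch on @ce with arbitration disabled, then write 1 to
 * @sema so that the recording request on the victim context may proceed.
 */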
5770 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5771 {
5772         struct i915_request *rq;
5773         struct i915_vma *batch;
5774         u32 *cs;
5775         int err;
5776
5777         batch = load_context(ce, poison);
5778         if (IS_ERR(batch))
5779                 return PTR_ERR(batch);
5780
5781         rq = intel_context_create_request(ce);
5782         if (IS_ERR(rq)) {
5783                 err = PTR_ERR(rq);
5784                 goto err_batch;
5785         }
5786
5787         err = move_to_active(rq, batch, 0);
5788         if (err)
5789                 goto err_rq;
5790
5791         cs = intel_ring_begin(rq, 8);
5792         if (IS_ERR(cs)) {
5793                 err = PTR_ERR(cs);
5794                 goto err_rq;
5795         }
5796
5797         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5798         *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5799         *cs++ = lower_32_bits(batch->node.start);
5800         *cs++ = upper_32_bits(batch->node.start);
5801
5802         *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5803         *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5804                 offset_in_page(sema);
5805         *cs++ = 0;
5806         *cs++ = 1;
5807
5808         intel_ring_advance(rq, cs);
5809
5810         rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5811 err_rq:
5812         i915_request_add(rq);
5813 err_batch:
5814         i915_vma_put(batch);
5815         return err;
5816 }
5817
5818 static bool is_moving(u32 a, u32 b)
5819 {
5820         return a != b;
5821 }
5822
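/*
 * Compare the register dumps taken before (@ref) and after (@result) the
 * poisoning pass. Registers that were already changing between the two
 * reference samples, and RING_HEAD/RING_TAIL, are ignored; any other
 * difference means the poison leaked across contexts.
 */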
5823 static int compare_isolation(struct intel_engine_cs *engine,
5824                              struct i915_vma *ref[2],
5825                              struct i915_vma *result[2],
5826                              struct intel_context *ce,
5827                              u32 poison)
5828 {
5829         u32 x, dw, *hw, *lrc;
5830         u32 *A[2], *B[2];
5831         u32 *defaults;
5832         int err = 0;
5833
5834         A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5835         if (IS_ERR(A[0]))
5836                 return PTR_ERR(A[0]);
5837
5838         A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5839         if (IS_ERR(A[1])) {
5840                 err = PTR_ERR(A[1]);
5841                 goto err_A0;
5842         }
5843
5844         B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5845         if (IS_ERR(B[0])) {
5846                 err = PTR_ERR(B[0]);
5847                 goto err_A1;
5848         }
5849
5850         B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5851         if (IS_ERR(B[1])) {
5852                 err = PTR_ERR(B[1]);
5853                 goto err_B0;
5854         }
5855
5856         lrc = i915_gem_object_pin_map(ce->state->obj,
5857                                       i915_coherent_map_type(engine->i915));
5858         if (IS_ERR(lrc)) {
5859                 err = PTR_ERR(lrc);
5860                 goto err_B1;
5861         }
5862         lrc += LRC_STATE_OFFSET / sizeof(*hw);
5863
5864         defaults = shmem_pin_map(ce->engine->default_state);
5865         if (!defaults) {
5866                 err = -ENOMEM;
5867                 goto err_lrc;
5868         }
5869
5870         x = 0;
5871         dw = 0;
5872         hw = defaults;
5873         hw += LRC_STATE_OFFSET / sizeof(*hw);
5874         do {
5875                 u32 len = hw[dw] & 0x7f;
5876
5877                 if (hw[dw] == 0) {
5878                         dw++;
5879                         continue;
5880                 }
5881
5882                 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5883                         dw += len + 2;
5884                         continue;
5885                 }
5886
5887                 dw++;
5888                 len = (len + 1) / 2;
5889                 while (len--) {
5890                         if (!is_moving(A[0][x], A[1][x]) &&
5891                             (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5892                                 switch (hw[dw] & 4095) {
5893                                 case 0x30: /* RING_HEAD */
5894                                 case 0x34: /* RING_TAIL */
5895                                         break;
5896
5897                                 default:
5898                                         pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5899                                                engine->name, dw,
5900                                                hw[dw], hw[dw + 1],
5901                                                A[0][x], B[0][x], B[1][x],
5902                                                poison, lrc[dw + 1]);
5903                                         err = -EINVAL;
5904                                 }
5905                         }
5906                         dw += 2;
5907                         x++;
5908                 }
5909         } while (dw < PAGE_SIZE / sizeof(u32) &&
5910                  (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5911
5912         shmem_unpin_map(ce->engine->default_state, defaults);
5913 err_lrc:
5914         i915_gem_object_unpin_map(ce->state->obj);
5915 err_B1:
5916         i915_gem_object_unpin_map(result[1]->obj);
5917 err_B0:
5918         i915_gem_object_unpin_map(result[0]->obj);
5919 err_A1:
5920         i915_gem_object_unpin_map(ref[1]->obj);
5921 err_A0:
5922         i915_gem_object_unpin_map(ref[0]->obj);
5923         return err;
5924 }
5925
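/*
 * Take a reference dump of context A's registers, then repeat the dump while
 * context B rewrites every context register with @poison between the two
 * samples. If the contexts are properly isolated, A's captures must be
 * unaffected by B's writes.
 */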
5926 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5927 {
5928         u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5929         struct i915_vma *ref[2], *result[2];
5930         struct intel_context *A, *B;
5931         struct i915_request *rq;
5932         int err;
5933
5934         A = intel_context_create(engine);
5935         if (IS_ERR(A))
5936                 return PTR_ERR(A);
5937
5938         B = intel_context_create(engine);
5939         if (IS_ERR(B)) {
5940                 err = PTR_ERR(B);
5941                 goto err_A;
5942         }
5943
5944         ref[0] = create_user_vma(A->vm, SZ_64K);
5945         if (IS_ERR(ref[0])) {
5946                 err = PTR_ERR(ref[0]);
5947                 goto err_B;
5948         }
5949
5950         ref[1] = create_user_vma(A->vm, SZ_64K);
5951         if (IS_ERR(ref[1])) {
5952                 err = PTR_ERR(ref[1]);
5953                 goto err_ref0;
5954         }
5955
5956         rq = record_registers(A, ref[0], ref[1], sema);
5957         if (IS_ERR(rq)) {
5958                 err = PTR_ERR(rq);
5959                 goto err_ref1;
5960         }
5961
5962         WRITE_ONCE(*sema, 1);
5963         wmb();
5964
5965         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5966                 i915_request_put(rq);
5967                 err = -ETIME;
5968                 goto err_ref1;
5969         }
5970         i915_request_put(rq);
5971
5972         result[0] = create_user_vma(A->vm, SZ_64K);
5973         if (IS_ERR(result[0])) {
5974                 err = PTR_ERR(result[0]);
5975                 goto err_ref1;
5976         }
5977
5978         result[1] = create_user_vma(A->vm, SZ_64K);
5979         if (IS_ERR(result[1])) {
5980                 err = PTR_ERR(result[1]);
5981                 goto err_result0;
5982         }
5983
5984         rq = record_registers(A, result[0], result[1], sema);
5985         if (IS_ERR(rq)) {
5986                 err = PTR_ERR(rq);
5987                 goto err_result1;
5988         }
5989
5990         err = poison_registers(B, poison, sema);
5991         if (err) {
5992                 WRITE_ONCE(*sema, -1);
5993                 i915_request_put(rq);
5994                 goto err_result1;
5995         }
5996
5997         if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5998                 i915_request_put(rq);
5999                 err = -ETIME;
6000                 goto err_result1;
6001         }
6002         i915_request_put(rq);
6003
6004         err = compare_isolation(engine, ref, result, A, poison);
6005
6006 err_result1:
6007         i915_vma_put(result[1]);
6008 err_result0:
6009         i915_vma_put(result[0]);
6010 err_ref1:
6011         i915_vma_put(ref[1]);
6012 err_ref0:
6013         i915_vma_put(ref[0]);
6014 err_B:
6015         intel_context_put(B);
6016 err_A:
6017         intel_context_put(A);
6018         return err;
6019 }
6020
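/*
 * Engines whose context isolation is known to be broken (gen9 BCS, gen11
 * RCS) and which are therefore skipped unless explicitly asked for.
 */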
6021 static bool skip_isolation(const struct intel_engine_cs *engine)
6022 {
6023         if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
6024                 return true;
6025
6026         if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
6027                 return true;
6028
6029         return false;
6030 }
6031
6032 static int live_lrc_isolation(void *arg)
6033 {
6034         struct intel_gt *gt = arg;
6035         struct intel_engine_cs *engine;
6036         enum intel_engine_id id;
6037         const u32 poison[] = {
6038                 STACK_MAGIC,
6039                 0x3a3a3a3a,
6040                 0x5c5c5c5c,
6041                 0xffffffff,
6042                 0xffff0000,
6043         };
6044         int err = 0;
6045
6046         /*
6047          * Our goal is to try to verify that per-context state cannot be
6048          * tampered with by another non-privileged client.
6049          *
6050          * We take the list of context registers from the LRI in the default
6051          * context image and attempt to modify that list from a remote context.
6052          */
6053
6054         for_each_engine(engine, gt, id) {
6055                 int i;
6056
6057                 /* Just don't even ask */
6058                 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6059                     skip_isolation(engine))
6060                         continue;
6061
6062                 intel_engine_pm_get(engine);
6063                 for (i = 0; i < ARRAY_SIZE(poison); i++) {
6064                         int result;
6065
6066                         result = __lrc_isolation(engine, poison[i]);
6067                         if (result && !err)
6068                                 err = result;
6069
6070                         result = __lrc_isolation(engine, ~poison[i]);
6071                         if (result && !err)
6072                                 err = result;
6073                 }
6074                 intel_engine_pm_put(engine);
6075                 if (igt_flush_test(gt->i915)) {
6076                         err = -EIO;
6077                         break;
6078                 }
6079         }
6080
6081         return err;
6082 }
6083
6084 static int indirect_ctx_submit_req(struct intel_context *ce)
6085 {
6086         struct i915_request *rq;
6087         int err = 0;
6088
6089         rq = intel_context_create_request(ce);
6090         if (IS_ERR(rq))
6091                 return PTR_ERR(rq);
6092
6093         i915_request_get(rq);
6094         i915_request_add(rq);
6095
6096         if (i915_request_wait(rq, 0, HZ / 5) < 0)
6097                 err = -ETIME;
6098
6099         i915_request_put(rq);
6100
6101         return err;
6102 }
6103
6104 #define CTX_BB_CANARY_OFFSET (3 * 1024)
6105 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
6106
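/*
 * Store RING_START (engine-relative, hence MI_LRI_LRM_CS_MMIO) into the
 * canary dword of this context's wa-bb page.
 */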
6107 static u32 *
6108 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6109 {
6110         *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6111                 MI_SRM_LRM_GLOBAL_GTT |
6112                 MI_LRI_LRM_CS_MMIO;
6113         *cs++ = i915_mmio_reg_offset(RING_START(0));
6114         *cs++ = i915_ggtt_offset(ce->state) +
6115                 context_wa_bb_offset(ce) +
6116                 CTX_BB_CANARY_OFFSET;
6117         *cs++ = 0;
6118
6119         return cs;
6120 }
6121
6122 static void
6123 indirect_ctx_bb_setup(struct intel_context *ce)
6124 {
6125         u32 *cs = context_indirect_bb(ce);
6126
6127         cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6128
6129         setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6130 }
6131
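/*
 * The canary written by the indirect ctx bb must equal the RING_START value
 * recorded in the saved context register state.
 */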
6132 static bool check_ring_start(struct intel_context *ce)
6133 {
6134         const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6135                 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6136
6137         if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6138                 return true;
6139
6140         pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6141                ctx_bb[CTX_BB_CANARY_INDEX],
6142                ce->lrc_reg_state[CTX_RING_START]);
6143
6144         return false;
6145 }
6146
6147 static int indirect_ctx_bb_check(struct intel_context *ce)
6148 {
6149         int err;
6150
6151         err = indirect_ctx_submit_req(ce);
6152         if (err)
6153                 return err;
6154
6155         if (!check_ring_start(ce))
6156                 return -EINVAL;
6157
6158         return 0;
6159 }
6160
6161 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6162 {
6163         struct intel_context *a, *b;
6164         int err;
6165
6166         a = intel_context_create(engine);
6167         if (IS_ERR(a))
6168                 return PTR_ERR(a);
6169         err = intel_context_pin(a);
6170         if (err)
6171                 goto put_a;
6172
6173         b = intel_context_create(engine);
6174         if (IS_ERR(b)) {
6175                 err = PTR_ERR(b);
6176                 goto unpin_a;
6177         }
6178         err = intel_context_pin(b);
6179         if (err)
6180                 goto put_b;
6181
6182         /* We use the extra page already reserved in the context state */
6183         if (!a->wa_bb_page) {
6184                 GEM_BUG_ON(b->wa_bb_page);
6185                 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6186                 goto unpin_b;
6187         }
6188
6189         /*
6190          * In order to test that our per-context bb is truly per context,
6191          * and that it executes at the intended point of the context
6192          * restore, make the batch store the ring start value to memory.
6193          * As ring start is restored before the indirect ctx bb runs, and
6194          * as it differs for each context, it suits this purpose.
6195          */
6196         indirect_ctx_bb_setup(a);
6197         indirect_ctx_bb_setup(b);
6198
6199         err = indirect_ctx_bb_check(a);
6200         if (err)
6201                 goto unpin_b;
6202
6203         err = indirect_ctx_bb_check(b);
6204
6205 unpin_b:
6206         intel_context_unpin(b);
6207 put_b:
6208         intel_context_put(b);
6209 unpin_a:
6210         intel_context_unpin(a);
6211 put_a:
6212         intel_context_put(a);
6213
6214         return err;
6215 }
6216
6217 static int live_lrc_indirect_ctx_bb(void *arg)
6218 {
6219         struct intel_gt *gt = arg;
6220         struct intel_engine_cs *engine;
6221         enum intel_engine_id id;
6222         int err = 0;
6223
6224         for_each_engine(engine, gt, id) {
6225                 intel_engine_pm_get(engine);
6226                 err = __live_lrc_indirect_ctx_bb(engine);
6227                 intel_engine_pm_put(engine);
6228
6229                 if (igt_flush_test(gt->i915))
6230                         err = -EIO;
6231
6232                 if (err)
6233                         break;
6234         }
6235
6236         return err;
6237 }
6238
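/*
 * Perform a manual engine reset with the submission tasklet disabled,
 * claiming the per-engine reset bit so we do not race the normal reset
 * paths.
 */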
6239 static void garbage_reset(struct intel_engine_cs *engine,
6240                           struct i915_request *rq)
6241 {
6242         const unsigned int bit = I915_RESET_ENGINE + engine->id;
6243         unsigned long *lock = &engine->gt->reset.flags;
6244
6245         if (test_and_set_bit(bit, lock))
6246                 return;
6247
6248         tasklet_disable(&engine->execlists.tasklet);
6249
6250         if (!rq->fence.error)
6251                 intel_engine_reset(engine, NULL);
6252
6253         tasklet_enable(&engine->execlists.tasklet);
6254         clear_and_wake_up_bit(bit, lock);
6255 }
6256
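/*
 * Scramble the pinned context image with random bytes and submit a request
 * on it, returning that request for the caller to inspect.
 */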
6257 static struct i915_request *garbage(struct intel_context *ce,
6258                                     struct rnd_state *prng)
6259 {
6260         struct i915_request *rq;
6261         int err;
6262
6263         err = intel_context_pin(ce);
6264         if (err)
6265                 return ERR_PTR(err);
6266
6267         prandom_bytes_state(prng,
6268                             ce->lrc_reg_state,
6269                             ce->engine->context_size -
6270                             LRC_STATE_OFFSET);
6271
6272         rq = intel_context_create_request(ce);
6273         if (IS_ERR(rq)) {
6274                 err = PTR_ERR(rq);
6275                 goto err_unpin;
6276         }
6277
6278         i915_request_get(rq);
6279         i915_request_add(rq);
6280         return rq;
6281
6282 err_unpin:
6283         intel_context_unpin(ce);
6284         return ERR_PTR(err);
6285 }
6286
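/*
 * Submit a request on a context whose image has been filled with garbage,
 * reset the engine, and check that the request is flagged with an error
 * and that the engine recovers afterwards.
 */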
6287 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6288 {
6289         struct intel_context *ce;
6290         struct i915_request *hang;
6291         int err = 0;
6292
6293         ce = intel_context_create(engine);
6294         if (IS_ERR(ce))
6295                 return PTR_ERR(ce);
6296
6297         hang = garbage(ce, prng);
6298         if (IS_ERR(hang)) {
6299                 err = PTR_ERR(hang);
6300                 goto err_ce;
6301         }
6302
6303         if (wait_for_submit(engine, hang, HZ / 2)) {
6304                 i915_request_put(hang);
6305                 err = -ETIME;
6306                 goto err_ce;
6307         }
6308
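        /* Ban the context so its corrupted state is not replayed after reset */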
6309         intel_context_set_banned(ce);
6310         garbage_reset(engine, hang);
6311
6312         intel_engine_flush_submission(engine);
6313         if (!hang->fence.error) {
6314                 i915_request_put(hang);
6315                 pr_err("%s: corrupted context was not reset\n",
6316                        engine->name);
6317                 err = -EINVAL;
6318                 goto err_ce;
6319         }
6320
6321         if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6322                 pr_err("%s: corrupted context did not recover\n",
6323                        engine->name);
6324                 i915_request_put(hang);
6325                 err = -EIO;
6326                 goto err_ce;
6327         }
6328         i915_request_put(hang);
6329
6330 err_ce:
6331         intel_context_put(ce);
6332         return err;
6333 }
6334
6335 static int live_lrc_garbage(void *arg)
6336 {
6337         struct intel_gt *gt = arg;
6338         struct intel_engine_cs *engine;
6339         enum intel_engine_id id;
6340
6341         /*
6342          * Verify that we can recover if the state of one context is
6343          * completely corrupted.
6344          */
6345
6346         if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6347                 return 0;
6348
6349         for_each_engine(engine, gt, id) {
6350                 I915_RND_STATE(prng);
6351                 int err = 0, i;
6352
6353                 if (!intel_has_reset_engine(engine->gt))
6354                         continue;
6355
6356                 intel_engine_pm_get(engine);
6357                 for (i = 0; i < 3; i++) {
6358                         err = __lrc_garbage(engine, &prng);
6359                         if (err)
6360                                 break;
6361                 }
6362                 intel_engine_pm_put(engine);
6363
6364                 if (igt_flush_test(gt->i915))
6365                         err = -EIO;
6366                 if (err)
6367                         return err;
6368         }
6369
6370         return 0;
6371 }
6372
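/*
 * Flood a context with empty requests until the selftest timeout expires,
 * then report the accumulated pphwsp runtime and fail if any underflow of
 * the accumulated value was detected.
 */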
6373 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6374 {
6375         struct intel_context *ce;
6376         struct i915_request *rq;
6377         IGT_TIMEOUT(end_time);
6378         int err;
6379
6380         ce = intel_context_create(engine);
6381         if (IS_ERR(ce))
6382                 return PTR_ERR(ce);
6383
6384         ce->runtime.num_underflow = 0;
6385         ce->runtime.max_underflow = 0;
6386
6387         do {
6388                 unsigned int loop = 1024;
6389
6390                 while (loop) {
6391                         rq = intel_context_create_request(ce);
6392                         if (IS_ERR(rq)) {
6393                                 err = PTR_ERR(rq);
6394                                 goto err_rq;
6395                         }
6396
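                        /*
                         * Keep a reference only to the last request of each
                         * batch: it is the one we wait upon, or drop before
                         * starting the next batch.
                         */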
6397                         if (--loop == 0)
6398                                 i915_request_get(rq);
6399
6400                         i915_request_add(rq);
6401                 }
6402
6403                 if (__igt_timeout(end_time, NULL))
6404                         break;
6405
6406                 i915_request_put(rq);
6407         } while (1);
6408
6409         err = i915_request_wait(rq, 0, HZ / 5);
6410         if (err < 0) {
6411                 pr_err("%s: request not completed!\n", engine->name);
6412                 goto err_wait;
6413         }
6414
6415         igt_flush_test(engine->i915);
6416
6417         pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6418                 engine->name,
6419                 intel_context_get_total_runtime_ns(ce),
6420                 intel_context_get_avg_runtime_ns(ce));
6421
6422         err = 0;
6423         if (ce->runtime.num_underflow) {
6424                 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6425                        engine->name,
6426                        ce->runtime.num_underflow,
6427                        ce->runtime.max_underflow);
6428                 GEM_TRACE_DUMP();
6429                 err = -EOVERFLOW;
6430         }
6431
6432 err_wait:
6433         i915_request_put(rq);
6434 err_rq:
6435         intel_context_put(ce);
6436         return err;
6437 }
6438
6439 static int live_pphwsp_runtime(void *arg)
6440 {
6441         struct intel_gt *gt = arg;
6442         struct intel_engine_cs *engine;
6443         enum intel_engine_id id;
6444         int err = 0;
6445
6446         /*
6447          * Check that the cumulative context runtime, as stored in the
6448          * pphwsp[16], is monotonic.
6449          */
6450
6451         for_each_engine(engine, gt, id) {
6452                 err = __live_pphwsp_runtime(engine);
6453                 if (err)
6454                         break;
6455         }
6456
6457         if (igt_flush_test(gt->i915))
6458                 err = -EIO;
6459
6460         return err;
6461 }
6462
6463 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6464 {
6465         static const struct i915_subtest tests[] = {
6466                 SUBTEST(live_lrc_layout),
6467                 SUBTEST(live_lrc_fixed),
6468                 SUBTEST(live_lrc_state),
6469                 SUBTEST(live_lrc_gpr),
6470                 SUBTEST(live_lrc_isolation),
6471                 SUBTEST(live_lrc_timestamp),
6472                 SUBTEST(live_lrc_garbage),
6473                 SUBTEST(live_pphwsp_runtime),
6474                 SUBTEST(live_lrc_indirect_ctx_bb),
6475         };
6476
6477         if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6478                 return 0;
6479
6480         return intel_gt_live_subtests(tests, &i915->gt);
6481 }