2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
12 #include "gt/selftest_engine_heartbeat.h"
14 #include "i915_selftest.h"
15 #include "selftests/i915_random.h"
16 #include "selftests/igt_flush_test.h"
17 #include "selftests/igt_live_test.h"
18 #include "selftests/igt_spinner.h"
19 #include "selftests/lib_sw_fence.h"
21 #include "gem/selftests/igt_gem_utils.h"
22 #include "gem/selftests/mock_context.h"
24 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
26 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
28 static struct i915_vma *create_scratch(struct intel_gt *gt)
30 struct drm_i915_gem_object *obj;
34 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
38 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
40 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
42 i915_gem_object_put(obj);
46 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
48 i915_gem_object_put(obj);
55 static bool is_active(struct i915_request *rq)
57 if (i915_request_is_active(rq))
60 if (i915_request_on_hold(rq))
63 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
69 static int wait_for_submit(struct intel_engine_cs *engine,
70 struct i915_request *rq,
71 unsigned long timeout)
75 bool done = time_after(jiffies, timeout);
77 if (i915_request_completed(rq)) /* that was quick! */
80 /* Wait until the HW has acknowledged the submission (or err) */
81 intel_engine_flush_submission(engine);
82 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
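/*
 * A minimal usage sketch for wait_for_submit(), mirroring the callers
 * later in this file (error handling elided; note the callers pass a
 * relative delay such as HZ / 2, while the time_after() test above
 * compares against jiffies, so the elided prologue presumably converts
 * the delay into an absolute deadline first):
 *
 *	i915_request_get(rq);
 *	i915_request_add(rq);
 *	if (wait_for_submit(engine, rq, HZ / 2)) {
 *		pr_err("%s: request was not submitted\n", engine->name);
 *		err = -ETIME;
 *	}
 */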
92 static int wait_for_reset(struct intel_engine_cs *engine,
93 struct i915_request *rq,
94 unsigned long timeout)
100 intel_engine_flush_submission(engine);
102 if (READ_ONCE(engine->execlists.pending[0]))
105 if (i915_request_completed(rq))
108 if (READ_ONCE(rq->fence.error))
110 } while (time_before(jiffies, timeout));
112 flush_scheduled_work();
114 if (rq->fence.error != -EIO) {
115 pr_err("%s: hanging request %llx:%lld not reset\n",
122 /* Give the request a jiffie to complete after flushing the worker */
123 if (i915_request_wait(rq, 0,
124 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
125 pr_err("%s: hanging request %llx:%lld did not complete\n",
135 static int live_sanitycheck(void *arg)
137 struct intel_gt *gt = arg;
138 struct intel_engine_cs *engine;
139 enum intel_engine_id id;
140 struct igt_spinner spin;
143 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
146 if (igt_spinner_init(&spin, gt))
149 for_each_engine(engine, gt, id) {
150 struct intel_context *ce;
151 struct i915_request *rq;
153 ce = intel_context_create(engine);
159 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
165 i915_request_add(rq);
166 if (!igt_wait_for_spinner(&spin, rq)) {
167 GEM_TRACE("spinner failed to start\n");
169 intel_gt_set_wedged(gt);
174 igt_spinner_end(&spin);
175 if (igt_flush_test(gt->i915)) {
181 intel_context_put(ce);
186 igt_spinner_fini(&spin);
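/*
 * live_sanitycheck() above is the template for most tests in this file:
 * create a context, submit an igt_spinner batch, check with
 * igt_wait_for_spinner() that it actually reached the HW, then end the
 * spin and flush. The core of that loop, as a sketch (cleanup and error
 * paths elided):
 *
 *	ce = intel_context_create(engine);
 *	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 *	i915_request_add(rq);
 *	if (!igt_wait_for_spinner(&spin, rq))
 *		intel_gt_set_wedged(gt); /* spinner never ran */
 *	igt_spinner_end(&spin);
 *	if (igt_flush_test(gt->i915))
 *		err = -EIO;
 *	intel_context_put(ce);
 */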
190 static int live_unlite_restore(struct intel_gt *gt, int prio)
192 struct intel_engine_cs *engine;
193 enum intel_engine_id id;
194 struct igt_spinner spin;
198 * Check that we can correctly context switch between 2 instances
199 * on the same engine from the same parent context.
202 if (igt_spinner_init(&spin, gt))
206 for_each_engine(engine, gt, id) {
207 struct intel_context *ce[2] = {};
208 struct i915_request *rq[2];
209 struct igt_live_test t;
212 if (prio && !intel_engine_has_preemption(engine))
215 if (!intel_engine_can_store_dword(engine))
218 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
222 st_engine_heartbeat_disable(engine);
224 for (n = 0; n < ARRAY_SIZE(ce); n++) {
225 struct intel_context *tmp;
227 tmp = intel_context_create(engine);
233 err = intel_context_pin(tmp);
235 intel_context_put(tmp);
240 * Set up the pair of contexts such that if we
241 * lite-restore using the RING_TAIL from ce[1] it
242 * will execute garbage from ce[0]->ring.
244 memset(tmp->ring->vaddr,
245 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
246 tmp->ring->vma->size);
250 GEM_BUG_ON(!ce[1]->ring->size);
251 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
252 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
254 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
256 err = PTR_ERR(rq[0]);
260 i915_request_get(rq[0]);
261 i915_request_add(rq[0]);
262 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
264 if (!igt_wait_for_spinner(&spin, rq[0])) {
265 i915_request_put(rq[0]);
269 rq[1] = i915_request_create(ce[1]);
271 err = PTR_ERR(rq[1]);
272 i915_request_put(rq[0]);
278 * Ensure we do the switch to ce[1] on completion.
280 * rq[0] is already submitted, so this should reduce
281 * to a no-op (a wait on a request on the same engine
282 * uses the submit fence, not the completion fence),
283 * but it will install a dependency on rq[1] for rq[0]
284 * that will prevent the pair being reordered by
287 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
290 i915_request_get(rq[1]);
291 i915_request_add(rq[1]);
292 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
293 i915_request_put(rq[0]);
296 struct i915_sched_attr attr = {
300 /* Alternatively preempt the spinner with ce[1] */
301 engine->schedule(rq[1], &attr);
304 /* And switch back to ce[0] for good measure */
305 rq[0] = i915_request_create(ce[0]);
307 err = PTR_ERR(rq[0]);
308 i915_request_put(rq[1]);
312 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
313 i915_request_get(rq[0]);
314 i915_request_add(rq[0]);
315 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
316 i915_request_put(rq[1]);
317 i915_request_put(rq[0]);
320 intel_engine_flush_submission(engine);
321 igt_spinner_end(&spin);
322 for (n = 0; n < ARRAY_SIZE(ce); n++) {
323 if (IS_ERR_OR_NULL(ce[n]))
326 intel_context_unpin(ce[n]);
327 intel_context_put(ce[n]);
330 st_engine_heartbeat_enable(engine);
331 if (igt_live_test_end(&t))
337 igt_spinner_fini(&spin);
341 static int live_unlite_switch(void *arg)
343 return live_unlite_restore(arg, 0);
346 static int live_unlite_preempt(void *arg)
348 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
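/*
 * Background for the unlite tests: if a context is resubmitted while it
 * is still in ELSP, the HW performs a "lite restore" and only samples
 * the new RING_TAIL instead of reloading the full context image. If we
 * were fooled into lite-restoring with the RING_TAIL of the wrong ring
 * (ce[1] vs ce[0] above), the CS would execute whatever bytes lie
 * there, which is why the rings are poisoned (POISON_INUSE shows up as
 * IPEHR 0x5a5a5a5a in the resulting hang report).
 */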
351 static int live_unlite_ring(void *arg)
353 struct intel_gt *gt = arg;
354 struct intel_engine_cs *engine;
355 struct igt_spinner spin;
356 enum intel_engine_id id;
360 * Set up a preemption event that will cause almost the entire ring
361 * to be unwound, potentially fooling our intel_ring_direction()
362 * into emitting a forward lite-restore instead of the rollback.
365 if (igt_spinner_init(&spin, gt))
368 for_each_engine(engine, gt, id) {
369 struct intel_context *ce[2] = {};
370 struct i915_request *rq;
371 struct igt_live_test t;
374 if (!intel_engine_has_preemption(engine))
377 if (!intel_engine_can_store_dword(engine))
380 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
384 st_engine_heartbeat_disable(engine);
386 for (n = 0; n < ARRAY_SIZE(ce); n++) {
387 struct intel_context *tmp;
389 tmp = intel_context_create(engine);
395 err = intel_context_pin(tmp);
397 intel_context_put(tmp);
401 memset32(tmp->ring->vaddr,
402 0xdeadbeef, /* trigger a hang if executed */
403 tmp->ring->vma->size / sizeof(u32));
408 /* Create max prio spinner, followed by N low prio nops */
409 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
415 i915_request_get(rq);
416 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
417 i915_request_add(rq);
419 if (!igt_wait_for_spinner(&spin, rq)) {
420 intel_gt_set_wedged(gt);
421 i915_request_put(rq);
426 /* Fill the ring until we cause a wrap */
428 while (intel_ring_direction(ce[0]->ring,
430 ce[0]->ring->tail) <= 0) {
431 struct i915_request *tmp;
433 tmp = intel_context_create_request(ce[0]);
436 i915_request_put(rq);
440 i915_request_add(tmp);
441 intel_engine_flush_submission(engine);
444 intel_engine_flush_submission(engine);
445 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
451 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
453 ce[0]->ring->tail) <= 0);
454 i915_request_put(rq);
456 /* Create a request on the second ring to preempt the first ring after rq[0] */
457 rq = intel_context_create_request(ce[1]);
463 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
464 i915_request_get(rq);
465 i915_request_add(rq);
467 err = wait_for_submit(engine, rq, HZ / 2);
468 i915_request_put(rq);
470 pr_err("%s: preemption request was not submitted\n",
475 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
477 ce[0]->ring->tail, ce[0]->ring->emit,
478 ce[1]->ring->tail, ce[1]->ring->emit);
481 intel_engine_flush_submission(engine);
482 igt_spinner_end(&spin);
483 for (n = 0; n < ARRAY_SIZE(ce); n++) {
484 if (IS_ERR_OR_NULL(ce[n]))
487 intel_context_unpin(ce[n]);
488 intel_context_put(ce[n]);
490 st_engine_heartbeat_enable(engine);
491 if (igt_live_test_end(&t))
497 igt_spinner_fini(&spin);
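/*
 * Note on live_unlite_ring() above: intel_ring_direction() compares two
 * ring offsets modulo the ring size to decide whether a resubmission
 * moves the tail forwards or backwards. By filling the ring until the
 * tail wraps around behind the preempted request, the test checks that
 * the large rollback is not misread as a small forward step (which
 * would be emitted as an invalid lite-restore into stale contents).
 */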
501 static int live_pin_rewind(void *arg)
503 struct intel_gt *gt = arg;
504 struct intel_engine_cs *engine;
505 enum intel_engine_id id;
509 * We have to be careful not to trust intel_ring too much; for example,
510 * ring->head is updated upon retire, which is out of sync with pinning
511 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
512 * or else we risk writing an older, stale value.
514 * To simulate this, let's apply a bit of deliberate sabotage.
517 for_each_engine(engine, gt, id) {
518 struct intel_context *ce;
519 struct i915_request *rq;
520 struct intel_ring *ring;
521 struct igt_live_test t;
523 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
528 ce = intel_context_create(engine);
534 err = intel_context_pin(ce);
536 intel_context_put(ce);
540 /* Keep the context awake while we play games */
541 err = i915_active_acquire(&ce->active);
543 intel_context_unpin(ce);
544 intel_context_put(ce);
549 /* Poison the ring, and offset the next request from HEAD */
550 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
551 ring->emit = ring->size / 2;
552 ring->tail = ring->emit;
553 GEM_BUG_ON(ring->head);
555 intel_context_unpin(ce);
557 /* Submit a simple nop request */
558 GEM_BUG_ON(intel_context_is_pinned(ce));
559 rq = intel_context_create_request(ce);
560 i915_active_release(&ce->active); /* e.g. async retire */
561 intel_context_put(ce);
566 GEM_BUG_ON(!rq->head);
567 i915_request_add(rq);
569 /* Expect not to hang! */
570 if (igt_live_test_end(&t)) {
579 static int live_hold_reset(void *arg)
581 struct intel_gt *gt = arg;
582 struct intel_engine_cs *engine;
583 enum intel_engine_id id;
584 struct igt_spinner spin;
588 * In order to support offline error capture for fast preempt reset,
589 * we need to decouple the guilty request and ensure that it and its
590 * descendants are not executed while the capture is in progress.
593 if (!intel_has_reset_engine(gt))
596 if (igt_spinner_init(&spin, gt))
599 for_each_engine(engine, gt, id) {
600 struct intel_context *ce;
601 struct i915_request *rq;
603 ce = intel_context_create(engine);
609 st_engine_heartbeat_disable(engine);
611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
616 i915_request_add(rq);
618 if (!igt_wait_for_spinner(&spin, rq)) {
619 intel_gt_set_wedged(gt);
624 /* We have our request executing, now remove it and reset */
626 if (test_and_set_bit(I915_RESET_ENGINE + id,
628 intel_gt_set_wedged(gt);
632 tasklet_disable(&engine->execlists.tasklet);
634 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
635 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
637 i915_request_get(rq);
638 execlists_hold(engine, rq);
639 GEM_BUG_ON(!i915_request_on_hold(rq));
641 intel_engine_reset(engine, NULL);
642 GEM_BUG_ON(rq->fence.error != -EIO);
644 tasklet_enable(&engine->execlists.tasklet);
645 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
648 /* Check that we do not resubmit the held request */
649 if (!i915_request_wait(rq, 0, HZ / 5)) {
650 pr_err("%s: on hold request completed!\n",
652 i915_request_put(rq);
656 GEM_BUG_ON(!i915_request_on_hold(rq));
658 /* But is resubmitted on release */
659 execlists_unhold(engine, rq);
660 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
661 pr_err("%s: held request did not complete!\n",
663 intel_gt_set_wedged(gt);
666 i915_request_put(rq);
669 st_engine_heartbeat_enable(engine);
670 intel_context_put(ce);
675 igt_spinner_fini(&spin);
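/*
 * live_hold_reset() above exercises execlists_hold()/execlists_unhold()
 * by hand: with the submission tasklet disabled, the guilty spinner is
 * parked on the hold list, the engine is reset (which marks the request
 * with -EIO), and the request must stay off the HW until
 * execlists_unhold() explicitly resubmits it -- the decoupling needed
 * for offline error capture during a fast preempt reset.
 */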
679 static const char *error_repr(int err)
681 return err ? "bad" : "good";
684 static int live_error_interrupt(void *arg)
686 static const struct error_phase {
687 enum { GOOD = 0, BAD = -EIO } error[2];
692 { { GOOD, GOOD } }, /* sentinel */
694 struct intel_gt *gt = arg;
695 struct intel_engine_cs *engine;
696 enum intel_engine_id id;
699 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
700 * of invalid commands in user batches that will cause a GPU hang.
701 * This is a faster mechanism than using hangcheck/heartbeats, but
702 * only detects problems the HW knows about -- it will not warn when
705 * To verify our detection and reset, we throw some invalid commands
706 * at the HW and wait for the interrupt.
709 if (!intel_has_reset_engine(gt))
712 for_each_engine(engine, gt, id) {
713 const struct error_phase *p;
716 st_engine_heartbeat_disable(engine);
718 for (p = phases; p->error[0] != GOOD; p++) {
719 struct i915_request *client[ARRAY_SIZE(phases->error)];
723 memset(client, 0, sizeof(*client));
724 for (i = 0; i < ARRAY_SIZE(client); i++) {
725 struct intel_context *ce;
726 struct i915_request *rq;
728 ce = intel_context_create(engine);
734 rq = intel_context_create_request(ce);
735 intel_context_put(ce);
741 if (rq->engine->emit_init_breadcrumb) {
742 err = rq->engine->emit_init_breadcrumb(rq);
744 i915_request_add(rq);
749 cs = intel_ring_begin(rq, 2);
751 i915_request_add(rq);
764 client[i] = i915_request_get(rq);
765 i915_request_add(rq);
768 err = wait_for_submit(engine, client[0], HZ / 2);
770 pr_err("%s: first request did not start within time!\n",
776 for (i = 0; i < ARRAY_SIZE(client); i++) {
777 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
778 pr_debug("%s: %s request incomplete!\n",
780 error_repr(p->error[i]));
782 if (!i915_request_started(client[i])) {
783 pr_err("%s: %s request not started!\n",
785 error_repr(p->error[i]));
790 /* Kick the tasklet to process the error */
791 intel_engine_flush_submission(engine);
792 if (client[i]->fence.error != p->error[i]) {
793 pr_err("%s: %s request (%s) with wrong error code: %d\n",
795 error_repr(p->error[i]),
796 i915_request_completed(client[i]) ? "completed" : "running",
797 client[i]->fence.error);
804 for (i = 0; i < ARRAY_SIZE(client); i++)
806 i915_request_put(client[i]);
808 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
809 engine->name, p - phases,
810 p->error[0], p->error[1]);
815 st_engine_heartbeat_enable(engine);
817 intel_gt_set_wedged(gt);
826 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
830 cs = intel_ring_begin(rq, 10);
834 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
836 *cs++ = MI_SEMAPHORE_WAIT |
837 MI_SEMAPHORE_GLOBAL_GTT |
839 MI_SEMAPHORE_SAD_NEQ_SDD;
841 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
845 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
846 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
856 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
858 intel_ring_advance(rq, cs);
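/*
 * Each batch built above forms one link of a semaphore chain: link idx
 * busy-waits (with arbitration enabled, so it stays preemptible) until
 * dword idx of the shared vma changes, then writes dword idx - 1 to
 * release the previous link. release_queue() below pokes the newest
 * dword at maximum priority, so the chain unravels from the tail back
 * to the head -- but only if the scheduler timeslices each waiting link
 * onto the engine in turn.
 */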
862 static struct i915_request *
863 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
865 struct intel_context *ce;
866 struct i915_request *rq;
869 ce = intel_context_create(engine);
873 rq = intel_context_create_request(ce);
878 if (rq->engine->emit_init_breadcrumb)
879 err = rq->engine->emit_init_breadcrumb(rq);
881 err = emit_semaphore_chain(rq, vma, idx);
883 i915_request_get(rq);
884 i915_request_add(rq);
889 intel_context_put(ce);
894 release_queue(struct intel_engine_cs *engine,
895 struct i915_vma *vma,
898 struct i915_sched_attr attr = {
901 struct i915_request *rq;
904 rq = intel_engine_create_kernel_request(engine);
908 cs = intel_ring_begin(rq, 4);
910 i915_request_add(rq);
914 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
915 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
919 intel_ring_advance(rq, cs);
921 i915_request_get(rq);
922 i915_request_add(rq);
925 engine->schedule(rq, &attr);
926 local_bh_enable(); /* kick tasklet */
928 i915_request_put(rq);
934 slice_semaphore_queue(struct intel_engine_cs *outer,
935 struct i915_vma *vma,
938 struct intel_engine_cs *engine;
939 struct i915_request *head;
940 enum intel_engine_id id;
943 head = semaphore_queue(outer, vma, n++);
945 return PTR_ERR(head);
947 for_each_engine(engine, outer->gt, id) {
948 for (i = 0; i < count; i++) {
949 struct i915_request *rq;
951 rq = semaphore_queue(engine, vma, n++);
957 i915_request_put(rq);
961 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
965 if (i915_request_wait(head, 0,
966 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
967 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
970 intel_gt_set_wedged(outer->gt);
975 i915_request_put(head);
979 static int live_timeslice_preempt(void *arg)
981 struct intel_gt *gt = arg;
982 struct drm_i915_gem_object *obj;
983 struct intel_engine_cs *engine;
984 enum intel_engine_id id;
985 struct i915_vma *vma;
990 * If a request takes too long, we would like to give other users
991 * a fair go on the GPU. In particular, users may create batches
992 * that wait upon external input, where that input may even be
993 * supplied by another GPU job. To avoid blocking forever, we
994 * need to preempt the current task and replace it with another
997 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1000 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1002 return PTR_ERR(obj);
1004 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1010 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1011 if (IS_ERR(vaddr)) {
1012 err = PTR_ERR(vaddr);
1016 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1020 err = i915_vma_sync(vma);
1024 for_each_engine(engine, gt, id) {
1025 if (!intel_engine_has_preemption(engine))
1028 memset(vaddr, 0, PAGE_SIZE);
1030 st_engine_heartbeat_disable(engine);
1031 err = slice_semaphore_queue(engine, vma, 5);
1032 st_engine_heartbeat_enable(engine);
1036 if (igt_flush_test(gt->i915)) {
1043 i915_vma_unpin(vma);
1045 i915_gem_object_unpin_map(obj);
1047 i915_gem_object_put(obj);
1051 static struct i915_request *
1052 create_rewinder(struct intel_context *ce,
1053 struct i915_request *wait,
1054 void *slot, int idx)
1057 i915_ggtt_offset(ce->engine->status_page.vma) +
1058 offset_in_page(slot);
1059 struct i915_request *rq;
1063 rq = intel_context_create_request(ce);
1068 err = i915_request_await_dma_fence(rq, &wait->fence);
1073 cs = intel_ring_begin(rq, 14);
1079 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1082 *cs++ = MI_SEMAPHORE_WAIT |
1083 MI_SEMAPHORE_GLOBAL_GTT |
1085 MI_SEMAPHORE_SAD_GTE_SDD;
1090 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1091 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1092 *cs++ = offset + idx * sizeof(u32);
1095 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1100 intel_ring_advance(rq, cs);
1102 rq->sched.attr.priority = I915_PRIORITY_MASK;
1105 i915_request_get(rq);
1106 i915_request_add(rq);
1108 i915_request_put(rq);
1109 return ERR_PTR(err);
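/*
 * create_rewinder() queues a batch that waits on the shared status-page
 * slot and then stores the engine's RING_TIMESTAMP into slot[idx].
 * Comparing the recorded timestamps is how live_timeslice_rewind()
 * below reconstructs the order in which the requests actually executed
 * on the HW (the expected order being X, Z, Y).
 */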
1115 static int live_timeslice_rewind(void *arg)
1117 struct intel_gt *gt = arg;
1118 struct intel_engine_cs *engine;
1119 enum intel_engine_id id;
1122 * The usual presumption on timeslice expiration is that we replace
1123 * the active context with another. However, given a chain of
1124 * dependencies we may end up with replacing the context with itself,
1125 * but only a few of those requests, forcing us to rewind the
1126 * RING_TAIL of the original request.
1128 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1131 for_each_engine(engine, gt, id) {
1132 enum { A1, A2, B1 };
1133 enum { X = 1, Z, Y };
1134 struct i915_request *rq[3] = {};
1135 struct intel_context *ce;
1136 unsigned long timeslice;
1140 if (!intel_engine_has_timeslices(engine))
1144 * A:rq1 -- semaphore wait, timestamp X
1145 * A:rq2 -- write timestamp Y
1147 * B:rq1 [await A:rq1] -- write timestamp Z
1149 * Force timeslice, release semaphore.
1151 * Expect execution/evaluation order XZY
1154 st_engine_heartbeat_disable(engine);
1155 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1157 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1159 ce = intel_context_create(engine);
1165 rq[A1] = create_rewinder(ce, NULL, slot, X);
1166 if (IS_ERR(rq[A1])) {
1167 intel_context_put(ce);
1171 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1172 intel_context_put(ce);
1176 err = wait_for_submit(engine, rq[A2], HZ / 2);
1178 pr_err("%s: failed to submit first context\n",
1183 ce = intel_context_create(engine);
1189 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1190 intel_context_put(ce);
1194 err = wait_for_submit(engine, rq[B1], HZ / 2);
1196 pr_err("%s: failed to submit second context\n",
1201 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1202 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1203 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
1204 /* Wait for the timeslice to kick in */
1205 del_timer(&engine->execlists.timer);
1206 tasklet_hi_schedule(&engine->execlists.tasklet);
1207 intel_engine_flush_submission(engine);
1209 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1210 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1211 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1212 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1214 /* Release the hounds! */
1216 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1218 for (i = 1; i <= 3; i++) {
1219 unsigned long timeout = jiffies + HZ / 2;
1221 while (!READ_ONCE(slot[i]) &&
1222 time_before(jiffies, timeout))
1225 if (!time_before(jiffies, timeout)) {
1226 pr_err("%s: rq[%d] timed out\n",
1227 engine->name, i - 1);
1232 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1236 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1237 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1245 memset32(&slot[0], -1, 4);
1248 engine->props.timeslice_duration_ms = timeslice;
1249 st_engine_heartbeat_enable(engine);
1250 for (i = 0; i < 3; i++)
1251 i915_request_put(rq[i]);
1252 if (igt_flush_test(gt->i915))
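/*
 * The slot[Z] - slot[X] >= slot[Y] - slot[X] check above compares
 * timestamp deltas relative to X rather than raw values, so a wrapping
 * RING_TIMESTAMP cannot skew the verdict: Z must land closer to X than
 * Y does, i.e. B:rq1 must have run before A:rq2 once the forced
 * timeslice expired and A's RING_TAIL was rewound.
 */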
1261 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1263 struct i915_request *rq;
1265 rq = intel_engine_create_kernel_request(engine);
1269 i915_request_get(rq);
1270 i915_request_add(rq);
1275 static long slice_timeout(struct intel_engine_cs *engine)
1279 /* Enough time for a timeslice to kick in, and kick out */
1280 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1282 /* Enough time for the nop request to complete */
1288 static int live_timeslice_queue(void *arg)
1290 struct intel_gt *gt = arg;
1291 struct drm_i915_gem_object *obj;
1292 struct intel_engine_cs *engine;
1293 enum intel_engine_id id;
1294 struct i915_vma *vma;
1299 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1300 * timeslicing between them disabled, we *do* enable timeslicing
1301 * if the queue demands it. (Normally, we do not submit if
1302 * ELSP[1] is already occupied, so must rely on timeslicing to
1303 * eject ELSP[0] in favour of the queue.)
1305 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1308 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1310 return PTR_ERR(obj);
1312 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1318 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1319 if (IS_ERR(vaddr)) {
1320 err = PTR_ERR(vaddr);
1324 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1328 err = i915_vma_sync(vma);
1332 for_each_engine(engine, gt, id) {
1333 struct i915_sched_attr attr = {
1334 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1336 struct i915_request *rq, *nop;
1338 if (!intel_engine_has_preemption(engine))
1341 st_engine_heartbeat_disable(engine);
1342 memset(vaddr, 0, PAGE_SIZE);
1344 /* ELSP[0]: semaphore wait */
1345 rq = semaphore_queue(engine, vma, 0);
1350 engine->schedule(rq, &attr);
1351 err = wait_for_submit(engine, rq, HZ / 2);
1353 pr_err("%s: Timed out trying to submit semaphores\n",
1358 /* ELSP[1]: nop request */
1359 nop = nop_request(engine);
1364 err = wait_for_submit(engine, nop, HZ / 2);
1365 i915_request_put(nop);
1367 pr_err("%s: Timed out trying to submit nop\n",
1372 GEM_BUG_ON(i915_request_completed(rq));
1373 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1375 /* Queue: semaphore signal, matching priority as semaphore */
1376 err = release_queue(engine, vma, 1, effective_prio(rq));
1380 /* Wait until we ack the release_queue and start timeslicing */
1383 intel_engine_flush_submission(engine);
1384 } while (READ_ONCE(engine->execlists.pending[0]));
1386 /* Timeslice every jiffy, so within 2 we should signal */
1387 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1388 struct drm_printer p =
1389 drm_info_printer(gt->i915->drm.dev);
1391 pr_err("%s: Failed to timeslice into queue\n",
1393 intel_engine_dump(engine, &p,
1394 "%s\n", engine->name);
1396 memset(vaddr, 0xff, PAGE_SIZE);
1400 i915_request_put(rq);
1402 st_engine_heartbeat_enable(engine);
1408 i915_vma_unpin(vma);
1410 i915_gem_object_unpin_map(obj);
1412 i915_gem_object_put(obj);
1416 static int live_timeslice_nopreempt(void *arg)
1418 struct intel_gt *gt = arg;
1419 struct intel_engine_cs *engine;
1420 enum intel_engine_id id;
1421 struct igt_spinner spin;
1425 * We should not timeslice into a request that is marked with
1426 * I915_FENCE_FLAG_NOPREEMPT.
1428 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1431 if (igt_spinner_init(&spin, gt))
1434 for_each_engine(engine, gt, id) {
1435 struct intel_context *ce;
1436 struct i915_request *rq;
1437 unsigned long timeslice;
1439 if (!intel_engine_has_preemption(engine))
1442 ce = intel_context_create(engine);
1448 st_engine_heartbeat_disable(engine);
1449 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1451 /* Create an unpreemptible spinner */
1453 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1454 intel_context_put(ce);
1460 i915_request_get(rq);
1461 i915_request_add(rq);
1463 if (!igt_wait_for_spinner(&spin, rq)) {
1464 i915_request_put(rq);
1469 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1470 i915_request_put(rq);
1472 /* Followed by a maximum priority barrier (heartbeat) */
1474 ce = intel_context_create(engine);
1480 rq = intel_context_create_request(ce);
1481 intel_context_put(ce);
1487 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1488 i915_request_get(rq);
1489 i915_request_add(rq);
1492 * Wait until the barrier is in ELSP, and we know timeslicing
1493 * will have been activated.
1495 if (wait_for_submit(engine, rq, HZ / 2)) {
1496 i915_request_put(rq);
1502 * Since the ELSP[0] request is unpreemptible, it should not
1503 * allow the maximum priority barrier through. Wait long
1504 * enough to see if it is timesliced in by mistake.
1506 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1507 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1511 i915_request_put(rq);
1514 igt_spinner_end(&spin);
1516 xchg(&engine->props.timeslice_duration_ms, timeslice);
1517 st_engine_heartbeat_enable(engine);
1521 if (igt_flush_test(gt->i915)) {
1527 igt_spinner_fini(&spin);
1531 static int live_busywait_preempt(void *arg)
1533 struct intel_gt *gt = arg;
1534 struct i915_gem_context *ctx_hi, *ctx_lo;
1535 struct intel_engine_cs *engine;
1536 struct drm_i915_gem_object *obj;
1537 struct i915_vma *vma;
1538 enum intel_engine_id id;
1543 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1544 * preempt the busywaits used to synchronise between rings.
1547 ctx_hi = kernel_context(gt->i915);
1550 ctx_hi->sched.priority =
1551 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1553 ctx_lo = kernel_context(gt->i915);
1556 ctx_lo->sched.priority =
1557 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1559 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1565 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1571 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1577 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1581 err = i915_vma_sync(vma);
1585 for_each_engine(engine, gt, id) {
1586 struct i915_request *lo, *hi;
1587 struct igt_live_test t;
1590 if (!intel_engine_has_preemption(engine))
1593 if (!intel_engine_can_store_dword(engine))
1596 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1602 * We create two requests. The low priority request
1603 * busywaits on a semaphore (inside the ringbuffer where
1604 * it should be preemptible) and the high priority request
1605 * uses an MI_STORE_DWORD_IMM to update the semaphore value
1606 * allowing the first request to complete. If preemption
1607 * fails, we hang instead.
1610 lo = igt_request_alloc(ctx_lo, engine);
1616 cs = intel_ring_begin(lo, 8);
1619 i915_request_add(lo);
1623 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1624 *cs++ = i915_ggtt_offset(vma);
1628 /* XXX Do we need a flush + invalidate here? */
1630 *cs++ = MI_SEMAPHORE_WAIT |
1631 MI_SEMAPHORE_GLOBAL_GTT |
1633 MI_SEMAPHORE_SAD_EQ_SDD;
1635 *cs++ = i915_ggtt_offset(vma);
1638 intel_ring_advance(lo, cs);
1640 i915_request_get(lo);
1641 i915_request_add(lo);
1643 if (wait_for(READ_ONCE(*map), 10)) {
1644 i915_request_put(lo);
1649 /* Low priority request should be busywaiting now */
1650 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1651 i915_request_put(lo);
1652 pr_err("%s: Busywaiting request did not busywait!\n",
1658 hi = igt_request_alloc(ctx_hi, engine);
1661 i915_request_put(lo);
1665 cs = intel_ring_begin(hi, 4);
1668 i915_request_add(hi);
1669 i915_request_put(lo);
1673 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1674 *cs++ = i915_ggtt_offset(vma);
1678 intel_ring_advance(hi, cs);
1679 i915_request_add(hi);
1681 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1682 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1684 pr_err("%s: Failed to preempt semaphore busywait!\n",
1687 intel_engine_dump(engine, &p, "%s\n", engine->name);
1690 i915_request_put(lo);
1691 intel_gt_set_wedged(gt);
1695 GEM_BUG_ON(READ_ONCE(*map));
1696 i915_request_put(lo);
1698 if (igt_live_test_end(&t)) {
1706 i915_vma_unpin(vma);
1708 i915_gem_object_unpin_map(obj);
1710 i915_gem_object_put(obj);
1712 kernel_context_close(ctx_lo);
1714 kernel_context_close(ctx_hi);
1718 static struct i915_request *
1719 spinner_create_request(struct igt_spinner *spin,
1720 struct i915_gem_context *ctx,
1721 struct intel_engine_cs *engine,
1724 struct intel_context *ce;
1725 struct i915_request *rq;
1727 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1729 return ERR_CAST(ce);
1731 rq = igt_spinner_create_request(spin, ce, arb);
1732 intel_context_put(ce);
1736 static int live_preempt(void *arg)
1738 struct intel_gt *gt = arg;
1739 struct i915_gem_context *ctx_hi, *ctx_lo;
1740 struct igt_spinner spin_hi, spin_lo;
1741 struct intel_engine_cs *engine;
1742 enum intel_engine_id id;
1745 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1748 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1749 pr_err("Logical preemption supported, but not exposed\n");
1751 if (igt_spinner_init(&spin_hi, gt))
1754 if (igt_spinner_init(&spin_lo, gt))
1757 ctx_hi = kernel_context(gt->i915);
1760 ctx_hi->sched.priority =
1761 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1763 ctx_lo = kernel_context(gt->i915);
1766 ctx_lo->sched.priority =
1767 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1769 for_each_engine(engine, gt, id) {
1770 struct igt_live_test t;
1771 struct i915_request *rq;
1773 if (!intel_engine_has_preemption(engine))
1776 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1781 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1788 i915_request_add(rq);
1789 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1790 GEM_TRACE("lo spinner failed to start\n");
1792 intel_gt_set_wedged(gt);
1797 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1800 igt_spinner_end(&spin_lo);
1805 i915_request_add(rq);
1806 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1807 GEM_TRACE("hi spinner failed to start\n");
1809 intel_gt_set_wedged(gt);
1814 igt_spinner_end(&spin_hi);
1815 igt_spinner_end(&spin_lo);
1817 if (igt_live_test_end(&t)) {
1825 kernel_context_close(ctx_lo);
1827 kernel_context_close(ctx_hi);
1829 igt_spinner_fini(&spin_lo);
1831 igt_spinner_fini(&spin_hi);
1835 static int live_late_preempt(void *arg)
1837 struct intel_gt *gt = arg;
1838 struct i915_gem_context *ctx_hi, *ctx_lo;
1839 struct igt_spinner spin_hi, spin_lo;
1840 struct intel_engine_cs *engine;
1841 struct i915_sched_attr attr = {};
1842 enum intel_engine_id id;
1845 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1848 if (igt_spinner_init(&spin_hi, gt))
1851 if (igt_spinner_init(&spin_lo, gt))
1854 ctx_hi = kernel_context(gt->i915);
1858 ctx_lo = kernel_context(gt->i915);
1862 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1863 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1865 for_each_engine(engine, gt, id) {
1866 struct igt_live_test t;
1867 struct i915_request *rq;
1869 if (!intel_engine_has_preemption(engine))
1872 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1877 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1884 i915_request_add(rq);
1885 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1886 pr_err("First context failed to start\n");
1890 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1893 igt_spinner_end(&spin_lo);
1898 i915_request_add(rq);
1899 if (igt_wait_for_spinner(&spin_hi, rq)) {
1900 pr_err("Second context overtook first?\n");
1904 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1905 engine->schedule(rq, &attr);
1907 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1908 pr_err("High priority context failed to preempt the low priority context\n");
1913 igt_spinner_end(&spin_hi);
1914 igt_spinner_end(&spin_lo);
1916 if (igt_live_test_end(&t)) {
1924 kernel_context_close(ctx_lo);
1926 kernel_context_close(ctx_hi);
1928 igt_spinner_fini(&spin_lo);
1930 igt_spinner_fini(&spin_hi);
1934 igt_spinner_end(&spin_hi);
1935 igt_spinner_end(&spin_lo);
1936 intel_gt_set_wedged(gt);
1941 struct preempt_client {
1942 struct igt_spinner spin;
1943 struct i915_gem_context *ctx;
1946 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1948 c->ctx = kernel_context(gt->i915);
1952 if (igt_spinner_init(&c->spin, gt))
1958 kernel_context_close(c->ctx);
1962 static void preempt_client_fini(struct preempt_client *c)
1964 igt_spinner_fini(&c->spin);
1965 kernel_context_close(c->ctx);
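/*
 * preempt_client bundles a spinner with a GEM context; tests pair two
 * of them at different priorities. The recurring pattern, as a sketch
 * (taken from live_nopreempt() below, error paths elided):
 *
 *	struct preempt_client a, b;
 *
 *	if (preempt_client_init(gt, &a))
 *		return -ENOMEM;
 *	if (preempt_client_init(gt, &b))
 *		goto err_client_a;
 *	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
 *	...
 *	preempt_client_fini(&b);
 *	preempt_client_fini(&a);
 */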
1968 static int live_nopreempt(void *arg)
1970 struct intel_gt *gt = arg;
1971 struct intel_engine_cs *engine;
1972 struct preempt_client a, b;
1973 enum intel_engine_id id;
1977 * Verify that we can disable preemption for an individual request
1978 * that may be being observed and does not want to be interrupted.
1981 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1984 if (preempt_client_init(gt, &a))
1986 if (preempt_client_init(gt, &b))
1988 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1990 for_each_engine(engine, gt, id) {
1991 struct i915_request *rq_a, *rq_b;
1993 if (!intel_engine_has_preemption(engine))
1996 engine->execlists.preempt_hang.count = 0;
1998 rq_a = spinner_create_request(&a.spin,
2002 err = PTR_ERR(rq_a);
2006 /* Low priority client, but unpreemptable! */
2007 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
2009 i915_request_add(rq_a);
2010 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2011 pr_err("First client failed to start\n");
2015 rq_b = spinner_create_request(&b.spin,
2019 err = PTR_ERR(rq_b);
2023 i915_request_add(rq_b);
2025 /* B is much more important than A! (But A is unpreemptable.) */
2026 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2028 /* Wait long enough for preemption and timeslicing */
2029 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2030 pr_err("Second client started too early!\n");
2034 igt_spinner_end(&a.spin);
2036 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2037 pr_err("Second client failed to start\n");
2041 igt_spinner_end(&b.spin);
2043 if (engine->execlists.preempt_hang.count) {
2044 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2045 engine->execlists.preempt_hang.count);
2050 if (igt_flush_test(gt->i915))
2056 preempt_client_fini(&b);
2058 preempt_client_fini(&a);
2062 igt_spinner_end(&b.spin);
2063 igt_spinner_end(&a.spin);
2064 intel_gt_set_wedged(gt);
2069 struct live_preempt_cancel {
2070 struct intel_engine_cs *engine;
2071 struct preempt_client a, b;
2074 static int __cancel_active0(struct live_preempt_cancel *arg)
2076 struct i915_request *rq;
2077 struct igt_live_test t;
2080 /* Preempt cancel of ELSP0 */
2081 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2082 if (igt_live_test_begin(&t, arg->engine->i915,
2083 __func__, arg->engine->name))
2086 rq = spinner_create_request(&arg->a.spin,
2087 arg->a.ctx, arg->engine,
2092 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2093 i915_request_get(rq);
2094 i915_request_add(rq);
2095 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2100 intel_context_set_banned(rq->context);
2101 err = intel_engine_pulse(arg->engine);
2105 err = wait_for_reset(arg->engine, rq, HZ / 2);
2107 pr_err("Cancelled inflight0 request did not reset\n");
2112 i915_request_put(rq);
2113 if (igt_live_test_end(&t))
2118 static int __cancel_active1(struct live_preempt_cancel *arg)
2120 struct i915_request *rq[2] = {};
2121 struct igt_live_test t;
2124 /* Preempt cancel of ELSP1 */
2125 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2126 if (igt_live_test_begin(&t, arg->engine->i915,
2127 __func__, arg->engine->name))
2130 rq[0] = spinner_create_request(&arg->a.spin,
2131 arg->a.ctx, arg->engine,
2132 MI_NOOP); /* no preemption */
2134 return PTR_ERR(rq[0]);
2136 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2137 i915_request_get(rq[0]);
2138 i915_request_add(rq[0]);
2139 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2144 rq[1] = spinner_create_request(&arg->b.spin,
2145 arg->b.ctx, arg->engine,
2147 if (IS_ERR(rq[1])) {
2148 err = PTR_ERR(rq[1]);
2152 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2153 i915_request_get(rq[1]);
2154 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2155 i915_request_add(rq[1]);
2159 intel_context_set_banned(rq[1]->context);
2160 err = intel_engine_pulse(arg->engine);
2164 igt_spinner_end(&arg->a.spin);
2165 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2169 if (rq[0]->fence.error != 0) {
2170 pr_err("Normal inflight0 request did not complete\n");
2175 if (rq[1]->fence.error != -EIO) {
2176 pr_err("Cancelled inflight1 request did not report -EIO\n");
2182 i915_request_put(rq[1]);
2183 i915_request_put(rq[0]);
2184 if (igt_live_test_end(&t))
2189 static int __cancel_queued(struct live_preempt_cancel *arg)
2191 struct i915_request *rq[3] = {};
2192 struct igt_live_test t;
2195 /* Full ELSP and one in the wings */
2196 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2197 if (igt_live_test_begin(&t, arg->engine->i915,
2198 __func__, arg->engine->name))
2201 rq[0] = spinner_create_request(&arg->a.spin,
2202 arg->a.ctx, arg->engine,
2205 return PTR_ERR(rq[0]);
2207 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2208 i915_request_get(rq[0]);
2209 i915_request_add(rq[0]);
2210 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2215 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2216 if (IS_ERR(rq[1])) {
2217 err = PTR_ERR(rq[1]);
2221 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2222 i915_request_get(rq[1]);
2223 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2224 i915_request_add(rq[1]);
2228 rq[2] = spinner_create_request(&arg->b.spin,
2229 arg->a.ctx, arg->engine,
2231 if (IS_ERR(rq[2])) {
2232 err = PTR_ERR(rq[2]);
2236 i915_request_get(rq[2]);
2237 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2238 i915_request_add(rq[2]);
2242 intel_context_set_banned(rq[2]->context);
2243 err = intel_engine_pulse(arg->engine);
2247 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2251 if (rq[0]->fence.error != -EIO) {
2252 pr_err("Cancelled inflight0 request did not report -EIO\n");
2257 if (rq[1]->fence.error != 0) {
2258 pr_err("Normal inflight1 request did not complete\n");
2263 if (rq[2]->fence.error != -EIO) {
2264 pr_err("Cancelled queued request did not report -EIO\n");
2270 i915_request_put(rq[2]);
2271 i915_request_put(rq[1]);
2272 i915_request_put(rq[0]);
2273 if (igt_live_test_end(&t))
2278 static int __cancel_hostile(struct live_preempt_cancel *arg)
2280 struct i915_request *rq;
2283 /* Preempt cancel non-preemptible spinner in ELSP0 */
2284 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2287 if (!intel_has_reset_engine(arg->engine->gt))
2290 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2291 rq = spinner_create_request(&arg->a.spin,
2292 arg->a.ctx, arg->engine,
2293 MI_NOOP); /* preemption disabled */
2297 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2298 i915_request_get(rq);
2299 i915_request_add(rq);
2300 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2305 intel_context_set_banned(rq->context);
2306 err = intel_engine_pulse(arg->engine); /* force reset */
2310 err = wait_for_reset(arg->engine, rq, HZ / 2);
2312 pr_err("Cancelled inflight0 request did not reset\n");
2317 i915_request_put(rq);
2318 if (igt_flush_test(arg->engine->i915))
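/*
 * All four __cancel_*() phases above share one cancellation mechanism:
 * mark the victim context as banned (intel_context_set_banned()) and
 * send intel_engine_pulse(), a maximum-priority kernel heartbeat
 * request. The pulse forces a preemption attempt; a banned context that
 * refuses to yield is then removed by the preempt-timeout reset, and
 * every cancelled request is expected to complete with
 * fence.error == -EIO while innocent neighbours complete normally.
 */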
2323 static int live_preempt_cancel(void *arg)
2325 struct intel_gt *gt = arg;
2326 struct live_preempt_cancel data;
2327 enum intel_engine_id id;
2331 * To cancel an inflight context, we need to first remove it from the
2332 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2335 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2338 if (preempt_client_init(gt, &data.a))
2340 if (preempt_client_init(gt, &data.b))
2343 for_each_engine(data.engine, gt, id) {
2344 if (!intel_engine_has_preemption(data.engine))
2347 err = __cancel_active0(&data);
2351 err = __cancel_active1(&data);
2355 err = __cancel_queued(&data);
2359 err = __cancel_hostile(&data);
2366 preempt_client_fini(&data.b);
2368 preempt_client_fini(&data.a);
2373 igt_spinner_end(&data.b.spin);
2374 igt_spinner_end(&data.a.spin);
2375 intel_gt_set_wedged(gt);
2379 static int live_suppress_self_preempt(void *arg)
2381 struct intel_gt *gt = arg;
2382 struct intel_engine_cs *engine;
2383 struct i915_sched_attr attr = {
2384 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2386 struct preempt_client a, b;
2387 enum intel_engine_id id;
2391 * Verify that if a preemption request does not cause a change in
2392 * the current execution order, the preempt-to-idle injection is
2393 * skipped and that we do not accidentally apply it after the CS
2397 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2400 if (intel_uc_uses_guc_submission(>->uc))
2401 return 0; /* presume black box */
2403 if (intel_vgpu_active(gt->i915))
2404 return 0; /* GVT forces single port & request submission */
2406 if (preempt_client_init(gt, &a))
2408 if (preempt_client_init(gt, &b))
2411 for_each_engine(engine, gt, id) {
2412 struct i915_request *rq_a, *rq_b;
2415 if (!intel_engine_has_preemption(engine))
2418 if (igt_flush_test(gt->i915))
2421 st_engine_heartbeat_disable(engine);
2422 engine->execlists.preempt_hang.count = 0;
2424 rq_a = spinner_create_request(&a.spin,
2428 err = PTR_ERR(rq_a);
2429 st_engine_heartbeat_enable(engine);
2433 i915_request_add(rq_a);
2434 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2435 pr_err("First client failed to start\n");
2436 st_engine_heartbeat_enable(engine);
2440 /* Keep postponing the timer to avoid premature slicing */
2441 mod_timer(&engine->execlists.timer, jiffies + HZ);
2442 for (depth = 0; depth < 8; depth++) {
2443 rq_b = spinner_create_request(&b.spin,
2447 err = PTR_ERR(rq_b);
2448 st_engine_heartbeat_enable(engine);
2451 i915_request_add(rq_b);
2453 GEM_BUG_ON(i915_request_completed(rq_a));
2454 engine->schedule(rq_a, &attr);
2455 igt_spinner_end(&a.spin);
2457 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2458 pr_err("Second client failed to start\n");
2459 st_engine_heartbeat_enable(engine);
2466 igt_spinner_end(&a.spin);
2468 if (engine->execlists.preempt_hang.count) {
2469 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2471 engine->execlists.preempt_hang.count,
2473 st_engine_heartbeat_enable(engine);
2478 st_engine_heartbeat_enable(engine);
2479 if (igt_flush_test(gt->i915))
2485 preempt_client_fini(&b);
2487 preempt_client_fini(&a);
2491 igt_spinner_end(&b.spin);
2492 igt_spinner_end(&a.spin);
2493 intel_gt_set_wedged(gt);
2498 static int live_chain_preempt(void *arg)
2500 struct intel_gt *gt = arg;
2501 struct intel_engine_cs *engine;
2502 struct preempt_client hi, lo;
2503 enum intel_engine_id id;
2507 * Build a chain AB...BA between two contexts (A, B) and request
2508 * preemption of the last request. It should then complete before
2509 * the previously submitted spinner in B.
2512 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2515 if (preempt_client_init(gt, &hi))
2518 if (preempt_client_init(gt, &lo))
2521 for_each_engine(engine, gt, id) {
2522 struct i915_sched_attr attr = {
2523 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2525 struct igt_live_test t;
2526 struct i915_request *rq;
2527 int ring_size, count, i;
2529 if (!intel_engine_has_preemption(engine))
2532 rq = spinner_create_request(&lo.spin,
2538 i915_request_get(rq);
2539 i915_request_add(rq);
2541 ring_size = rq->wa_tail - rq->head;
2543 ring_size += rq->ring->size;
2544 ring_size = rq->ring->size / ring_size;
2545 pr_debug("%s(%s): Using maximum of %d requests\n",
2546 __func__, engine->name, ring_size);
2548 igt_spinner_end(&lo.spin);
2549 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2550 pr_err("Timed out waiting to flush %s\n", engine->name);
2551 i915_request_put(rq);
2554 i915_request_put(rq);
2556 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2561 for_each_prime_number_from(count, 1, ring_size) {
2562 rq = spinner_create_request(&hi.spin,
2567 i915_request_add(rq);
2568 if (!igt_wait_for_spinner(&hi.spin, rq))
2571 rq = spinner_create_request(&lo.spin,
2576 i915_request_add(rq);
2578 for (i = 0; i < count; i++) {
2579 rq = igt_request_alloc(lo.ctx, engine);
2582 i915_request_add(rq);
2585 rq = igt_request_alloc(hi.ctx, engine);
2589 i915_request_get(rq);
2590 i915_request_add(rq);
2591 engine->schedule(rq, &attr);
2593 igt_spinner_end(&hi.spin);
2594 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2595 struct drm_printer p =
2596 drm_info_printer(gt->i915->drm.dev);
2598 pr_err("Failed to preempt over chain of %d\n",
2600 intel_engine_dump(engine, &p,
2601 "%s\n", engine->name);
2602 i915_request_put(rq);
2605 igt_spinner_end(&lo.spin);
2606 i915_request_put(rq);
2608 rq = igt_request_alloc(lo.ctx, engine);
2612 i915_request_get(rq);
2613 i915_request_add(rq);
2615 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2616 struct drm_printer p =
2617 drm_info_printer(gt->i915->drm.dev);
2619 pr_err("Failed to flush low priority chain of %d requests\n",
2621 intel_engine_dump(engine, &p,
2622 "%s\n", engine->name);
2624 i915_request_put(rq);
2627 i915_request_put(rq);
2630 if (igt_live_test_end(&t)) {
2638 preempt_client_fini(&lo);
2640 preempt_client_fini(&hi);
2644 igt_spinner_end(&hi.spin);
2645 igt_spinner_end(&lo.spin);
2646 intel_gt_set_wedged(gt);
2651 static int create_gang(struct intel_engine_cs *engine,
2652 struct i915_request **prev)
2654 struct drm_i915_gem_object *obj;
2655 struct intel_context *ce;
2656 struct i915_request *rq;
2657 struct i915_vma *vma;
2661 ce = intel_context_create(engine);
2665 obj = i915_gem_object_create_internal(engine->i915, 4096);
2671 vma = i915_vma_instance(obj, ce->vm, NULL);
2677 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2681 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2685 /* Semaphore target: spin until zero */
2686 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2688 *cs++ = MI_SEMAPHORE_WAIT |
2690 MI_SEMAPHORE_SAD_EQ_SDD;
2692 *cs++ = lower_32_bits(vma->node.start);
2693 *cs++ = upper_32_bits(vma->node.start);
2696 u64 offset = (*prev)->batch->node.start;
2698 /* Terminate the spinner in the next lower priority batch. */
2699 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2700 *cs++ = lower_32_bits(offset);
2701 *cs++ = upper_32_bits(offset);
2705 *cs++ = MI_BATCH_BUFFER_END;
2706 i915_gem_object_flush_map(obj);
2707 i915_gem_object_unpin_map(obj);
2709 rq = intel_context_create_request(ce);
2713 rq->batch = i915_vma_get(vma);
2714 i915_request_get(rq);
2717 err = i915_request_await_object(rq, vma->obj, false);
2719 err = i915_vma_move_to_active(vma, rq, 0);
2721 err = rq->engine->emit_bb_start(rq,
2724 i915_vma_unlock(vma);
2725 i915_request_add(rq);
2729 i915_gem_object_put(obj);
2730 intel_context_put(ce);
2732 rq->mock.link.next = &(*prev)->mock.link;
2737 i915_vma_put(rq->batch);
2738 i915_request_put(rq);
2740 i915_gem_object_put(obj);
2742 intel_context_put(ce);
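/*
 * create_gang() builds a self-terminating chain: each batch spins on
 * its own page until it reads zero and, once it runs, writes zero into
 * the semaphore of the previous (lower priority) batch to terminate
 * that spinner. The requests are threaded through rq->mock.link so that
 * live_preempt_gang() below can release the most recent (highest
 * priority) batch from the CPU and then wait on each rq from highest to
 * lowest priority as the gang collapses.
 */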
2746 static int __live_preempt_ring(struct intel_engine_cs *engine,
2747 struct igt_spinner *spin,
2748 int queue_sz, int ring_sz)
2750 struct intel_context *ce[2] = {};
2751 struct i915_request *rq;
2752 struct igt_live_test t;
2756 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2759 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2760 struct intel_context *tmp;
2762 tmp = intel_context_create(engine);
2768 tmp->ring = __intel_context_ring_size(ring_sz);
2770 err = intel_context_pin(tmp);
2772 intel_context_put(tmp);
2776 memset32(tmp->ring->vaddr,
2777 0xdeadbeef, /* trigger a hang if executed */
2778 tmp->ring->vma->size / sizeof(u32));
2783 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2789 i915_request_get(rq);
2790 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2791 i915_request_add(rq);
2793 if (!igt_wait_for_spinner(spin, rq)) {
2794 intel_gt_set_wedged(engine->gt);
2795 i915_request_put(rq);
2800 /* Fill the ring until we cause a wrap */
2802 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2803 struct i915_request *tmp;
2805 tmp = intel_context_create_request(ce[0]);
2808 i915_request_put(rq);
2812 i915_request_add(tmp);
2813 intel_engine_flush_submission(engine);
2816 intel_engine_flush_submission(engine);
2817 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2818 engine->name, queue_sz, n,
2823 i915_request_put(rq);
2825 /* Create a second request to preempt the first ring */
2826 rq = intel_context_create_request(ce[1]);
2832 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2833 i915_request_get(rq);
2834 i915_request_add(rq);
2836 err = wait_for_submit(engine, rq, HZ / 2);
2837 i915_request_put(rq);
2839 pr_err("%s: preemption request was not submitted\n",
2844 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2846 ce[0]->ring->tail, ce[0]->ring->emit,
2847 ce[1]->ring->tail, ce[1]->ring->emit);
2850 intel_engine_flush_submission(engine);
2851 igt_spinner_end(spin);
2852 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2853 if (IS_ERR_OR_NULL(ce[n]))
2856 intel_context_unpin(ce[n]);
2857 intel_context_put(ce[n]);
2859 if (igt_live_test_end(&t))
2864 static int live_preempt_ring(void *arg)
2866 struct intel_gt *gt = arg;
2867 struct intel_engine_cs *engine;
2868 struct igt_spinner spin;
2869 enum intel_engine_id id;
2873 * Check that we rollback large chunks of a ring in order to do a
2874 * preemption event. Similar to live_unlite_ring, but looking at
2875 * ring size rather than the impact of intel_ring_direction().
2878 if (igt_spinner_init(&spin, gt))
2881 for_each_engine(engine, gt, id) {
2884 if (!intel_engine_has_preemption(engine))
2887 if (!intel_engine_can_store_dword(engine))
2890 st_engine_heartbeat_disable(engine);
2892 for (n = 0; n <= 3; n++) {
2893 err = __live_preempt_ring(engine, &spin,
2894 n * SZ_4K / 4, SZ_4K);
2899 st_engine_heartbeat_enable(engine);
2904 igt_spinner_fini(&spin);
2908 static int live_preempt_gang(void *arg)
2910 struct intel_gt *gt = arg;
2911 struct intel_engine_cs *engine;
2912 enum intel_engine_id id;
2914 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2918 * Build as long a chain of preempters as we can, with each
2919 * request higher priority than the last. Once we are ready, we release
2920 * the last batch which then percolates down the chain, each releasing
2921 * the next oldest in turn. The intent is to simply push as hard as we
2922 * can with the number of preemptions, trying to exceed narrow HW
2923 * limits. At a minimum, we insist that we can sort all the user
2924 * high priority levels into execution order.
2927 for_each_engine(engine, gt, id) {
2928 struct i915_request *rq = NULL;
2929 struct igt_live_test t;
2930 IGT_TIMEOUT(end_time);
2935 if (!intel_engine_has_preemption(engine))
2938 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2942 struct i915_sched_attr attr = {
2943 .priority = I915_USER_PRIORITY(prio++),
2946 err = create_gang(engine, &rq);
2950 /* Submit each spinner at increasing priority */
2951 engine->schedule(rq, &attr);
2952 } while (prio <= I915_PRIORITY_MAX &&
2953 !__igt_timeout(end_time, NULL));
2954 pr_debug("%s: Preempt chain of %d requests\n",
2955 engine->name, prio);
2958 * Such that the last spinner is the highest priority and
2959 * should execute first. When that spinner completes,
2960 * it will terminate the next lowest spinner until there
2961 * are no more spinners and the gang is complete.
2963 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2966 i915_gem_object_unpin_map(rq->batch->obj);
2969 intel_gt_set_wedged(gt);
2972 while (rq) { /* wait for each rq from highest to lowest prio */
2973 struct i915_request *n = list_next_entry(rq, mock.link);
2975 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2976 struct drm_printer p =
2977 drm_info_printer(engine->i915->drm.dev);
2979 pr_err("Failed to flush chain of %d requests, at %d\n",
2980 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2981 intel_engine_dump(engine, &p,
2982 "%s\n", engine->name);
2987 i915_vma_put(rq->batch);
2988 i915_request_put(rq);
2992 if (igt_live_test_end(&t))
3001 static struct i915_vma *
3002 create_gpr_user(struct intel_engine_cs *engine,
3003 struct i915_vma *result,
3004 unsigned int offset)
3006 struct drm_i915_gem_object *obj;
3007 struct i915_vma *vma;
3012 obj = i915_gem_object_create_internal(engine->i915, 4096);
3014 return ERR_CAST(obj);
3016 vma = i915_vma_instance(obj, result->vm, NULL);
3018 i915_gem_object_put(obj);
3022 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3025 return ERR_PTR(err);
3028 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
3031 return ERR_CAST(cs);
3034 /* All GPRs are clear for new contexts. We use GPR(0) as a constant */
3035 *cs++ = MI_LOAD_REGISTER_IMM(1);
3036 *cs++ = CS_GPR(engine, 0);
3039 for (i = 1; i < NUM_GPR; i++) {
3045 * As we read and write into the context saved GPR[i], if
3046 * we restart this batch buffer from an earlier point, we
3047 * will repeat the increment and store a value > 1.
3050 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3051 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3052 *cs++ = MI_MATH_ADD;
3053 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3055 addr = result->node.start + offset + i * sizeof(*cs);
3056 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3057 *cs++ = CS_GPR(engine, 2 * i);
3058 *cs++ = lower_32_bits(addr);
3059 *cs++ = upper_32_bits(addr);
3061 *cs++ = MI_SEMAPHORE_WAIT |
3063 MI_SEMAPHORE_SAD_GTE_SDD;
3065 *cs++ = lower_32_bits(result->node.start);
3066 *cs++ = upper_32_bits(result->node.start);
3069 *cs++ = MI_BATCH_BUFFER_END;
3070 i915_gem_object_flush_map(obj);
3071 i915_gem_object_unpin_map(obj);
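/*
 * The user batch above is the detector for live_preempt_user(): GPR(0)
 * is loaded once as a constant (the elided immediate is presumably 1),
 * and for every other GPR the batch computes GPR[i] += GPR(0) with
 * MI_MATH and stores the result via MI_STORE_REGISTER_MEM. Since the
 * GPRs are part of the saved context image, a preemption that wrongly
 * rewound the batch to an earlier point would replay an increment and
 * leave a stored value greater than 1.
 */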
3076 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3078 struct drm_i915_gem_object *obj;
3079 struct i915_vma *vma;
3082 obj = i915_gem_object_create_internal(gt->i915, sz);
3084 return ERR_CAST(obj);
3086 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
3088 i915_gem_object_put(obj);
3092 err = i915_ggtt_pin(vma, NULL, 0, 0);
3095 return ERR_PTR(err);
3101 static struct i915_request *
3102 create_gpr_client(struct intel_engine_cs *engine,
3103 struct i915_vma *global,
3104 unsigned int offset)
3106 struct i915_vma *batch, *vma;
3107 struct intel_context *ce;
3108 struct i915_request *rq;
3111 ce = intel_context_create(engine);
3113 return ERR_CAST(ce);
3115 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3121 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3125 batch = create_gpr_user(engine, vma, offset);
3126 if (IS_ERR(batch)) {
3127 err = PTR_ERR(batch);
3131 rq = intel_context_create_request(ce);
3138 err = i915_request_await_object(rq, vma->obj, false);
3140 err = i915_vma_move_to_active(vma, rq, 0);
3141 i915_vma_unlock(vma);
3143 i915_vma_lock(batch);
3145 err = i915_request_await_object(rq, batch->obj, false);
3147 err = i915_vma_move_to_active(batch, rq, 0);
3149 err = rq->engine->emit_bb_start(rq,
3152 i915_vma_unlock(batch);
3153 i915_vma_unpin(batch);
3156 i915_request_get(rq);
3157 i915_request_add(rq);
3160 i915_vma_put(batch);
3162 i915_vma_unpin(vma);
3164 intel_context_put(ce);
3165 return err ? ERR_PTR(err) : rq;
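/*
 * preempt_user() submits a small kernel request that writes into the
 * shared global buffer and then boosts it to maximum priority, forcing
 * a preemption of whichever GPR client is currently executing.
 */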
3168 static int preempt_user(struct intel_engine_cs *engine,
3169 struct i915_vma *global,
3172 struct i915_sched_attr attr = {
3173 .priority = I915_PRIORITY_MAX
3175 struct i915_request *rq;
3179 rq = intel_engine_create_kernel_request(engine);
3183 cs = intel_ring_begin(rq, 4);
3185 i915_request_add(rq);
3189 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3190 *cs++ = i915_ggtt_offset(global);
3194 intel_ring_advance(rq, cs);
3196 i915_request_get(rq);
3197 i915_request_add(rq);
3199 engine->schedule(rq, &attr);
3201 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3203 i915_request_put(rq);
3208 static int live_preempt_user(void *arg)
3210 struct intel_gt *gt = arg;
3211 struct intel_engine_cs *engine;
3212 struct i915_vma *global;
3213 enum intel_engine_id id;
3217 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3221 * In our other tests, we look at preemption in carefully
3222 * controlled conditions in the ringbuffer. Since most of the
3223 * time is spent in user batches, most of our preemptions naturally
3224 * occur there. We want to verify that when we preempt inside a batch
3225 * we continue on from the current instruction and do not roll back
3226 * to the start, or another earlier arbitration point.
3228 * To verify this, we create a batch which is a mixture of
3229 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3230 * a few preempting contexts thrown into the mix, we look for any
3231 * repeated instructions (which show up as incorrect values).
3234 global = create_global(gt, 4096);
3236 return PTR_ERR(global);
3238 result = i915_gem_object_pin_map(global->obj, I915_MAP_WC);
3239 if (IS_ERR(result)) {
3240 i915_vma_unpin_and_release(&global, 0);
3241 return PTR_ERR(result);
3244 for_each_engine(engine, gt, id) {
3245 struct i915_request *client[3] = {};
3246 struct igt_live_test t;
3249 if (!intel_engine_has_preemption(engine))
3252 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3253 continue; /* we need per-context GPR */
3255 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3260 memset(result, 0, 4096);
3262 for (i = 0; i < ARRAY_SIZE(client); i++) {
3263 struct i915_request *rq;
3265 rq = create_gpr_client(engine, global,
3266 NUM_GPR * i * sizeof(u32));
3273 /* Continuously preempt the set of 3 running contexts */
3274 for (i = 1; i <= NUM_GPR; i++) {
3275 err = preempt_user(engine, global, i);
3280 if (READ_ONCE(result[0]) != NUM_GPR) {
3281 pr_err("%s: Failed to release semaphore\n",
3287 for (i = 0; i < ARRAY_SIZE(client); i++) {
3290 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3295 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3296 if (result[NUM_GPR * i + gpr] != 1) {
3297 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3299 i, gpr, result[NUM_GPR * i + gpr]);
3307 for (i = 0; i < ARRAY_SIZE(client); i++) {
3311 i915_request_put(client[i]);
3314 /* Flush the semaphores on error */
3315 smp_store_mb(result[0], -1);
3316 if (igt_live_test_end(&t))
3322 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3326 static int live_preempt_timeout(void *arg)
3328 struct intel_gt *gt = arg;
3329 struct i915_gem_context *ctx_hi, *ctx_lo;
3330 struct igt_spinner spin_lo;
3331 struct intel_engine_cs *engine;
3332 enum intel_engine_id id;
3336 * Check that we force preemption to occur by cancelling the previous
3337 * context if it refuses to yield the GPU.
3339 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3342 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
3345 if (!intel_has_reset_engine(gt))
3348 if (igt_spinner_init(&spin_lo, gt))
3351 ctx_hi = kernel_context(gt->i915);
3354 ctx_hi->sched.priority =
3355 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
3357 ctx_lo = kernel_context(gt->i915);
3360 ctx_lo->sched.priority =
3361 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
3363 for_each_engine(engine, gt, id) {
3364 unsigned long saved_timeout;
3365 struct i915_request *rq;
3367 if (!intel_engine_has_preemption(engine))
3370 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3371 MI_NOOP); /* preemption disabled */
3377 i915_request_add(rq);
3378 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3379 intel_gt_set_wedged(gt);
3384 rq = igt_request_alloc(ctx_hi, engine);
3386 igt_spinner_end(&spin_lo);
3391 /* Flush the previous CS ack before changing timeouts */
3392 while (READ_ONCE(engine->execlists.pending[0]))
3395 saved_timeout = engine->props.preempt_timeout_ms;
3396 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffy */
3398 i915_request_get(rq);
3399 i915_request_add(rq);
3401 intel_engine_flush_submission(engine);
3402 engine->props.preempt_timeout_ms = saved_timeout;
3404 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3405 intel_gt_set_wedged(gt);
3406 i915_request_put(rq);
3411 igt_spinner_end(&spin_lo);
3412 i915_request_put(rq);
3417 kernel_context_close(ctx_lo);
3419 kernel_context_close(ctx_hi);
3421 igt_spinner_fini(&spin_lo);
3425 static int random_range(struct rnd_state *rnd, int min, int max)
3427 return i915_prandom_u32_max_state(max - min, rnd) + min;
3430 static int random_priority(struct rnd_state *rnd)
3432 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3435 struct preempt_smoke {
3436 struct intel_gt *gt;
3437 struct i915_gem_context **contexts;
3438 struct intel_engine_cs *engine;
3439 struct drm_i915_gem_object *batch;
3440 unsigned int ncontext;
3441 struct rnd_state prng;
3442 unsigned long count;
3445 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3447 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3451 static int smoke_submit(struct preempt_smoke *smoke,
3452 struct i915_gem_context *ctx, int prio,
3453 struct drm_i915_gem_object *batch)
3455 struct i915_request *rq;
3456 struct i915_vma *vma = NULL;
3460 struct i915_address_space *vm;
3462 vm = i915_gem_context_get_vm_rcu(ctx);
3463 vma = i915_vma_instance(batch, vm, NULL);
3466 return PTR_ERR(vma);
3468 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3473 ctx->sched.priority = prio;
3475 rq = igt_request_alloc(ctx, smoke->engine);
3483 err = i915_request_await_object(rq, vma->obj, false);
3485 err = i915_vma_move_to_active(vma, rq, 0);
3487 err = rq->engine->emit_bb_start(rq,
3490 i915_vma_unlock(vma);
3493 i915_request_add(rq);
3497 i915_vma_unpin(vma);
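/*
 * Each crescendo thread repeatedly picks a random context for its
 * engine and submits a request whose priority rises with each
 * submission (modulo I915_PRIORITY_MAX), until it either runs out of
 * contexts or hits the timeout.
 */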
3502 static int smoke_crescendo_thread(void *arg)
3504 struct preempt_smoke *smoke = arg;
3505 IGT_TIMEOUT(end_time);
3506 unsigned long count;
3510 struct i915_gem_context *ctx = smoke_context(smoke);
3513 err = smoke_submit(smoke,
3514 ctx, count % I915_PRIORITY_MAX,
3520 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3522 smoke->count = count;
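/*
 * smoke_crescendo() spawns one crescendo thread per engine, lets them
 * run concurrently, then stops them all and totals the number of
 * requests submitted.
 */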
3526 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3527 #define BATCH BIT(0)
3529 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3530 struct preempt_smoke arg[I915_NUM_ENGINES];
3531 struct intel_engine_cs *engine;
3532 enum intel_engine_id id;
3533 unsigned long count;
3536 for_each_engine(engine, smoke->gt, id) {
3538 arg[id].engine = engine;
3539 if (!(flags & BATCH))
3540 arg[id].batch = NULL;
3543 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3544 "igt/smoke:%d", id);
3545 if (IS_ERR(tsk[id])) {
3546 err = PTR_ERR(tsk[id]);
3549 get_task_struct(tsk[id]);
3552 yield(); /* start all threads before we kthread_stop() */
3555 for_each_engine(engine, smoke->gt, id) {
3558 if (IS_ERR_OR_NULL(tsk[id]))
3561 status = kthread_stop(tsk[id]);
3565 count += arg[id].count;
3567 put_task_struct(tsk[id]);
3570 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3571 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
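/*
 * smoke_random() is the single-threaded variant: it walks the engines
 * in turn, submitting one request at a time with a randomly chosen
 * priority, until the timeout expires.
 */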
3575 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3577 enum intel_engine_id id;
3578 IGT_TIMEOUT(end_time);
3579 unsigned long count;
3583 for_each_engine(smoke->engine, smoke->gt, id) {
3584 struct i915_gem_context *ctx = smoke_context(smoke);
3587 err = smoke_submit(smoke,
3588 ctx, random_priority(&smoke->prng),
3589 flags & BATCH ? smoke->batch : NULL);
3595 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3597 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3598 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3602 static int live_preempt_smoke(void *arg)
3604 struct preempt_smoke smoke = {
3606 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3609 const unsigned int phase[] = { 0, BATCH };
3610 struct igt_live_test t;
3615 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3618 smoke.contexts = kmalloc_array(smoke.ncontext,
3619 sizeof(*smoke.contexts),
3621 if (!smoke.contexts)
3625 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3626 if (IS_ERR(smoke.batch)) {
3627 err = PTR_ERR(smoke.batch);
3631 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3636 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3637 cs[n] = MI_ARB_CHECK;
3638 cs[n] = MI_BATCH_BUFFER_END;
3639 i915_gem_object_flush_map(smoke.batch);
3640 i915_gem_object_unpin_map(smoke.batch);
3642 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3647 for (n = 0; n < smoke.ncontext; n++) {
3648 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3649 if (!smoke.contexts[n])
3653 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3654 err = smoke_crescendo(&smoke, phase[n]);
3658 err = smoke_random(&smoke, phase[n]);
3664 if (igt_live_test_end(&t))
3667 for (n = 0; n < smoke.ncontext; n++) {
3668 if (!smoke.contexts[n])
3670 kernel_context_close(smoke.contexts[n]);
3674 i915_gem_object_put(smoke.batch);
3676 kfree(smoke.contexts);
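/*
 * nop_virtual_engine() creates nctx virtual engines over the same set
 * of siblings and measures the latency of pushing batches of empty
 * requests through them, either grouped per context (CHAIN) or
 * interleaved round-robin across the contexts.
 */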
3681 static int nop_virtual_engine(struct intel_gt *gt,
3682 struct intel_engine_cs **siblings,
3683 unsigned int nsibling,
3686 #define CHAIN BIT(0)
3688 IGT_TIMEOUT(end_time);
3689 struct i915_request *request[16] = {};
3690 struct intel_context *ve[16];
3691 unsigned long n, prime, nc;
3692 struct igt_live_test t;
3693 ktime_t times[2] = {};
3696 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3698 for (n = 0; n < nctx; n++) {
3699 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3700 if (IS_ERR(ve[n])) {
3701 err = PTR_ERR(ve[n]);
3706 err = intel_context_pin(ve[n]);
3708 intel_context_put(ve[n]);
3714 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3718 for_each_prime_number_from(prime, 1, 8192) {
3719 times[1] = ktime_get_raw();
3721 if (flags & CHAIN) {
3722 for (nc = 0; nc < nctx; nc++) {
3723 for (n = 0; n < prime; n++) {
3724 struct i915_request *rq;
3726 rq = i915_request_create(ve[nc]);
3733 i915_request_put(request[nc]);
3734 request[nc] = i915_request_get(rq);
3735 i915_request_add(rq);
3739 for (n = 0; n < prime; n++) {
3740 for (nc = 0; nc < nctx; nc++) {
3741 struct i915_request *rq;
3743 rq = i915_request_create(ve[nc]);
3750 i915_request_put(request[nc]);
3751 request[nc] = i915_request_get(rq);
3752 i915_request_add(rq);
3757 for (nc = 0; nc < nctx; nc++) {
3758 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3759 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3760 __func__, ve[0]->engine->name,
3761 request[nc]->fence.context,
3762 request[nc]->fence.seqno);
3764 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3765 __func__, ve[0]->engine->name,
3766 request[nc]->fence.context,
3767 request[nc]->fence.seqno);
3769 intel_gt_set_wedged(gt);
3774 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3776 times[0] = times[1];
3778 for (nc = 0; nc < nctx; nc++) {
3779 i915_request_put(request[nc]);
3783 if (__igt_timeout(end_time, NULL))
3787 err = igt_live_test_end(&t);
3791 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3792 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3793 prime, div64_u64(ktime_to_ns(times[1]), prime));
3796 if (igt_flush_test(gt->i915))
3799 for (nc = 0; nc < nctx; nc++) {
3800 i915_request_put(request[nc]);
3801 intel_context_unpin(ve[nc]);
3802 intel_context_put(ve[nc]);
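/*
 * __select_siblings() gathers every engine of the requested class into
 * the siblings array, optionally skipping engines rejected by the
 * filter, and returns the number found.
 */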
3808 __select_siblings(struct intel_gt *gt,
3810 struct intel_engine_cs **siblings,
3811 bool (*filter)(const struct intel_engine_cs *))
3816 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3817 if (!gt->engine_class[class][inst])
3820 if (filter && !filter(gt->engine_class[class][inst]))
3823 siblings[n++] = gt->engine_class[class][inst];
3830 select_siblings(struct intel_gt *gt,
3832 struct intel_engine_cs **siblings)
3834 return __select_siblings(gt, class, siblings, NULL);
3837 static int live_virtual_engine(void *arg)
3839 struct intel_gt *gt = arg;
3840 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3841 struct intel_engine_cs *engine;
3842 enum intel_engine_id id;
3846 if (intel_uc_uses_guc_submission(>->uc))
3849 for_each_engine(engine, gt, id) {
3850 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3852 pr_err("Failed to wrap engine %s: err=%d\n",
3858 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3861 nsibling = select_siblings(gt, class, siblings);
3865 for (n = 1; n <= nsibling + 1; n++) {
3866 err = nop_virtual_engine(gt, siblings, nsibling,
3872 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3880 static int mask_virtual_engine(struct intel_gt *gt,
3881 struct intel_engine_cs **siblings,
3882 unsigned int nsibling)
3884 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3885 struct intel_context *ve;
3886 struct igt_live_test t;
3891 * Check that by setting the execution mask on a request, we can
3892 * restrict it to our desired engine within the virtual engine.
3895 ve = intel_execlists_create_virtual(siblings, nsibling);
3901 err = intel_context_pin(ve);
3905 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3909 for (n = 0; n < nsibling; n++) {
3910 request[n] = i915_request_create(ve);
3911 if (IS_ERR(request[n])) {
3912 err = PTR_ERR(request[n]);
3917 /* Reverse order as it's more likely to be unnatural */
3918 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3920 i915_request_get(request[n]);
3921 i915_request_add(request[n]);
3924 for (n = 0; n < nsibling; n++) {
3925 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3926 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3927 __func__, ve->engine->name,
3928 request[n]->fence.context,
3929 request[n]->fence.seqno);
3931 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3932 __func__, ve->engine->name,
3933 request[n]->fence.context,
3934 request[n]->fence.seqno);
3936 intel_gt_set_wedged(gt);
3941 if (request[n]->engine != siblings[nsibling - n - 1]) {
3942 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3943 request[n]->engine->name,
3944 siblings[nsibling - n - 1]->name);
3950 err = igt_live_test_end(&t);
3952 if (igt_flush_test(gt->i915))
3955 for (n = 0; n < nsibling; n++)
3956 i915_request_put(request[n]);
3959 intel_context_unpin(ve);
3961 intel_context_put(ve);
3966 static int live_virtual_mask(void *arg)
3968 struct intel_gt *gt = arg;
3969 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3973 if (intel_uc_uses_guc_submission(>->uc))
3976 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3977 unsigned int nsibling;
3979 nsibling = select_siblings(gt, class, siblings);
3983 err = mask_virtual_engine(gt, siblings, nsibling);
3991 static int slicein_virtual_engine(struct intel_gt *gt,
3992 struct intel_engine_cs **siblings,
3993 unsigned int nsibling)
3995 const long timeout = slice_timeout(siblings[0]);
3996 struct intel_context *ce;
3997 struct i915_request *rq;
3998 struct igt_spinner spin;
4003 * Virtual requests must take part in timeslicing on the target engines.
4006 if (igt_spinner_init(&spin, gt))
4009 for (n = 0; n < nsibling; n++) {
4010 ce = intel_context_create(siblings[n]);
4016 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4017 intel_context_put(ce);
4023 i915_request_add(rq);
4026 ce = intel_execlists_create_virtual(siblings, nsibling);
4032 rq = intel_context_create_request(ce);
4033 intel_context_put(ce);
4039 i915_request_get(rq);
4040 i915_request_add(rq);
4041 if (i915_request_wait(rq, 0, timeout) < 0) {
4042 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4043 __func__, rq->engine->name);
4045 intel_gt_set_wedged(gt);
4048 i915_request_put(rq);
4051 igt_spinner_end(&spin);
4052 if (igt_flush_test(gt->i915))
4054 igt_spinner_fini(&spin);
4058 static int sliceout_virtual_engine(struct intel_gt *gt,
4059 struct intel_engine_cs **siblings,
4060 unsigned int nsibling)
4062 const long timeout = slice_timeout(siblings[0]);
4063 struct intel_context *ce;
4064 struct i915_request *rq;
4065 struct igt_spinner spin;
4070 * Virtual requests must allow others a fair timeslice.
4073 if (igt_spinner_init(&spin, gt))
4076 /* XXX We do not handle oversubscription and fairness with normal rq */
4077 for (n = 0; n < nsibling; n++) {
4078 ce = intel_execlists_create_virtual(siblings, nsibling);
4084 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4085 intel_context_put(ce);
4091 i915_request_add(rq);
4094 for (n = 0; !err && n < nsibling; n++) {
4095 ce = intel_context_create(siblings[n]);
4101 rq = intel_context_create_request(ce);
4102 intel_context_put(ce);
4108 i915_request_get(rq);
4109 i915_request_add(rq);
4110 if (i915_request_wait(rq, 0, timeout) < 0) {
4111 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4112 __func__, siblings[n]->name);
4114 intel_gt_set_wedged(gt);
4117 i915_request_put(rq);
4121 igt_spinner_end(&spin);
4122 if (igt_flush_test(gt->i915))
4124 igt_spinner_fini(&spin);
4128 static int live_virtual_slice(void *arg)
4130 struct intel_gt *gt = arg;
4131 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4135 if (intel_uc_uses_guc_submission(>->uc))
4138 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4139 unsigned int nsibling;
4141 nsibling = __select_siblings(gt, class, siblings,
4142 intel_engine_has_timeslices);
4146 err = slicein_virtual_engine(gt, siblings, nsibling);
4150 err = sliceout_virtual_engine(gt, siblings, nsibling);
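/*
 * preserved_virtual_engine() submits a chain of requests through a
 * virtual engine, pinning each request (via its execution_mask) to a
 * different sibling. Each request stores CS_GPR(n) to scratch and then
 * loads the next GPR, so the final scratch contents are only correct
 * if the user register state is carried in the context image from one
 * physical engine to the next.
 */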
4158 static int preserved_virtual_engine(struct intel_gt *gt,
4159 struct intel_engine_cs **siblings,
4160 unsigned int nsibling)
4162 struct i915_request *last = NULL;
4163 struct intel_context *ve;
4164 struct i915_vma *scratch;
4165 struct igt_live_test t;
4170 scratch = create_scratch(siblings[0]->gt);
4171 if (IS_ERR(scratch))
4172 return PTR_ERR(scratch);
4174 err = i915_vma_sync(scratch);
4178 ve = intel_execlists_create_virtual(siblings, nsibling);
4184 err = intel_context_pin(ve);
4188 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4192 for (n = 0; n < NUM_GPR_DW; n++) {
4193 struct intel_engine_cs *engine = siblings[n % nsibling];
4194 struct i915_request *rq;
4196 rq = i915_request_create(ve);
4202 i915_request_put(last);
4203 last = i915_request_get(rq);
4205 cs = intel_ring_begin(rq, 8);
4207 i915_request_add(rq);
4212 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4213 *cs++ = CS_GPR(engine, n);
4214 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4217 *cs++ = MI_LOAD_REGISTER_IMM(1);
4218 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4222 intel_ring_advance(rq, cs);
4224 /* Restrict this request to run on a particular engine */
4225 rq->execution_mask = engine->mask;
4226 i915_request_add(rq);
4229 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4234 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4240 for (n = 0; n < NUM_GPR_DW; n++) {
4242 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4249 i915_gem_object_unpin_map(scratch->obj);
4252 if (igt_live_test_end(&t))
4254 i915_request_put(last);
4256 intel_context_unpin(ve);
4258 intel_context_put(ve);
4260 i915_vma_unpin_and_release(&scratch, 0);
4264 static int live_virtual_preserved(void *arg)
4266 struct intel_gt *gt = arg;
4267 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4271 * Check that the context image retains non-privileged (user) registers
4272 * from one engine to the next. For this we check that the CS_GPR registers are preserved.
4276 if (intel_uc_uses_guc_submission(>->uc))
4279 /* As we use CS_GPR, we cannot run on platforms before they existed on all engines. */
4280 if (INTEL_GEN(gt->i915) < 9)
4283 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4286 nsibling = select_siblings(gt, class, siblings);
4290 err = preserved_virtual_engine(gt, siblings, nsibling);
4298 static int bond_virtual_engine(struct intel_gt *gt,
4300 struct intel_engine_cs **siblings,
4301 unsigned int nsibling,
4303 #define BOND_SCHEDULE BIT(0)
4305 struct intel_engine_cs *master;
4306 struct i915_request *rq[16];
4307 enum intel_engine_id id;
4308 struct igt_spinner spin;
4313 * A set of bonded requests is intended to be run concurrently
4314 * across a number of engines. We use one request per-engine
4315 * and a magic fence to schedule each of the bonded requests
4316 * at the same time. A consequence of our current scheduler is that
4317 * we only move requests to the HW ready queue when the request
4318 * becomes ready, that is when all of its prerequisite fences have
4319 * been signaled. As one of those fences is the master submit fence,
4320 * there is a delay on all secondary fences as the HW may be
4321 * currently busy. Equally, as all the requests are independent,
4322 * they may have other fences that delay individual request
4323 * submission to HW. Ergo, we do not guarantee that all requests are
4324 * immediately submitted to HW at the same time, just that if the
4325 * rules are abided by, they are ready at the same time as the
4326 * first is submitted. Userspace can embed semaphores in its batch
4327 * to ensure parallel execution of its phases as it requires.
4328 * Though naturally it gets requested that perhaps the scheduler should
4329 * take care of parallel execution, even across preemption events on
4330 * different HW. (The proper answer is of course "lalalala".)
4332 * With the submit-fence, we have identified three possible phases
4333 * of synchronisation depending on the master fence: queued (not
4334 * ready), executing, and signaled. The first two are quite simple
4335 * and checked below. However, the signaled master fence handling is
4336 * contentious. Currently we do not distinguish between a signaled
4337 * fence and an expired fence, as once signaled it does not convey
4338 * any information about the previous execution. It may even be freed
4339 * and hence checking later it may not exist at all. Ergo we currently
4340 * do not apply the bonding constraint for an already signaled fence,
4341 * as our expectation is that it should not constrain the secondaries
4342 * and is outside of the scope of the bonded request API (i.e. all
4343 * userspace requests are meant to be running in parallel). As
4344 * it imposes no constraint, and is effectively a no-op, we do not
4345 * check below as normal execution flows are checked extensively above.
4347 * XXX Is the degenerate handling of signaled submit fences the
4348 * expected behaviour for userspace?
4351 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4353 if (igt_spinner_init(&spin, gt))
4357 rq[0] = ERR_PTR(-ENOMEM);
4358 for_each_engine(master, gt, id) {
4359 struct i915_sw_fence fence = {};
4360 struct intel_context *ce;
4362 if (master->class == class)
4365 ce = intel_context_create(master);
4371 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4373 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4374 intel_context_put(ce);
4375 if (IS_ERR(rq[0])) {
4376 err = PTR_ERR(rq[0]);
4379 i915_request_get(rq[0]);
4381 if (flags & BOND_SCHEDULE) {
4382 onstack_fence_init(&fence);
4383 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4388 i915_request_add(rq[0]);
4392 if (!(flags & BOND_SCHEDULE) &&
4393 !igt_wait_for_spinner(&spin, rq[0])) {
4398 for (n = 0; n < nsibling; n++) {
4399 struct intel_context *ve;
4401 ve = intel_execlists_create_virtual(siblings, nsibling);
4404 onstack_fence_fini(&fence);
4408 err = intel_virtual_engine_attach_bond(ve->engine,
4412 intel_context_put(ve);
4413 onstack_fence_fini(&fence);
4417 err = intel_context_pin(ve);
4418 intel_context_put(ve);
4420 onstack_fence_fini(&fence);
4424 rq[n + 1] = i915_request_create(ve);
4425 intel_context_unpin(ve);
4426 if (IS_ERR(rq[n + 1])) {
4427 err = PTR_ERR(rq[n + 1]);
4428 onstack_fence_fini(&fence);
4431 i915_request_get(rq[n + 1]);
4433 err = i915_request_await_execution(rq[n + 1],
4435 ve->engine->bond_execute);
4436 i915_request_add(rq[n + 1]);
4438 onstack_fence_fini(&fence);
4442 onstack_fence_fini(&fence);
4443 intel_engine_flush_submission(master);
4444 igt_spinner_end(&spin);
4446 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4447 pr_err("Master request did not execute (on %s)!\n",
4448 rq[0]->engine->name);
4453 for (n = 0; n < nsibling; n++) {
4454 if (i915_request_wait(rq[n + 1], 0,
4455 MAX_SCHEDULE_TIMEOUT) < 0) {
4460 if (rq[n + 1]->engine != siblings[n]) {
4461 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4463 rq[n + 1]->engine->name,
4464 rq[0]->engine->name);
4470 for (n = 0; !IS_ERR(rq[n]); n++)
4471 i915_request_put(rq[n]);
4472 rq[0] = ERR_PTR(-ENOMEM);
4476 for (n = 0; !IS_ERR(rq[n]); n++)
4477 i915_request_put(rq[n]);
4478 if (igt_flush_test(gt->i915))
4481 igt_spinner_fini(&spin);
4485 static int live_virtual_bond(void *arg)
4487 static const struct phase {
4492 { "schedule", BOND_SCHEDULE },
4495 struct intel_gt *gt = arg;
4496 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4500 if (intel_uc_uses_guc_submission(>->uc))
4503 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4504 const struct phase *p;
4507 nsibling = select_siblings(gt, class, siblings);
4511 for (p = phases; p->name; p++) {
4512 err = bond_virtual_engine(gt,
4513 class, siblings, nsibling,
4516 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4517 __func__, p->name, class, nsibling, err);
4526 static int reset_virtual_engine(struct intel_gt *gt,
4527 struct intel_engine_cs **siblings,
4528 unsigned int nsibling)
4530 struct intel_engine_cs *engine;
4531 struct intel_context *ve;
4532 struct igt_spinner spin;
4533 struct i915_request *rq;
4538 * In order to support offline error capture for fast preempt reset,
4539 * we need to decouple the guilty request and ensure that it and its
4540 * descendants are not executed while the capture is in progress.
4543 if (igt_spinner_init(&spin, gt))
4546 ve = intel_execlists_create_virtual(siblings, nsibling);
4552 for (n = 0; n < nsibling; n++)
4553 st_engine_heartbeat_disable(siblings[n]);
4555 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4560 i915_request_add(rq);
4562 if (!igt_wait_for_spinner(&spin, rq)) {
4563 intel_gt_set_wedged(gt);
4568 engine = rq->engine;
4569 GEM_BUG_ON(engine == ve->engine);
4571 /* Take ownership of the reset and tasklet */
4572 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
4573 >->reset.flags)) {
4574 intel_gt_set_wedged(gt);
4578 tasklet_disable(&engine->execlists.tasklet);
4580 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
4581 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4583 /* Fake a preemption event; failed of course */
4584 spin_lock_irq(&engine->active.lock);
4585 __unwind_incomplete_requests(engine);
4586 spin_unlock_irq(&engine->active.lock);
4587 GEM_BUG_ON(rq->engine != ve->engine);
4589 /* Reset the engine while keeping our active request on hold */
4590 execlists_hold(engine, rq);
4591 GEM_BUG_ON(!i915_request_on_hold(rq));
4593 intel_engine_reset(engine, NULL);
4594 GEM_BUG_ON(rq->fence.error != -EIO);
4596 /* Release our grasp on the engine, letting CS flow again */
4597 tasklet_enable(&engine->execlists.tasklet);
4598 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags);
4600 /* Check that we do not resubmit the held request */
4601 i915_request_get(rq);
4602 if (!i915_request_wait(rq, 0, HZ / 5)) {
4603 pr_err("%s: on hold request completed!\n",
4605 intel_gt_set_wedged(gt);
4609 GEM_BUG_ON(!i915_request_on_hold(rq));
4611 /* But is resubmitted on release */
4612 execlists_unhold(engine, rq);
4613 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4614 pr_err("%s: held request did not complete!\n",
4616 intel_gt_set_wedged(gt);
4621 i915_request_put(rq);
4623 for (n = 0; n < nsibling; n++)
4624 st_engine_heartbeat_enable(siblings[n]);
4626 intel_context_put(ve);
4628 igt_spinner_fini(&spin);
4632 static int live_virtual_reset(void *arg)
4634 struct intel_gt *gt = arg;
4635 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4639 * Check that we handle a reset event within a virtual engine.
4640 * Only the physical engine is reset, but we have to check the flow
4641 * of the virtual requests around the reset, and make sure it is not lost.
4645 if (intel_uc_uses_guc_submission(>->uc))
4648 if (!intel_has_reset_engine(gt))
4651 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4654 nsibling = select_siblings(gt, class, siblings);
4658 err = reset_virtual_engine(gt, siblings, nsibling);
4666 int intel_execlists_live_selftests(struct drm_i915_private *i915)
4668 static const struct i915_subtest tests[] = {
4669 SUBTEST(live_sanitycheck),
4670 SUBTEST(live_unlite_switch),
4671 SUBTEST(live_unlite_preempt),
4672 SUBTEST(live_unlite_ring),
4673 SUBTEST(live_pin_rewind),
4674 SUBTEST(live_hold_reset),
4675 SUBTEST(live_error_interrupt),
4676 SUBTEST(live_timeslice_preempt),
4677 SUBTEST(live_timeslice_rewind),
4678 SUBTEST(live_timeslice_queue),
4679 SUBTEST(live_timeslice_nopreempt),
4680 SUBTEST(live_busywait_preempt),
4681 SUBTEST(live_preempt),
4682 SUBTEST(live_late_preempt),
4683 SUBTEST(live_nopreempt),
4684 SUBTEST(live_preempt_cancel),
4685 SUBTEST(live_suppress_self_preempt),
4686 SUBTEST(live_chain_preempt),
4687 SUBTEST(live_preempt_ring),
4688 SUBTEST(live_preempt_gang),
4689 SUBTEST(live_preempt_timeout),
4690 SUBTEST(live_preempt_user),
4691 SUBTEST(live_preempt_smoke),
4692 SUBTEST(live_virtual_engine),
4693 SUBTEST(live_virtual_mask),
4694 SUBTEST(live_virtual_preserved),
4695 SUBTEST(live_virtual_slice),
4696 SUBTEST(live_virtual_bond),
4697 SUBTEST(live_virtual_reset),
4700 if (!HAS_EXECLISTS(i915))
4703 if (intel_gt_is_wedged(&i915->gt))
4706 return intel_gt_live_subtests(tests, &i915->gt);
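/*
 * emit_semaphore_signal() submits a barrier-priority request on the
 * given context that stores a dword into a slot within the engine's
 * status page, releasing anyone polling that slot with
 * MI_SEMAPHORE_WAIT.
 */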
4709 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
4712 i915_ggtt_offset(ce->engine->status_page.vma) +
4713 offset_in_page(slot);
4714 struct i915_request *rq;
4717 rq = intel_context_create_request(ce);
4721 cs = intel_ring_begin(rq, 4);
4723 i915_request_add(rq);
4727 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4732 intel_ring_advance(rq, cs);
4734 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4735 i915_request_add(rq);
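/*
 * context_flush() issues a kernel request that awaits the context's
 * last request and then waits for it to complete, so on return the
 * context has been switched out and its image written back to memory.
 */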
4739 static int context_flush(struct intel_context *ce, long timeout)
4741 struct i915_request *rq;
4742 struct dma_fence *fence;
4745 rq = intel_engine_create_kernel_request(ce->engine);
4749 fence = i915_active_fence_get(&ce->timeline->last_request);
4751 i915_request_await_dma_fence(rq, fence);
4752 dma_fence_put(fence);
4755 rq = i915_request_get(rq);
4756 i915_request_add(rq);
4757 if (i915_request_wait(rq, 0, timeout) < 0)
4759 i915_request_put(rq);
4761 rmb(); /* We know the request is written, make sure all state is too! */
4765 static int live_lrc_layout(void *arg)
4767 struct intel_gt *gt = arg;
4768 struct intel_engine_cs *engine;
4769 enum intel_engine_id id;
4774 * Check that the register offsets we use to create the initial reg state
4775 * match the layout saved by HW.
4778 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4783 for_each_engine(engine, gt, id) {
4787 if (!engine->default_state)
4790 hw = shmem_pin_map(engine->default_state);
4795 hw += LRC_STATE_OFFSET / sizeof(*hw);
4797 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4798 engine->kernel_context,
4800 engine->kernel_context->ring,
4813 pr_debug("%s: skipped instruction %x at dword %d\n",
4814 engine->name, lri, dw);
4819 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4820 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4821 engine->name, dw, lri);
4826 if (lrc[dw] != lri) {
4827 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4828 engine->name, dw, lri, lrc[dw]);
4838 if (hw[dw] != lrc[dw]) {
4839 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4840 engine->name, dw, hw[dw], lrc[dw]);
4846 * Skip over the actual register value as we
4847 * expect that to differ.
4852 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4855 pr_info("%s: HW register image:\n", engine->name);
4856 igt_hexdump(hw, PAGE_SIZE);
4858 pr_info("%s: SW register image:\n", engine->name);
4859 igt_hexdump(lrc, PAGE_SIZE);
4862 shmem_unpin_map(engine->default_state, hw);
4871 static int find_offset(const u32 *lri, u32 offset)
4875 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4876 if (lri[i] == offset)
4882 static int live_lrc_fixed(void *arg)
4884 struct intel_gt *gt = arg;
4885 struct intel_engine_cs *engine;
4886 enum intel_engine_id id;
4890 * Check the assumed register offsets match the actual locations in
4891 * the context image.
4894 for_each_engine(engine, gt, id) {
4901 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4906 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4911 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4916 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4921 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4922 lrc_ring_mi_mode(engine),
4926 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4931 i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
4932 lrc_ring_wa_bb_per_ctx(engine),
4933 "RING_BB_PER_CTX_PTR"
4936 i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
4937 lrc_ring_indirect_ptr(engine),
4938 "RING_INDIRECT_CTX_PTR"
4941 i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
4942 lrc_ring_indirect_offset(engine),
4943 "RING_INDIRECT_CTX_OFFSET"
4946 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4948 "RING_CTX_TIMESTAMP"
4951 i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
4952 lrc_ring_gpr0(engine),
4956 i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
4957 lrc_ring_cmd_buf_cctl(engine),
4964 if (!engine->default_state)
4967 hw = shmem_pin_map(engine->default_state);
4972 hw += LRC_STATE_OFFSET / sizeof(*hw);
4974 for (t = tbl; t->name; t++) {
4975 int dw = find_offset(hw, t->reg);
4977 if (dw != t->offset) {
4978 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4988 shmem_unpin_map(engine->default_state, hw);
4994 static int __live_lrc_state(struct intel_engine_cs *engine,
4995 struct i915_vma *scratch)
4997 struct intel_context *ce;
4998 struct i915_request *rq;
4999 struct i915_gem_ww_ctx ww;
5005 u32 expected[MAX_IDX];
5010 ce = intel_context_create(engine);
5014 i915_gem_ww_ctx_init(&ww, false);
5016 err = i915_gem_object_lock(scratch->obj, &ww);
5018 err = intel_context_pin_ww(ce, &ww);
5022 rq = i915_request_create(ce);
5028 cs = intel_ring_begin(rq, 4 * MAX_IDX);
5031 i915_request_add(rq);
5035 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5036 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
5037 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
5040 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
5042 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5043 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
5044 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
5047 err = i915_request_await_object(rq, scratch->obj, true);
5049 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5051 i915_request_get(rq);
5052 i915_request_add(rq);
5056 intel_engine_flush_submission(engine);
5057 expected[RING_TAIL_IDX] = ce->ring->tail;
5059 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5064 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5070 for (n = 0; n < MAX_IDX; n++) {
5071 if (cs[n] != expected[n]) {
5072 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
5073 engine->name, n, cs[n], expected[n]);
5079 i915_gem_object_unpin_map(scratch->obj);
5082 i915_request_put(rq);
5084 intel_context_unpin(ce);
5086 if (err == -EDEADLK) {
5087 err = i915_gem_ww_ctx_backoff(&ww);
5091 i915_gem_ww_ctx_fini(&ww);
5092 intel_context_put(ce);
5096 static int live_lrc_state(void *arg)
5098 struct intel_gt *gt = arg;
5099 struct intel_engine_cs *engine;
5100 struct i915_vma *scratch;
5101 enum intel_engine_id id;
5105 * Check the live register state matches what we expect for this context.
5109 scratch = create_scratch(gt);
5110 if (IS_ERR(scratch))
5111 return PTR_ERR(scratch);
5113 for_each_engine(engine, gt, id) {
5114 err = __live_lrc_state(engine, scratch);
5119 if (igt_flush_test(gt->i915))
5122 i915_vma_unpin_and_release(&scratch, 0);
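/*
 * gpr_make_dirty() loads STACK_MAGIC into every CS_GPR of the given
 * context at barrier priority, making any leak of this context's
 * register state into another context easy to spot.
 */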
5126 static int gpr_make_dirty(struct intel_context *ce)
5128 struct i915_request *rq;
5132 rq = intel_context_create_request(ce);
5136 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
5138 i915_request_add(rq);
5142 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
5143 for (n = 0; n < NUM_GPR_DW; n++) {
5144 *cs++ = CS_GPR(ce->engine, n);
5145 *cs++ = STACK_MAGIC;
5149 intel_ring_advance(rq, cs);
5151 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5152 i915_request_add(rq);
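/*
 * __gpr_read() submits a request on the target context that first
 * spins on a status-page semaphore and then stores every CS_GPR into
 * the scratch buffer, so the registers are only sampled after we have
 * had a chance to dirty them from another context.
 */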
5157 static struct i915_request *
5158 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
5161 i915_ggtt_offset(ce->engine->status_page.vma) +
5162 offset_in_page(slot);
5163 struct i915_request *rq;
5168 rq = intel_context_create_request(ce);
5172 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
5174 i915_request_add(rq);
5175 return ERR_CAST(cs);
5178 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5181 *cs++ = MI_SEMAPHORE_WAIT |
5182 MI_SEMAPHORE_GLOBAL_GTT |
5184 MI_SEMAPHORE_SAD_NEQ_SDD;
5189 for (n = 0; n < NUM_GPR_DW; n++) {
5190 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5191 *cs++ = CS_GPR(ce->engine, n);
5192 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
5196 i915_vma_lock(scratch);
5197 err = i915_request_await_object(rq, scratch->obj, true);
5199 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
5200 i915_vma_unlock(scratch);
5202 i915_request_get(rq);
5203 i915_request_add(rq);
5205 i915_request_put(rq);
5212 static int __live_lrc_gpr(struct intel_engine_cs *engine,
5213 struct i915_vma *scratch,
5216 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
5217 struct intel_context *ce;
5218 struct i915_request *rq;
5223 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
5224 return 0; /* GPR only on rcs0 for gen8 */
5226 err = gpr_make_dirty(engine->kernel_context);
5230 ce = intel_context_create(engine);
5234 rq = __gpr_read(ce, scratch, slot);
5240 err = wait_for_submit(engine, rq, HZ / 2);
5245 err = gpr_make_dirty(engine->kernel_context);
5249 err = emit_semaphore_signal(engine->kernel_context, slot);
5257 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
5262 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
5268 for (n = 0; n < NUM_GPR_DW; n++) {
5270 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
5272 n / 2, n & 1 ? "udw" : "ldw",
5279 i915_gem_object_unpin_map(scratch->obj);
5282 memset32(&slot[0], -1, 4);
5284 i915_request_put(rq);
5286 intel_context_put(ce);
5290 static int live_lrc_gpr(void *arg)
5292 struct intel_gt *gt = arg;
5293 struct intel_engine_cs *engine;
5294 struct i915_vma *scratch;
5295 enum intel_engine_id id;
5299 * Check that GPR registers are cleared in new contexts as we need
5300 * to avoid leaking any information from previous contexts.
5303 scratch = create_scratch(gt);
5304 if (IS_ERR(scratch))
5305 return PTR_ERR(scratch);
5307 for_each_engine(engine, gt, id) {
5308 st_engine_heartbeat_disable(engine);
5310 err = __live_lrc_gpr(engine, scratch, false);
5314 err = __live_lrc_gpr(engine, scratch, true);
5319 st_engine_heartbeat_enable(engine);
5320 if (igt_flush_test(gt->i915))
5326 i915_vma_unpin_and_release(&scratch, 0);
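/*
 * create_timestamp() submits a request that blocks on a status-page
 * semaphore and, once released, stores RING_CTX_TIMESTAMP back into
 * another slot of the status page, letting us sample the timestamp
 * register at a controlled point.
 */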
5330 static struct i915_request *
5331 create_timestamp(struct intel_context *ce, void *slot, int idx)
5334 i915_ggtt_offset(ce->engine->status_page.vma) +
5335 offset_in_page(slot);
5336 struct i915_request *rq;
5340 rq = intel_context_create_request(ce);
5344 cs = intel_ring_begin(rq, 10);
5350 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5353 *cs++ = MI_SEMAPHORE_WAIT |
5354 MI_SEMAPHORE_GLOBAL_GTT |
5356 MI_SEMAPHORE_SAD_NEQ_SDD;
5361 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
5362 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
5363 *cs++ = offset + idx * sizeof(u32);
5366 intel_ring_advance(rq, cs);
5368 rq->sched.attr.priority = I915_PRIORITY_MASK;
5371 i915_request_get(rq);
5372 i915_request_add(rq);
5374 i915_request_put(rq);
5375 return ERR_PTR(err);
5381 struct lrc_timestamp {
5382 struct intel_engine_cs *engine;
5383 struct intel_context *ce[2];
5387 static bool timestamp_advanced(u32 start, u32 end)
5389 return (s32)(end - start) > 0;
5392 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
5394 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
5395 struct i915_request *rq;
5399 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
5400 rq = create_timestamp(arg->ce[0], slot, 1);
5404 err = wait_for_submit(rq->engine, rq, HZ / 2);
5409 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
5410 err = emit_semaphore_signal(arg->ce[1], slot);
5418 /* And wait for switch to kernel (to save our context to memory) */
5419 err = context_flush(arg->ce[0], HZ / 2);
5423 if (!timestamp_advanced(arg->poison, slot[1])) {
5424 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
5425 arg->engine->name, preempt ? "preempt" : "simple",
5426 arg->poison, slot[1]);
5430 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
5431 if (!timestamp_advanced(slot[1], timestamp)) {
5432 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
5433 arg->engine->name, preempt ? "preempt" : "simple",
5434 slot[1], timestamp);
5439 memset32(slot, -1, 4);
5440 i915_request_put(rq);
5444 static int live_lrc_timestamp(void *arg)
5446 struct lrc_timestamp data = {};
5447 struct intel_gt *gt = arg;
5448 enum intel_engine_id id;
5449 const u32 poison[] = {
5457 * We want to verify that the timestamp is saved and restored across
5458 * context switches and is monotonic.
5460 * So we do this with a little bit of LRC poisoning to check various
5461 * boundary conditions, and see what happens if we preempt the context
5462 * with a second request (carrying more poison into the timestamp).
5465 for_each_engine(data.engine, gt, id) {
5468 st_engine_heartbeat_disable(data.engine);
5470 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5471 struct intel_context *tmp;
5473 tmp = intel_context_create(data.engine);
5479 err = intel_context_pin(tmp);
5481 intel_context_put(tmp);
5488 for (i = 0; i < ARRAY_SIZE(poison); i++) {
5489 data.poison = poison[i];
5491 err = __lrc_timestamp(&data, false);
5495 err = __lrc_timestamp(&data, true);
5501 st_engine_heartbeat_enable(data.engine);
5502 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
5506 intel_context_unpin(data.ce[i]);
5507 intel_context_put(data.ce[i]);
5510 if (igt_flush_test(gt->i915))
5519 static struct i915_vma *
5520 create_user_vma(struct i915_address_space *vm, unsigned long size)
5522 struct drm_i915_gem_object *obj;
5523 struct i915_vma *vma;
5526 obj = i915_gem_object_create_internal(vm->i915, size);
5528 return ERR_CAST(obj);
5530 vma = i915_vma_instance(obj, vm, NULL);
5532 i915_gem_object_put(obj);
5536 err = i915_vma_pin(vma, 0, 0, PIN_USER);
5538 i915_gem_object_put(obj);
5539 return ERR_PTR(err);
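/*
 * store_context() walks the LRI list in the engine's default context
 * image and builds a user batch of MI_SRM commands, one per listed
 * register, capturing the live register values into the scratch
 * buffer.
 */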
5545 static struct i915_vma *
5546 store_context(struct intel_context *ce, struct i915_vma *scratch)
5548 struct i915_vma *batch;
5549 u32 dw, x, *cs, *hw;
5552 batch = create_user_vma(ce->vm, SZ_64K);
5556 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5558 i915_vma_put(batch);
5559 return ERR_CAST(cs);
5562 defaults = shmem_pin_map(ce->engine->default_state);
5564 i915_gem_object_unpin_map(batch->obj);
5565 i915_vma_put(batch);
5566 return ERR_PTR(-ENOMEM);
5572 hw += LRC_STATE_OFFSET / sizeof(*hw);
5574 u32 len = hw[dw] & 0x7f;
5581 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5587 len = (len + 1) / 2;
5589 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
5591 *cs++ = lower_32_bits(scratch->node.start + x);
5592 *cs++ = upper_32_bits(scratch->node.start + x);
5597 } while (dw < PAGE_SIZE / sizeof(u32) &&
5598 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5600 *cs++ = MI_BATCH_BUFFER_END;
5602 shmem_unpin_map(ce->engine->default_state, defaults);
5604 i915_gem_object_flush_map(batch->obj);
5605 i915_gem_object_unpin_map(batch->obj);
5610 static int move_to_active(struct i915_request *rq,
5611 struct i915_vma *vma,
5617 err = i915_request_await_object(rq, vma->obj, flags);
5619 err = i915_vma_move_to_active(vma, rq, flags);
5620 i915_vma_unlock(vma);
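/*
 * record_registers() brackets a semaphore wait with two store_context()
 * batches: the first captures the register state before we let a rival
 * context run, the second captures it afterwards, with arbitration
 * disabled around each capture so it cannot be split by preemption.
 */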
5625 static struct i915_request *
5626 record_registers(struct intel_context *ce,
5627 struct i915_vma *before,
5628 struct i915_vma *after,
5631 struct i915_vma *b_before, *b_after;
5632 struct i915_request *rq;
5636 b_before = store_context(ce, before);
5637 if (IS_ERR(b_before))
5638 return ERR_CAST(b_before);
5640 b_after = store_context(ce, after);
5641 if (IS_ERR(b_after)) {
5642 rq = ERR_CAST(b_after);
5646 rq = intel_context_create_request(ce);
5650 err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
5654 err = move_to_active(rq, b_before, 0);
5658 err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
5662 err = move_to_active(rq, b_after, 0);
5666 cs = intel_ring_begin(rq, 14);
5672 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5673 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5674 *cs++ = lower_32_bits(b_before->node.start);
5675 *cs++ = upper_32_bits(b_before->node.start);
5677 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
5678 *cs++ = MI_SEMAPHORE_WAIT |
5679 MI_SEMAPHORE_GLOBAL_GTT |
5681 MI_SEMAPHORE_SAD_NEQ_SDD;
5683 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5684 offset_in_page(sema);
5688 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5689 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5690 *cs++ = lower_32_bits(b_after->node.start);
5691 *cs++ = upper_32_bits(b_after->node.start);
5693 intel_ring_advance(rq, cs);
5695 WRITE_ONCE(*sema, 0);
5696 i915_request_get(rq);
5697 i915_request_add(rq);
5699 i915_vma_put(b_after);
5701 i915_vma_put(b_before);
5705 i915_request_add(rq);
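/*
 * load_context() builds the complementary batch: for every register
 * named in the default image's LRI list it emits an MI_LRI writing the
 * poison value, ready to stomp over a context's register state.
 */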
5710 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
5712 struct i915_vma *batch;
5716 batch = create_user_vma(ce->vm, SZ_64K);
5720 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
5722 i915_vma_put(batch);
5723 return ERR_CAST(cs);
5726 defaults = shmem_pin_map(ce->engine->default_state);
5728 i915_gem_object_unpin_map(batch->obj);
5729 i915_vma_put(batch);
5730 return ERR_PTR(-ENOMEM);
5735 hw += LRC_STATE_OFFSET / sizeof(*hw);
5737 u32 len = hw[dw] & 0x7f;
5744 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5750 len = (len + 1) / 2;
5751 *cs++ = MI_LOAD_REGISTER_IMM(len);
5757 } while (dw < PAGE_SIZE / sizeof(u32) &&
5758 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5760 *cs++ = MI_BATCH_BUFFER_END;
5762 shmem_unpin_map(ce->engine->default_state, defaults);
5764 i915_gem_object_flush_map(batch->obj);
5765 i915_gem_object_unpin_map(batch->obj);
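/*
 * poison_registers() runs the load_context() batch on the rival
 * context with arbitration disabled, then writes to the status-page
 * semaphore to release the victim so that it records its (hopefully
 * unharmed) register state.
 */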
5770 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
5772 struct i915_request *rq;
5773 struct i915_vma *batch;
5777 batch = load_context(ce, poison);
5779 return PTR_ERR(batch);
5781 rq = intel_context_create_request(ce);
5787 err = move_to_active(rq, batch, 0);
5791 cs = intel_ring_begin(rq, 8);
5797 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
5798 *cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
5799 *cs++ = lower_32_bits(batch->node.start);
5800 *cs++ = upper_32_bits(batch->node.start);
5802 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
5803 *cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
5804 offset_in_page(sema);
5808 intel_ring_advance(rq, cs);
5810 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
5812 i915_request_add(rq);
5814 i915_vma_put(batch);
5818 static bool is_moving(u32 a, u32 b)
5823 static int compare_isolation(struct intel_engine_cs *engine,
5824 struct i915_vma *ref[2],
5825 struct i915_vma *result[2],
5826 struct intel_context *ce,
5829 u32 x, dw, *hw, *lrc;
5834 A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
5836 return PTR_ERR(A[0]);
5838 A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
5840 err = PTR_ERR(A[1]);
5844 B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
5846 err = PTR_ERR(B[0]);
5850 B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
5852 err = PTR_ERR(B[1]);
5856 lrc = i915_gem_object_pin_map(ce->state->obj,
5857 i915_coherent_map_type(engine->i915));
5862 lrc += LRC_STATE_OFFSET / sizeof(*hw);
5864 defaults = shmem_pin_map(ce->engine->default_state);
5873 hw += LRC_STATE_OFFSET / sizeof(*hw);
5875 u32 len = hw[dw] & 0x7f;
5882 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
5888 len = (len + 1) / 2;
5890 if (!is_moving(A[0][x], A[1][x]) &&
5891 (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
5892 switch (hw[dw] & 4095) {
5893 case 0x30: /* RING_HEAD */
5894 case 0x34: /* RING_TAIL */
5898 pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
5901 A[0][x], B[0][x], B[1][x],
5902 poison, lrc[dw + 1]);
5909 } while (dw < PAGE_SIZE / sizeof(u32) &&
5910 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
5912 shmem_unpin_map(ce->engine->default_state, defaults);
5914 i915_gem_object_unpin_map(ce->state->obj);
5916 i915_gem_object_unpin_map(result[1]->obj);
5918 i915_gem_object_unpin_map(result[0]->obj);
5920 i915_gem_object_unpin_map(ref[1]->obj);
5922 i915_gem_object_unpin_map(ref[0]->obj);
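/*
 * __lrc_isolation() records a reference copy of context A's registers,
 * then records them a second time while context B poisons its own
 * register state in between, and finally compares the two captures
 * (ignoring registers such as RING_HEAD/RING_TAIL that are expected to
 * move).
 */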
5926 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
5928 u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
5929 struct i915_vma *ref[2], *result[2];
5930 struct intel_context *A, *B;
5931 struct i915_request *rq;
5934 A = intel_context_create(engine);
5938 B = intel_context_create(engine);
5944 ref[0] = create_user_vma(A->vm, SZ_64K);
5945 if (IS_ERR(ref[0])) {
5946 err = PTR_ERR(ref[0]);
5950 ref[1] = create_user_vma(A->vm, SZ_64K);
5951 if (IS_ERR(ref[1])) {
5952 err = PTR_ERR(ref[1]);
5956 rq = record_registers(A, ref[0], ref[1], sema);
5962 WRITE_ONCE(*sema, 1);
5965 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5966 i915_request_put(rq);
5970 i915_request_put(rq);
5972 result[0] = create_user_vma(A->vm, SZ_64K);
5973 if (IS_ERR(result[0])) {
5974 err = PTR_ERR(result[0]);
5978 result[1] = create_user_vma(A->vm, SZ_64K);
5979 if (IS_ERR(result[1])) {
5980 err = PTR_ERR(result[1]);
5984 rq = record_registers(A, result[0], result[1], sema);
5990 err = poison_registers(B, poison, sema);
5992 WRITE_ONCE(*sema, -1);
5993 i915_request_put(rq);
5997 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
5998 i915_request_put(rq);
6002 i915_request_put(rq);
6004 err = compare_isolation(engine, ref, result, A, poison);
6007 i915_vma_put(result[1]);
6009 i915_vma_put(result[0]);
6011 i915_vma_put(ref[1]);
6013 i915_vma_put(ref[0]);
6015 intel_context_put(B);
6017 intel_context_put(A);
6021 static bool skip_isolation(const struct intel_engine_cs *engine)
6023 if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
6026 if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
6032 static int live_lrc_isolation(void *arg)
6034 struct intel_gt *gt = arg;
6035 struct intel_engine_cs *engine;
6036 enum intel_engine_id id;
6037 const u32 poison[] = {
6047 * Our goal is to try to verify that per-context state cannot be
6048 * tampered with by another non-privileged client.
6050 * We take the list of context registers from the LRI in the default
6051 * context image and attempt to modify that list from a remote context.
6054 for_each_engine(engine, gt, id) {
6057 /* Just don't even ask */
6058 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
6059 skip_isolation(engine))
6062 intel_engine_pm_get(engine);
6063 for (i = 0; i < ARRAY_SIZE(poison); i++) {
6066 result = __lrc_isolation(engine, poison[i]);
6070 result = __lrc_isolation(engine, ~poison[i]);
6074 intel_engine_pm_put(engine);
6075 if (igt_flush_test(gt->i915)) {
6084 static int indirect_ctx_submit_req(struct intel_context *ce)
6086 struct i915_request *rq;
6089 rq = intel_context_create_request(ce);
6093 i915_request_get(rq);
6094 i915_request_add(rq);
6096 if (i915_request_wait(rq, 0, HZ / 5) < 0)
6099 i915_request_put(rq);
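/*
 * The canary lives in the per-context indirect wa batch buffer: the bb
 * stores RING_START into a reserved slot of its own page, and
 * check_ring_start() later verifies that the stored value matches the
 * context's own CTX_RING_START, proving the bb really ran for this
 * context.
 */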
6104 #define CTX_BB_CANARY_OFFSET (3 * 1024)
6105 #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
6108 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
6110 *cs++ = MI_STORE_REGISTER_MEM_GEN8 |
6111 MI_SRM_LRM_GLOBAL_GTT |
6113 *cs++ = i915_mmio_reg_offset(RING_START(0));
6114 *cs++ = i915_ggtt_offset(ce->state) +
6115 context_wa_bb_offset(ce) +
6116 CTX_BB_CANARY_OFFSET;
6123 indirect_ctx_bb_setup(struct intel_context *ce)
6125 u32 *cs = context_indirect_bb(ce);
6127 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
6129 setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
6132 static bool check_ring_start(struct intel_context *ce)
6134 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
6135 LRC_STATE_OFFSET + context_wa_bb_offset(ce);
6137 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
6140 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
6141 ctx_bb[CTX_BB_CANARY_INDEX],
6142 ce->lrc_reg_state[CTX_RING_START]);
6147 static int indirect_ctx_bb_check(struct intel_context *ce)
6151 err = indirect_ctx_submit_req(ce);
6155 if (!check_ring_start(ce))
6161 static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
6163 struct intel_context *a, *b;
6166 a = intel_context_create(engine);
6169 err = intel_context_pin(a);
6173 b = intel_context_create(engine);
6178 err = intel_context_pin(b);
6182 /* We use the already reserved extra page in context state */
6183 if (!a->wa_bb_page) {
6184 GEM_BUG_ON(b->wa_bb_page);
6185 GEM_BUG_ON(INTEL_GEN(engine->i915) == 12);
6190 * In order to test that our per-context bb is truly per context, and
6191 * executes at the intended spot in the context-restore process, make
6192 * the batch store the ring start value to memory.
6193 * As the ring start is restored prior to running the indirect ctx bb,
6194 * and as it differs for each context, it suits this purpose.
6196 indirect_ctx_bb_setup(a);
6197 indirect_ctx_bb_setup(b);
6199 err = indirect_ctx_bb_check(a);
6203 err = indirect_ctx_bb_check(b);
6206 intel_context_unpin(b);
6208 intel_context_put(b);
6210 intel_context_unpin(a);
6212 intel_context_put(a);
6217 static int live_lrc_indirect_ctx_bb(void *arg)
6219 struct intel_gt *gt = arg;
6220 struct intel_engine_cs *engine;
6221 enum intel_engine_id id;
6224 for_each_engine(engine, gt, id) {
6225 intel_engine_pm_get(engine);
6226 err = __live_lrc_indirect_ctx_bb(engine);
6227 intel_engine_pm_put(engine);
6229 if (igt_flush_test(gt->i915))
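/*
 * garbage_reset() performs an engine reset by hand: it claims the
 * per-engine reset bit, disables the submission tasklet, resets the
 * engine (unless the request has already failed), then releases both.
 */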
6239 static void garbage_reset(struct intel_engine_cs *engine,
6240 struct i915_request *rq)
6242 const unsigned int bit = I915_RESET_ENGINE + engine->id;
6243 unsigned long *lock = &engine->gt->reset.flags;
6245 if (test_and_set_bit(bit, lock))
6248 tasklet_disable(&engine->execlists.tasklet);
6250 if (!rq->fence.error)
6251 intel_engine_reset(engine, NULL);
6253 tasklet_enable(&engine->execlists.tasklet);
6254 clear_and_wake_up_bit(bit, lock);
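/*
 * garbage() scribbles random bytes over the context's register state
 * and then creates a request on the corrupted context.
 */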
6257 static struct i915_request *garbage(struct intel_context *ce,
6258 struct rnd_state *prng)
6260 struct i915_request *rq;
6263 err = intel_context_pin(ce);
6265 return ERR_PTR(err);
6267 prandom_bytes_state(prng,
6269 ce->engine->context_size -
6272 rq = intel_context_create_request(ce);
6278 i915_request_get(rq);
6279 i915_request_add(rq);
6283 intel_context_unpin(ce);
6284 return ERR_PTR(err);
6287 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
6289 struct intel_context *ce;
6290 struct i915_request *hang;
6293 ce = intel_context_create(engine);
6297 hang = garbage(ce, prng);
6299 err = PTR_ERR(hang);
6303 if (wait_for_submit(engine, hang, HZ / 2)) {
6304 i915_request_put(hang);
6309 intel_context_set_banned(ce);
6310 garbage_reset(engine, hang);
6312 intel_engine_flush_submission(engine);
6313 if (!hang->fence.error) {
6314 i915_request_put(hang);
6315 pr_err("%s: corrupted context was not reset\n",
6321 if (i915_request_wait(hang, 0, HZ / 2) < 0) {
6322 pr_err("%s: corrupted context did not recover\n",
6324 i915_request_put(hang);
6328 i915_request_put(hang);
6331 intel_context_put(ce);
6335 static int live_lrc_garbage(void *arg)
6337 struct intel_gt *gt = arg;
6338 struct intel_engine_cs *engine;
6339 enum intel_engine_id id;
6342 * Verify that we can recover if one context state is completely corrupted.
6346 if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
6349 for_each_engine(engine, gt, id) {
6350 I915_RND_STATE(prng);
6353 if (!intel_has_reset_engine(engine->gt))
6356 intel_engine_pm_get(engine);
6357 for (i = 0; i < 3; i++) {
6358 err = __lrc_garbage(engine, &prng);
6362 intel_engine_pm_put(engine);
6364 if (igt_flush_test(gt->i915))
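/*
 * __live_pphwsp_runtime() submits batches of requests on a single
 * context until the timeout expires, then checks that the cumulative
 * runtime recorded in the pphwsp never underflowed.
 */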
6373 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
6375 struct intel_context *ce;
6376 struct i915_request *rq;
6377 IGT_TIMEOUT(end_time);
6380 ce = intel_context_create(engine);
6384 ce->runtime.num_underflow = 0;
6385 ce->runtime.max_underflow = 0;
6388 unsigned int loop = 1024;
6391 rq = intel_context_create_request(ce);
6398 i915_request_get(rq);
6400 i915_request_add(rq);
6403 if (__igt_timeout(end_time, NULL))
6406 i915_request_put(rq);
6409 err = i915_request_wait(rq, 0, HZ / 5);
6411 pr_err("%s: request not completed!\n", engine->name);
6415 igt_flush_test(engine->i915);
6417 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
6419 intel_context_get_total_runtime_ns(ce),
6420 intel_context_get_avg_runtime_ns(ce));
6423 if (ce->runtime.num_underflow) {
6424 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
6426 ce->runtime.num_underflow,
6427 ce->runtime.max_underflow);
6433 i915_request_put(rq);
6435 intel_context_put(ce);
6439 static int live_pphwsp_runtime(void *arg)
6441 struct intel_gt *gt = arg;
6442 struct intel_engine_cs *engine;
6443 enum intel_engine_id id;
6447 * Check that cumulative context runtime as stored in the pphwsp[16] is monotonically increasing.
6451 for_each_engine(engine, gt, id) {
6452 err = __live_pphwsp_runtime(engine);
6457 if (igt_flush_test(gt->i915))
6463 int intel_lrc_live_selftests(struct drm_i915_private *i915)
6465 static const struct i915_subtest tests[] = {
6466 SUBTEST(live_lrc_layout),
6467 SUBTEST(live_lrc_fixed),
6468 SUBTEST(live_lrc_state),
6469 SUBTEST(live_lrc_gpr),
6470 SUBTEST(live_lrc_isolation),
6471 SUBTEST(live_lrc_timestamp),
6472 SUBTEST(live_lrc_garbage),
6473 SUBTEST(live_pphwsp_runtime),
6474 SUBTEST(live_lrc_indirect_ctx_bb),
6477 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
6480 return intel_gt_live_subtests(tests, &i915->gt);