GNU Linux-libre 4.19.207-gnu1
drivers/gpu/drm/i915/selftests/intel_hangcheck.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/kthread.h>
26
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_flush_test.h"
30 #include "igt_wedge_me.h"
31
32 #include "mock_context.h"
33 #include "mock_drm.h"
34
35 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
36
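/*
 * Fixture shared by the hang tests: the device, a private kernel context,
 * one page used as a makeshift status page ("hws", CPU view in "seqno")
 * and one page holding the spinning batch ("obj", CPU view in "batch").
 */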
37 struct hang {
38         struct drm_i915_private *i915;
39         struct drm_i915_gem_object *hws;
40         struct drm_i915_gem_object *obj;
41         struct i915_gem_context *ctx;
42         u32 *seqno;
43         u32 *batch;
44 };
45
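/*
 * hang_init() builds the fixture above: a fresh kernel context plus two
 * one-page internal objects.  The status page is kept cacheable and mapped
 * write-back so the CPU can cheaply poll the seqnos written by the GPU;
 * the batch page is mapped WB on LLC platforms and WC otherwise, ready for
 * emit_recurse_batch() to poke commands into it.
 */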
46 static int hang_init(struct hang *h, struct drm_i915_private *i915)
47 {
48         void *vaddr;
49         int err;
50
51         memset(h, 0, sizeof(*h));
52         h->i915 = i915;
53
54         h->ctx = kernel_context(i915);
55         if (IS_ERR(h->ctx))
56                 return PTR_ERR(h->ctx);
57
58         h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
59         if (IS_ERR(h->hws)) {
60                 err = PTR_ERR(h->hws);
61                 goto err_ctx;
62         }
63
64         h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
65         if (IS_ERR(h->obj)) {
66                 err = PTR_ERR(h->obj);
67                 goto err_hws;
68         }
69
70         i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
71         vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
72         if (IS_ERR(vaddr)) {
73                 err = PTR_ERR(vaddr);
74                 goto err_obj;
75         }
76         h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
77
78         vaddr = i915_gem_object_pin_map(h->obj,
79                                         HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
80         if (IS_ERR(vaddr)) {
81                 err = PTR_ERR(vaddr);
82                 goto err_unpin_hws;
83         }
84         h->batch = vaddr;
85
86         return 0;
87
88 err_unpin_hws:
89         i915_gem_object_unpin_map(h->hws);
90 err_obj:
91         i915_gem_object_put(h->obj);
92 err_hws:
93         i915_gem_object_put(h->hws);
94 err_ctx:
95         kernel_context_close(h->ctx);
96         return err;
97 }
98
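/*
 * Each fence context gets its own dword slot within the status page
 * (indexed by rq->fence.context), so hangs submitted from different
 * contexts record their seqnos without clobbering one another.
 */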
99 static u64 hws_address(const struct i915_vma *hws,
100                        const struct i915_request *rq)
101 {
102         return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
103 }
104
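/*
 * Emit a batch that stores the request's seqno into its slot of the status
 * page and then branches back to its own start, spinning until either the
 * batch is rewritten with MI_BATCH_BUFFER_END or the engine is reset.  The
 * MI_ARB_CHECKs allow arbitration while spinning, and the store/branch
 * encoding is chosen per gen (gen5 and earlier also use a secure dispatch).
 */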
105 static int emit_recurse_batch(struct hang *h,
106                               struct i915_request *rq)
107 {
108         struct drm_i915_private *i915 = h->i915;
109         struct i915_address_space *vm =
110                 rq->gem_context->ppgtt ?
111                 &rq->gem_context->ppgtt->vm :
112                 &i915->ggtt.vm;
113         struct i915_vma *hws, *vma;
114         unsigned int flags;
115         u32 *batch;
116         int err;
117
118         vma = i915_vma_instance(h->obj, vm, NULL);
119         if (IS_ERR(vma))
120                 return PTR_ERR(vma);
121
122         hws = i915_vma_instance(h->hws, vm, NULL);
123         if (IS_ERR(hws))
124                 return PTR_ERR(hws);
125
126         err = i915_vma_pin(vma, 0, 0, PIN_USER);
127         if (err)
128                 return err;
129
130         err = i915_vma_pin(hws, 0, 0, PIN_USER);
131         if (err)
132                 goto unpin_vma;
133
134         err = i915_vma_move_to_active(vma, rq, 0);
135         if (err)
136                 goto unpin_hws;
137
138         if (!i915_gem_object_has_active_reference(vma->obj)) {
139                 i915_gem_object_get(vma->obj);
140                 i915_gem_object_set_active_reference(vma->obj);
141         }
142
143         err = i915_vma_move_to_active(hws, rq, 0);
144         if (err)
145                 goto unpin_hws;
146
147         if (!i915_gem_object_has_active_reference(hws->obj)) {
148                 i915_gem_object_get(hws->obj);
149                 i915_gem_object_set_active_reference(hws->obj);
150         }
151
152         batch = h->batch;
153         if (INTEL_GEN(i915) >= 8) {
154                 *batch++ = MI_STORE_DWORD_IMM_GEN4;
155                 *batch++ = lower_32_bits(hws_address(hws, rq));
156                 *batch++ = upper_32_bits(hws_address(hws, rq));
157                 *batch++ = rq->fence.seqno;
158                 *batch++ = MI_ARB_CHECK;
159
160                 memset(batch, 0, 1024);
161                 batch += 1024 / sizeof(*batch);
162
163                 *batch++ = MI_ARB_CHECK;
164                 *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
165                 *batch++ = lower_32_bits(vma->node.start);
166                 *batch++ = upper_32_bits(vma->node.start);
167         } else if (INTEL_GEN(i915) >= 6) {
168                 *batch++ = MI_STORE_DWORD_IMM_GEN4;
169                 *batch++ = 0;
170                 *batch++ = lower_32_bits(hws_address(hws, rq));
171                 *batch++ = rq->fence.seqno;
172                 *batch++ = MI_ARB_CHECK;
173
174                 memset(batch, 0, 1024);
175                 batch += 1024 / sizeof(*batch);
176
177                 *batch++ = MI_ARB_CHECK;
178                 *batch++ = MI_BATCH_BUFFER_START | 1 << 8;
179                 *batch++ = lower_32_bits(vma->node.start);
180         } else if (INTEL_GEN(i915) >= 4) {
181                 *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
182                 *batch++ = 0;
183                 *batch++ = lower_32_bits(hws_address(hws, rq));
184                 *batch++ = rq->fence.seqno;
185                 *batch++ = MI_ARB_CHECK;
186
187                 memset(batch, 0, 1024);
188                 batch += 1024 / sizeof(*batch);
189
190                 *batch++ = MI_ARB_CHECK;
191                 *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
192                 *batch++ = lower_32_bits(vma->node.start);
193         } else {
194                 *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
195                 *batch++ = lower_32_bits(hws_address(hws, rq));
196                 *batch++ = rq->fence.seqno;
197                 *batch++ = MI_ARB_CHECK;
198
199                 memset(batch, 0, 1024);
200                 batch += 1024 / sizeof(*batch);
201
202                 *batch++ = MI_ARB_CHECK;
203                 *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
204                 *batch++ = lower_32_bits(vma->node.start);
205         }
206         *batch++ = MI_BATCH_BUFFER_END; /* not reached */
207         i915_gem_chipset_flush(h->i915);
208
209         flags = 0;
210         if (INTEL_GEN(vm->i915) <= 5)
211                 flags |= I915_DISPATCH_SECURE;
212
213         err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
214
215 unpin_hws:
216         i915_vma_unpin(hws);
217 unpin_vma:
218         i915_vma_unpin(vma);
219         return err;
220 }
221
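/*
 * Allocate a request on @engine using the fixture's context and fill it
 * with the recursing batch.  If the previous batch object is still active
 * (the GPU may still be spinning on it), a fresh internal object is swapped
 * in first so that an in-flight batch is never rewritten from under the HW.
 */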
222 static struct i915_request *
223 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
224 {
225         struct i915_request *rq;
226         int err;
227
228         if (i915_gem_object_is_active(h->obj)) {
229                 struct drm_i915_gem_object *obj;
230                 void *vaddr;
231
232                 obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
233                 if (IS_ERR(obj))
234                         return ERR_CAST(obj);
235
236                 vaddr = i915_gem_object_pin_map(obj,
237                                                 HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
238                 if (IS_ERR(vaddr)) {
239                         i915_gem_object_put(obj);
240                         return ERR_CAST(vaddr);
241                 }
242
243                 i915_gem_object_unpin_map(h->obj);
244                 i915_gem_object_put(h->obj);
245
246                 h->obj = obj;
247                 h->batch = vaddr;
248         }
249
250         rq = i915_request_alloc(engine, h->ctx);
251         if (IS_ERR(rq))
252                 return rq;
253
254         err = emit_recurse_batch(h, rq);
255         if (err) {
256                 i915_request_add(rq);
257                 return ERR_PTR(err);
258         }
259
260         return rq;
261 }
262
263 static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
264 {
265         return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
266 }
267
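/*
 * Terminate any still-spinning batch by rewriting it with
 * MI_BATCH_BUFFER_END, then release the fixture's objects and context and
 * flush the remaining work.
 */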
268 static void hang_fini(struct hang *h)
269 {
270         *h->batch = MI_BATCH_BUFFER_END;
271         i915_gem_chipset_flush(h->i915);
272
273         i915_gem_object_unpin_map(h->obj);
274         i915_gem_object_put(h->obj);
275
276         i915_gem_object_unpin_map(h->hws);
277         i915_gem_object_put(h->hws);
278
279         kernel_context_close(h->ctx);
280
281         igt_flush_test(h->i915, I915_WAIT_LOCKED);
282 }
283
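/*
 * Wait for the hanging batch to start executing: a short 10us busy-wait
 * followed by up to a second of sleeping waits for the seqno store to land
 * in the status page.  Returns true once the request is running on the HW.
 */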
284 static bool wait_until_running(struct hang *h, struct i915_request *rq)
285 {
286         return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
287                                                rq->fence.seqno),
288                              10) &&
289                  wait_for(i915_seqno_passed(hws_seqno(h, rq),
290                                             rq->fence.seqno),
291                           1000));
292 }
293
294 static int igt_hang_sanitycheck(void *arg)
295 {
296         struct drm_i915_private *i915 = arg;
297         struct i915_request *rq;
298         struct intel_engine_cs *engine;
299         enum intel_engine_id id;
300         struct hang h;
301         int err;
302
303         /* Basic check that we can execute our hanging batch */
304
305         mutex_lock(&i915->drm.struct_mutex);
306         err = hang_init(&h, i915);
307         if (err)
308                 goto unlock;
309
310         for_each_engine(engine, i915, id) {
311                 long timeout;
312
313                 if (!intel_engine_can_store_dword(engine))
314                         continue;
315
316                 rq = hang_create_request(&h, engine);
317                 if (IS_ERR(rq)) {
318                         err = PTR_ERR(rq);
319                         pr_err("Failed to create request for %s, err=%d\n",
320                                engine->name, err);
321                         goto fini;
322                 }
323
324                 i915_request_get(rq);
325
326                 *h.batch = MI_BATCH_BUFFER_END;
327                 i915_gem_chipset_flush(i915);
328
329                 i915_request_add(rq);
330
331                 timeout = i915_request_wait(rq,
332                                             I915_WAIT_LOCKED,
333                                             MAX_SCHEDULE_TIMEOUT);
334                 i915_request_put(rq);
335
336                 if (timeout < 0) {
337                         err = timeout;
338                         pr_err("Wait for request failed on %s, err=%d\n",
339                                engine->name, err);
340                         goto fini;
341                 }
342         }
343
344 fini:
345         hang_fini(&h);
346 unlock:
347         mutex_unlock(&i915->drm.struct_mutex);
348         return err;
349 }
350
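/*
 * Take exclusive ownership of the reset machinery: grab I915_RESET_BACKOFF
 * and every per-engine reset bit, waiting out any reset already in
 * progress, so that nothing else can trigger a reset while a test runs.
 */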
351 static void global_reset_lock(struct drm_i915_private *i915)
352 {
353         struct intel_engine_cs *engine;
354         enum intel_engine_id id;
355
356         pr_debug("%s: current gpu_error=%08lx\n",
357                  __func__, i915->gpu_error.flags);
358
359         while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
360                 wait_event(i915->gpu_error.reset_queue,
361                            !test_bit(I915_RESET_BACKOFF,
362                                      &i915->gpu_error.flags));
363
364         for_each_engine(engine, i915, id) {
365                 while (test_and_set_bit(I915_RESET_ENGINE + id,
366                                         &i915->gpu_error.flags))
367                         wait_on_bit(&i915->gpu_error.flags,
368                                     I915_RESET_ENGINE + id,
369                                     TASK_UNINTERRUPTIBLE);
370         }
371 }
372
373 static void global_reset_unlock(struct drm_i915_private *i915)
374 {
375         struct intel_engine_cs *engine;
376         enum intel_engine_id id;
377
378         for_each_engine(engine, i915, id)
379                 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
380
381         clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
382         wake_up_all(&i915->gpu_error.reset_queue);
383 }
384
385 static int igt_global_reset(void *arg)
386 {
387         struct drm_i915_private *i915 = arg;
388         unsigned int reset_count;
389         int err = 0;
390
391         /* Check that we can issue a global GPU reset */
392
393         global_reset_lock(i915);
394         set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
395
396         mutex_lock(&i915->drm.struct_mutex);
397         reset_count = i915_reset_count(&i915->gpu_error);
398
399         i915_reset(i915, ALL_ENGINES, NULL);
400
401         if (i915_reset_count(&i915->gpu_error) == reset_count) {
402                 pr_err("No GPU reset recorded!\n");
403                 err = -EINVAL;
404         }
405         mutex_unlock(&i915->drm.struct_mutex);
406
407         GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
408         global_reset_unlock(i915);
409
410         if (i915_terminally_wedged(&i915->gpu_error))
411                 err = -EIO;
412
413         return err;
414 }
415
416 static bool wait_for_idle(struct intel_engine_cs *engine)
417 {
418         return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
419 }
420
421 static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
422 {
423         struct intel_engine_cs *engine;
424         enum intel_engine_id id;
425         struct hang h;
426         int err = 0;
427
429         /* Check that we can issue an engine reset on an idle (no-op) or active engine */
429
430         if (!intel_has_reset_engine(i915))
431                 return 0;
432
433         if (active) {
434                 mutex_lock(&i915->drm.struct_mutex);
435                 err = hang_init(&h, i915);
436                 mutex_unlock(&i915->drm.struct_mutex);
437                 if (err)
438                         return err;
439         }
440
441         for_each_engine(engine, i915, id) {
442                 unsigned int reset_count, reset_engine_count;
443                 IGT_TIMEOUT(end_time);
444
445                 if (active && !intel_engine_can_store_dword(engine))
446                         continue;
447
448                 if (!wait_for_idle(engine)) {
449                         pr_err("%s failed to idle before reset\n",
450                                engine->name);
451                         err = -EIO;
452                         break;
453                 }
454
455                 reset_count = i915_reset_count(&i915->gpu_error);
456                 reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
457                                                              engine);
458
459                 set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
460                 do {
461                         u32 seqno = intel_engine_get_seqno(engine);
462
463                         if (active) {
464                                 struct i915_request *rq;
465
466                                 mutex_lock(&i915->drm.struct_mutex);
467                                 rq = hang_create_request(&h, engine);
468                                 if (IS_ERR(rq)) {
469                                         err = PTR_ERR(rq);
470                                         mutex_unlock(&i915->drm.struct_mutex);
471                                         break;
472                                 }
473
474                                 i915_request_get(rq);
475                                 i915_request_add(rq);
476                                 mutex_unlock(&i915->drm.struct_mutex);
477
478                                 if (!wait_until_running(&h, rq)) {
479                                         struct drm_printer p = drm_info_printer(i915->drm.dev);
480
481                                         pr_err("%s: Failed to start request %x, at %x\n",
482                                                __func__, rq->fence.seqno, hws_seqno(&h, rq));
483                                         intel_engine_dump(engine, &p,
484                                                           "%s\n", engine->name);
485
486                                         i915_request_put(rq);
487                                         err = -EIO;
488                                         break;
489                                 }
490
491                                 GEM_BUG_ON(!rq->global_seqno);
492                                 seqno = rq->global_seqno - 1;
493                                 i915_request_put(rq);
494                         }
495
496                         err = i915_reset_engine(engine, NULL);
497                         if (err) {
498                                 pr_err("i915_reset_engine failed\n");
499                                 break;
500                         }
501
502                         if (i915_reset_count(&i915->gpu_error) != reset_count) {
503                                 pr_err("Full GPU reset recorded! (engine reset expected)\n");
504                                 err = -EINVAL;
505                                 break;
506                         }
507
508                         reset_engine_count += active;
509                         if (i915_reset_engine_count(&i915->gpu_error, engine) !=
510                             reset_engine_count) {
511                                 pr_err("%s engine reset %srecorded!\n",
512                                        engine->name, active ? "not " : "");
513                                 err = -EINVAL;
514                                 break;
515                         }
516
517                         if (!wait_for_idle(engine)) {
518                                 struct drm_printer p =
519                                         drm_info_printer(i915->drm.dev);
520
521                                 pr_err("%s failed to idle after reset\n",
522                                        engine->name);
523                                 intel_engine_dump(engine, &p,
524                                                   "%s\n", engine->name);
525
526                                 err = -EIO;
527                                 break;
528                         }
529                 } while (time_before(jiffies, end_time));
530                 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
531
532                 if (err)
533                         break;
534
535                 err = igt_flush_test(i915, 0);
536                 if (err)
537                         break;
538         }
539
540         if (i915_terminally_wedged(&i915->gpu_error))
541                 err = -EIO;
542
543         if (active) {
544                 mutex_lock(&i915->drm.struct_mutex);
545                 hang_fini(&h);
546                 mutex_unlock(&i915->drm.struct_mutex);
547         }
548
549         return err;
550 }
551
552 static int igt_reset_idle_engine(void *arg)
553 {
554         return __igt_reset_engine(arg, false);
555 }
556
557 static int igt_reset_active_engine(void *arg)
558 {
559         return __igt_reset_engine(arg, true);
560 }
561
562 struct active_engine {
563         struct task_struct *task;
564         struct intel_engine_cs *engine;
565         unsigned long resets;
566         unsigned int flags;
567 };
568
569 #define TEST_ACTIVE     BIT(0)
570 #define TEST_OTHERS     BIT(1)
571 #define TEST_SELF       BIT(2)
572 #define TEST_PRIORITY   BIT(3)
573
574 static int active_request_put(struct i915_request *rq)
575 {
576         int err = 0;
577
578         if (!rq)
579                 return 0;
580
581         if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
582                 GEM_TRACE("%s timed out waiting for completion of fence %llx:%d, seqno %d.\n",
583                           rq->engine->name,
584                           rq->fence.context,
585                           rq->fence.seqno,
586                           i915_request_global_seqno(rq));
587                 GEM_TRACE_DUMP();
588
589                 i915_gem_set_wedged(rq->i915);
590                 err = -EIO;
591         }
592
593         i915_request_put(rq);
594
595         return err;
596 }
597
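/*
 * Body of the background kthreads spawned by __igt_reset_engines(): keep up
 * to 8 requests in flight on the given engine, each on its own context
 * (optionally with a randomised priority), until kthread_stop() is called.
 * A request that fails to complete within 5s wedges the device.
 */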
598 static int active_engine(void *data)
599 {
600         I915_RND_STATE(prng);
601         struct active_engine *arg = data;
602         struct intel_engine_cs *engine = arg->engine;
603         struct i915_request *rq[8] = {};
604         struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
605         struct drm_file *file;
606         unsigned long count = 0;
607         int err = 0;
608
609         file = mock_file(engine->i915);
610         if (IS_ERR(file))
611                 return PTR_ERR(file);
612
613         for (count = 0; count < ARRAY_SIZE(ctx); count++) {
614                 mutex_lock(&engine->i915->drm.struct_mutex);
615                 ctx[count] = live_context(engine->i915, file);
616                 mutex_unlock(&engine->i915->drm.struct_mutex);
617                 if (IS_ERR(ctx[count])) {
618                         err = PTR_ERR(ctx[count]);
619                         while (--count)
620                                 i915_gem_context_put(ctx[count]);
621                         goto err_file;
622                 }
623         }
624
625         while (!kthread_should_stop()) {
626                 unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
627                 struct i915_request *old = rq[idx];
628                 struct i915_request *new;
629
630                 mutex_lock(&engine->i915->drm.struct_mutex);
631                 new = i915_request_alloc(engine, ctx[idx]);
632                 if (IS_ERR(new)) {
633                         mutex_unlock(&engine->i915->drm.struct_mutex);
634                         err = PTR_ERR(new);
635                         break;
636                 }
637
638                 if (arg->flags & TEST_PRIORITY)
639                         ctx[idx]->sched.priority =
640                                 i915_prandom_u32_max_state(512, &prng);
641
642                 rq[idx] = i915_request_get(new);
643                 i915_request_add(new);
644                 mutex_unlock(&engine->i915->drm.struct_mutex);
645
646                 err = active_request_put(old);
647                 if (err)
648                         break;
649
650                 cond_resched();
651         }
652
653         for (count = 0; count < ARRAY_SIZE(rq); count++) {
654                 int err__ = active_request_put(rq[count]);
655
656                 /* Keep the first error */
657                 if (!err)
658                         err = err__;
659         }
660
661 err_file:
662         mock_file_free(engine->i915, file);
663         return err;
664 }
665
666 static int __igt_reset_engines(struct drm_i915_private *i915,
667                                const char *test_name,
668                                unsigned int flags)
669 {
670         struct intel_engine_cs *engine, *other;
671         enum intel_engine_id id, tmp;
672         struct hang h;
673         int err = 0;
674
675         /* Check that issuing a reset on one engine does not interfere
676          * with any other engine.
677          */
678
679         if (!intel_has_reset_engine(i915))
680                 return 0;
681
682         if (flags & TEST_ACTIVE) {
683                 mutex_lock(&i915->drm.struct_mutex);
684                 err = hang_init(&h, i915);
685                 mutex_unlock(&i915->drm.struct_mutex);
686                 if (err)
687                         return err;
688
689                 if (flags & TEST_PRIORITY)
690                         h.ctx->sched.priority = 1024;
691         }
692
693         for_each_engine(engine, i915, id) {
694                 struct active_engine threads[I915_NUM_ENGINES] = {};
695                 unsigned long global = i915_reset_count(&i915->gpu_error);
696                 unsigned long count = 0, reported;
697                 IGT_TIMEOUT(end_time);
698
699                 if (flags & TEST_ACTIVE &&
700                     !intel_engine_can_store_dword(engine))
701                         continue;
702
703                 if (!wait_for_idle(engine)) {
704                         pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
705                                engine->name, test_name);
706                         err = -EIO;
707                         break;
708                 }
709
710                 memset(threads, 0, sizeof(threads));
711                 for_each_engine(other, i915, tmp) {
712                         struct task_struct *tsk;
713
714                         threads[tmp].resets =
715                                 i915_reset_engine_count(&i915->gpu_error,
716                                                         other);
717
718                         if (!(flags & TEST_OTHERS))
719                                 continue;
720
721                         if (other == engine && !(flags & TEST_SELF))
722                                 continue;
723
724                         threads[tmp].engine = other;
725                         threads[tmp].flags = flags;
726
727                         tsk = kthread_run(active_engine, &threads[tmp],
728                                           "igt/%s", other->name);
729                         if (IS_ERR(tsk)) {
730                                 err = PTR_ERR(tsk);
731                                 goto unwind;
732                         }
733
734                         threads[tmp].task = tsk;
735                         get_task_struct(tsk);
736                 }
737
738                 set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
739                 do {
740                         u32 seqno = intel_engine_get_seqno(engine);
741                         struct i915_request *rq = NULL;
742
743                         if (flags & TEST_ACTIVE) {
744                                 mutex_lock(&i915->drm.struct_mutex);
745                                 rq = hang_create_request(&h, engine);
746                                 if (IS_ERR(rq)) {
747                                         err = PTR_ERR(rq);
748                                         mutex_unlock(&i915->drm.struct_mutex);
749                                         break;
750                                 }
751
752                                 i915_request_get(rq);
753                                 i915_request_add(rq);
754                                 mutex_unlock(&i915->drm.struct_mutex);
755
756                                 if (!wait_until_running(&h, rq)) {
757                                         struct drm_printer p = drm_info_printer(i915->drm.dev);
758
759                                         pr_err("%s: Failed to start request %x, at %x\n",
760                                                __func__, rq->fence.seqno, hws_seqno(&h, rq));
761                                         intel_engine_dump(engine, &p,
762                                                           "%s\n", engine->name);
763
764                                         i915_request_put(rq);
765                                         err = -EIO;
766                                         break;
767                                 }
768
769                                 GEM_BUG_ON(!rq->global_seqno);
770                                 seqno = rq->global_seqno - 1;
771                         }
772
773                         err = i915_reset_engine(engine, NULL);
774                         if (err) {
775                                 pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
776                                        engine->name, test_name, err);
777                                 break;
778                         }
779
780                         count++;
781
782                         if (rq) {
783                                 i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
784                                 i915_request_put(rq);
785                         }
786
787                         if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
788                                 struct drm_printer p =
789                                         drm_info_printer(i915->drm.dev);
790
791                                 pr_err("i915_reset_engine(%s:%s):"
792                                        " failed to idle after reset\n",
793                                        engine->name, test_name);
794                                 intel_engine_dump(engine, &p,
795                                                   "%s\n", engine->name);
796
797                                 err = -EIO;
798                                 break;
799                         }
800                 } while (time_before(jiffies, end_time));
801                 clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
802                 pr_info("i915_reset_engine(%s:%s): %lu resets\n",
803                         engine->name, test_name, count);
804
805                 reported = i915_reset_engine_count(&i915->gpu_error, engine);
806                 reported -= threads[engine->id].resets;
807                 if (reported != (flags & TEST_ACTIVE ? count : 0)) {
808                                pr_err("i915_reset_engine(%s:%s): reset %lu times, but %lu reported (expected %lu)\n",
809                                engine->name, test_name, count, reported,
810                                (flags & TEST_ACTIVE ? count : 0));
811                         if (!err)
812                                 err = -EINVAL;
813                 }
814
815 unwind:
816                 for_each_engine(other, i915, tmp) {
817                         int ret;
818
819                         if (!threads[tmp].task)
820                                 continue;
821
822                         ret = kthread_stop(threads[tmp].task);
823                         if (ret) {
824                                 pr_err("kthread for other engine %s failed, err=%d\n",
825                                        other->name, ret);
826                                 if (!err)
827                                         err = ret;
828                         }
829                         put_task_struct(threads[tmp].task);
830
831                         if (other != engine &&
832                             threads[tmp].resets !=
833                             i915_reset_engine_count(&i915->gpu_error, other)) {
834                                 pr_err("Innocent engine %s was reset (count=%ld)\n",
835                                        other->name,
836                                        i915_reset_engine_count(&i915->gpu_error,
837                                                                other) -
838                                        threads[tmp].resets);
839                                 if (!err)
840                                         err = -EINVAL;
841                         }
842                 }
843
844                 if (global != i915_reset_count(&i915->gpu_error)) {
845                         pr_err("Global reset (count=%ld)!\n",
846                                i915_reset_count(&i915->gpu_error) - global);
847                         if (!err)
848                                 err = -EINVAL;
849                 }
850
851                 if (err)
852                         break;
853
854                 err = igt_flush_test(i915, 0);
855                 if (err)
856                         break;
857         }
858
859         if (i915_terminally_wedged(&i915->gpu_error))
860                 err = -EIO;
861
862         if (flags & TEST_ACTIVE) {
863                 mutex_lock(&i915->drm.struct_mutex);
864                 hang_fini(&h);
865                 mutex_unlock(&i915->drm.struct_mutex);
866         }
867
868         return err;
869 }
870
871 static int igt_reset_engines(void *arg)
872 {
873         static const struct {
874                 const char *name;
875                 unsigned int flags;
876         } phases[] = {
877                 { "idle", 0 },
878                 { "active", TEST_ACTIVE },
879                 { "others-idle", TEST_OTHERS },
880                 { "others-active", TEST_OTHERS | TEST_ACTIVE },
881                 {
882                         "others-priority",
883                         TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
884                 },
885                 {
886                         "self-priority",
887                         TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
888                 },
889                 { }
890         };
891         struct drm_i915_private *i915 = arg;
892         typeof(*phases) *p;
893         int err;
894
895         for (p = phases; p->name; p++) {
896                 if (p->flags & TEST_PRIORITY) {
897                         if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
898                                 continue;
899                 }
900
901                 err = __igt_reset_engines(arg, p->name, p->flags);
902                 if (err)
903                         return err;
904         }
905
906         return 0;
907 }
908
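/*
 * Simulate hangcheck firing: publish the mask of stalled engines, set
 * I915_RESET_HANDOFF and wake the reset waiters so that the reset is
 * carried out.  Returns the reset count sampled beforehand so callers can
 * verify that a reset really occurred.
 */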
909 static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
910 {
911         struct i915_gpu_error *error = &rq->i915->gpu_error;
912         u32 reset_count = i915_reset_count(error);
913
914         error->stalled_mask = mask;
915
916         /* set_bit() must be after we have set up the backchannel (mask) */
917         smp_mb__before_atomic();
918         set_bit(I915_RESET_HANDOFF, &error->flags);
919
920         wake_up_all(&error->wait_queue);
921
922         return reset_count;
923 }
924
925 static int igt_reset_wait(void *arg)
926 {
927         struct drm_i915_private *i915 = arg;
928         struct i915_request *rq;
929         unsigned int reset_count;
930         struct hang h;
931         long timeout;
932         int err;
933
934         if (!intel_engine_can_store_dword(i915->engine[RCS]))
935                 return 0;
936
937         /* Check that we detect a stuck waiter and issue a reset */
938
939         global_reset_lock(i915);
940
941         mutex_lock(&i915->drm.struct_mutex);
942         err = hang_init(&h, i915);
943         if (err)
944                 goto unlock;
945
946         rq = hang_create_request(&h, i915->engine[RCS]);
947         if (IS_ERR(rq)) {
948                 err = PTR_ERR(rq);
949                 goto fini;
950         }
951
952         i915_request_get(rq);
953         i915_request_add(rq);
954
955         if (!wait_until_running(&h, rq)) {
956                 struct drm_printer p = drm_info_printer(i915->drm.dev);
957
958                 pr_err("%s: Failed to start request %x, at %x\n",
959                        __func__, rq->fence.seqno, hws_seqno(&h, rq));
960                 intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
961
962                 i915_gem_set_wedged(i915);
963
964                 err = -EIO;
965                 goto out_rq;
966         }
967
968         reset_count = fake_hangcheck(rq, ALL_ENGINES);
969
970         timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
971         if (timeout < 0) {
972                 pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
973                        timeout);
974                 err = timeout;
975                 goto out_rq;
976         }
977
978         GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
979         if (i915_reset_count(&i915->gpu_error) == reset_count) {
980                 pr_err("No GPU reset recorded!\n");
981                 err = -EINVAL;
982                 goto out_rq;
983         }
984
985 out_rq:
986         i915_request_put(rq);
987 fini:
988         hang_fini(&h);
989 unlock:
990         mutex_unlock(&i915->drm.struct_mutex);
991         global_reset_unlock(i915);
992
993         if (i915_terminally_wedged(&i915->gpu_error))
994                 return -EIO;
995
996         return err;
997 }
998
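/*
 * Helper kthread for __igt_reset_evict_vma(): attempt to evict the vma's
 * node from its address space while the vma is still busy with the hanging
 * request.  The eviction must wait for that request and so should only make
 * progress once the reset has killed the hang.
 */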
999 struct evict_vma {
1000         struct completion completion;
1001         struct i915_vma *vma;
1002 };
1003
1004 static int evict_vma(void *data)
1005 {
1006         struct evict_vma *arg = data;
1007         struct i915_address_space *vm = arg->vma->vm;
1008         struct drm_i915_private *i915 = vm->i915;
1009         struct drm_mm_node evict = arg->vma->node;
1010         int err;
1011
1012         complete(&arg->completion);
1013
1014         mutex_lock(&i915->drm.struct_mutex);
1015         err = i915_gem_evict_for_node(vm, &evict, 0);
1016         mutex_unlock(&i915->drm.struct_mutex);
1017
1018         return err;
1019 }
1020
1021 static int __igt_reset_evict_vma(struct drm_i915_private *i915,
1022                                  struct i915_address_space *vm)
1023 {
1024         struct drm_i915_gem_object *obj;
1025         struct task_struct *tsk = NULL;
1026         struct i915_request *rq;
1027         struct evict_vma arg;
1028         struct hang h;
1029         int err;
1030
1031         if (!intel_engine_can_store_dword(i915->engine[RCS]))
1032                 return 0;
1033
1034         /* Check that we can recover an unbind stuck on a hanging request */
1035
1036         global_reset_lock(i915);
1037
1038         mutex_lock(&i915->drm.struct_mutex);
1039         err = hang_init(&h, i915);
1040         if (err)
1041                 goto unlock;
1042
1043         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1044         if (IS_ERR(obj)) {
1045                 err = PTR_ERR(obj);
1046                 goto fini;
1047         }
1048
1049         arg.vma = i915_vma_instance(obj, vm, NULL);
1050         if (IS_ERR(arg.vma)) {
1051                 err = PTR_ERR(arg.vma);
1052                 goto out_obj;
1053         }
1054
1055         rq = hang_create_request(&h, i915->engine[RCS]);
1056         if (IS_ERR(rq)) {
1057                 err = PTR_ERR(rq);
1058                 goto out_obj;
1059         }
1060
1061         err = i915_vma_pin(arg.vma, 0, 0,
1062                            i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER);
1063         if (err)
1064                 goto out_obj;
1065
1066         err = i915_vma_move_to_active(arg.vma, rq, EXEC_OBJECT_WRITE);
1067         i915_vma_unpin(arg.vma);
1068
1069         i915_request_get(rq);
1070         i915_request_add(rq);
1071         if (err)
1072                 goto out_rq;
1073
1074         mutex_unlock(&i915->drm.struct_mutex);
1075
1076         if (!wait_until_running(&h, rq)) {
1077                 struct drm_printer p = drm_info_printer(i915->drm.dev);
1078
1079                 pr_err("%s: Failed to start request %x, at %x\n",
1080                        __func__, rq->fence.seqno, hws_seqno(&h, rq));
1081                 intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
1082
1083                 i915_gem_set_wedged(i915);
1084                 goto out_reset;
1085         }
1086
1087         init_completion(&arg.completion);
1088
1089         tsk = kthread_run(evict_vma, &arg, "igt/evict_vma");
1090         if (IS_ERR(tsk)) {
1091                 err = PTR_ERR(tsk);
1092                 tsk = NULL;
1093                 goto out_reset;
1094         }
1095
1096         wait_for_completion(&arg.completion);
1097
1098         if (wait_for(waitqueue_active(&rq->execute), 10)) {
1099                 struct drm_printer p = drm_info_printer(i915->drm.dev);
1100
1101                 pr_err("igt/evict_vma kthread did not wait\n");
1102                 intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
1103
1104                 i915_gem_set_wedged(i915);
1105                 goto out_reset;
1106         }
1107
1108 out_reset:
1109         fake_hangcheck(rq, intel_engine_flag(rq->engine));
1110
1111         if (tsk) {
1112                 struct igt_wedge_me w;
1113
1114                 /* The reset, even indirectly, should take less than 10ms. */
1115                 igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout */)
1116                         err = kthread_stop(tsk);
1117         }
1118
1119         mutex_lock(&i915->drm.struct_mutex);
1120 out_rq:
1121         i915_request_put(rq);
1122 out_obj:
1123         i915_gem_object_put(obj);
1124 fini:
1125         hang_fini(&h);
1126 unlock:
1127         mutex_unlock(&i915->drm.struct_mutex);
1128         global_reset_unlock(i915);
1129
1130         if (i915_terminally_wedged(&i915->gpu_error))
1131                 return -EIO;
1132
1133         return err;
1134 }
1135
1136 static int igt_reset_evict_ggtt(void *arg)
1137 {
1138         struct drm_i915_private *i915 = arg;
1139
1140         return __igt_reset_evict_vma(i915, &i915->ggtt.vm);
1141 }
1142
1143 static int igt_reset_evict_ppgtt(void *arg)
1144 {
1145         struct drm_i915_private *i915 = arg;
1146         struct i915_gem_context *ctx;
1147         int err;
1148
1149         mutex_lock(&i915->drm.struct_mutex);
1150         ctx = kernel_context(i915);
1151         mutex_unlock(&i915->drm.struct_mutex);
1152         if (IS_ERR(ctx))
1153                 return PTR_ERR(ctx);
1154
1155         err = 0;
1156         if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
1157                 err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm);
1158
1159         kernel_context_close(ctx);
1160         return err;
1161 }
1162
1163 static int wait_for_others(struct drm_i915_private *i915,
1164                            struct intel_engine_cs *exclude)
1165 {
1166         struct intel_engine_cs *engine;
1167         enum intel_engine_id id;
1168
1169         for_each_engine(engine, i915, id) {
1170                 if (engine == exclude)
1171                         continue;
1172
1173                 if (!wait_for_idle(engine))
1174                         return -EIO;
1175         }
1176
1177         return 0;
1178 }
1179
1180 static int igt_reset_queue(void *arg)
1181 {
1182         struct drm_i915_private *i915 = arg;
1183         struct intel_engine_cs *engine;
1184         enum intel_engine_id id;
1185         struct hang h;
1186         int err;
1187
1188         /* Check that we replay pending requests following a hang */
1189
1190         global_reset_lock(i915);
1191
1192         mutex_lock(&i915->drm.struct_mutex);
1193         err = hang_init(&h, i915);
1194         if (err)
1195                 goto unlock;
1196
1197         for_each_engine(engine, i915, id) {
1198                 struct i915_request *prev;
1199                 IGT_TIMEOUT(end_time);
1200                 unsigned int count;
1201
1202                 if (!intel_engine_can_store_dword(engine))
1203                         continue;
1204
1205                 prev = hang_create_request(&h, engine);
1206                 if (IS_ERR(prev)) {
1207                         err = PTR_ERR(prev);
1208                         goto fini;
1209                 }
1210
1211                 i915_request_get(prev);
1212                 i915_request_add(prev);
1213
1214                 count = 0;
1215                 do {
1216                         struct i915_request *rq;
1217                         unsigned int reset_count;
1218
1219                         rq = hang_create_request(&h, engine);
1220                         if (IS_ERR(rq)) {
1221                                 err = PTR_ERR(rq);
1222                                 goto fini;
1223                         }
1224
1225                         i915_request_get(rq);
1226                         i915_request_add(rq);
1227
1228                         /*
1229                          * XXX We don't handle resetting the kernel context
1230                          * very well. If we trigger a device reset twice in
1231                          * quick succession while the kernel context is
1232                          * executing, we may end up skipping the breadcrumb.
1233                          * This is really only a problem for the selftest as
1234                          * normally there is a large interlude between resets
1235                          * (hangcheck), or we focus on resetting just one
1236                          * engine and so avoid repeatedly resetting innocents.
1237                          */
1238                         err = wait_for_others(i915, engine);
1239                         if (err) {
1240                                 pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
1241                                        __func__, engine->name);
1242                                 i915_request_put(rq);
1243                                 i915_request_put(prev);
1244
1245                                 GEM_TRACE_DUMP();
1246                                 i915_gem_set_wedged(i915);
1247                                 goto fini;
1248                         }
1249
1250                         if (!wait_until_running(&h, prev)) {
1251                                 struct drm_printer p = drm_info_printer(i915->drm.dev);
1252
1253                                 pr_err("%s(%s): Failed to start request %x, at %x\n",
1254                                        __func__, engine->name,
1255                                        prev->fence.seqno, hws_seqno(&h, prev));
1256                                 intel_engine_dump(engine, &p,
1257                                                   "%s\n", engine->name);
1258
1259                                 i915_request_put(rq);
1260                                 i915_request_put(prev);
1261
1262                                 i915_gem_set_wedged(i915);
1263
1264                                 err = -EIO;
1265                                 goto fini;
1266                         }
1267
1268                         reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
1269
1270                         i915_reset(i915, ENGINE_MASK(id), NULL);
1271
1272                         GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
1273                                             &i915->gpu_error.flags));
1274
1275                         if (prev->fence.error != -EIO) {
1276                                 pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
1277                                        prev->fence.error);
1278                                 i915_request_put(rq);
1279                                 i915_request_put(prev);
1280                                 err = -EINVAL;
1281                                 goto fini;
1282                         }
1283
1284                         if (rq->fence.error) {
1285                                 pr_err("Fence error status not zero [%d] after unrelated reset\n",
1286                                        rq->fence.error);
1287                                 i915_request_put(rq);
1288                                 i915_request_put(prev);
1289                                 err = -EINVAL;
1290                                 goto fini;
1291                         }
1292
1293                         if (i915_reset_count(&i915->gpu_error) == reset_count) {
1294                                 pr_err("No GPU reset recorded!\n");
1295                                 i915_request_put(rq);
1296                                 i915_request_put(prev);
1297                                 err = -EINVAL;
1298                                 goto fini;
1299                         }
1300
1301                         i915_request_put(prev);
1302                         prev = rq;
1303                         count++;
1304                 } while (time_before(jiffies, end_time));
1305                 pr_info("%s: Completed %d resets\n", engine->name, count);
1306
1307                 *h.batch = MI_BATCH_BUFFER_END;
1308                 i915_gem_chipset_flush(i915);
1309
1310                 i915_request_put(prev);
1311
1312                 err = igt_flush_test(i915, I915_WAIT_LOCKED);
1313                 if (err)
1314                         break;
1315         }
1316
1317 fini:
1318         hang_fini(&h);
1319 unlock:
1320         mutex_unlock(&i915->drm.struct_mutex);
1321         global_reset_unlock(i915);
1322
1323         if (i915_terminally_wedged(&i915->gpu_error))
1324                 return -EIO;
1325
1326         return err;
1327 }
1328
1329 static int igt_handle_error(void *arg)
1330 {
1331         struct drm_i915_private *i915 = arg;
1332         struct intel_engine_cs *engine = i915->engine[RCS];
1333         struct hang h;
1334         struct i915_request *rq;
1335         struct i915_gpu_state *error;
1336         int err;
1337
1338         /* Check that we can issue a global GPU and engine reset */
1339
1340         if (!intel_has_reset_engine(i915))
1341                 return 0;
1342
1343         if (!engine || !intel_engine_can_store_dword(engine))
1344                 return 0;
1345
1346         mutex_lock(&i915->drm.struct_mutex);
1347
1348         err = hang_init(&h, i915);
1349         if (err)
1350                 goto err_unlock;
1351
1352         rq = hang_create_request(&h, engine);
1353         if (IS_ERR(rq)) {
1354                 err = PTR_ERR(rq);
1355                 goto err_fini;
1356         }
1357
1358         i915_request_get(rq);
1359         i915_request_add(rq);
1360
1361         if (!wait_until_running(&h, rq)) {
1362                 struct drm_printer p = drm_info_printer(i915->drm.dev);
1363
1364                 pr_err("%s: Failed to start request %x, at %x\n",
1365                        __func__, rq->fence.seqno, hws_seqno(&h, rq));
1366                 intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
1367
1368                 i915_gem_set_wedged(i915);
1369
1370                 err = -EIO;
1371                 goto err_request;
1372         }
1373
1374         mutex_unlock(&i915->drm.struct_mutex);
1375
1376         /* Temporarily disable error capture */
1377         error = xchg(&i915->gpu_error.first_error, (void *)-1);
1378
1379         i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL);
1380
1381         xchg(&i915->gpu_error.first_error, error);
1382
1383         mutex_lock(&i915->drm.struct_mutex);
1384
1385         if (rq->fence.error != -EIO) {
1386                 pr_err("Guilty request not identified!\n");
1387                 err = -EINVAL;
1388                 goto err_request;
1389         }
1390
1391 err_request:
1392         i915_request_put(rq);
1393 err_fini:
1394         hang_fini(&h);
1395 err_unlock:
1396         mutex_unlock(&i915->drm.struct_mutex);
1397         return err;
1398 }
1399
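/*
 * Live selftest entry point.  Requires working GPU reset; periodic
 * hangcheck is disabled for the duration (and restored afterwards) so that
 * only the tests themselves trigger resets, and a runtime PM reference is
 * held across the whole run.
 */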
1400 int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
1401 {
1402         static const struct i915_subtest tests[] = {
1403                 SUBTEST(igt_global_reset), /* attempt to recover GPU first */
1404                 SUBTEST(igt_hang_sanitycheck),
1405                 SUBTEST(igt_reset_idle_engine),
1406                 SUBTEST(igt_reset_active_engine),
1407                 SUBTEST(igt_reset_engines),
1408                 SUBTEST(igt_reset_queue),
1409                 SUBTEST(igt_reset_wait),
1410                 SUBTEST(igt_reset_evict_ggtt),
1411                 SUBTEST(igt_reset_evict_ppgtt),
1412                 SUBTEST(igt_handle_error),
1413         };
1414         bool saved_hangcheck;
1415         int err;
1416
1417         if (!intel_has_gpu_reset(i915))
1418                 return 0;
1419
1420         if (i915_terminally_wedged(&i915->gpu_error))
1421                 return -EIO; /* we're long past hope of a successful reset */
1422
1423         intel_runtime_pm_get(i915);
1424         saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
1425
1426         err = i915_subtests(tests, i915);
1427
1428         mutex_lock(&i915->drm.struct_mutex);
1429         igt_flush_test(i915, I915_WAIT_LOCKED);
1430         mutex_unlock(&i915->drm.struct_mutex);
1431
1432         i915_modparams.enable_hangcheck = saved_hangcheck;
1433         intel_runtime_pm_put(i915);
1434
1435         return err;
1436 }