drivers/gpu/drm/i915/gt/selftest_timeline.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2017-2018 Intel Corporation
   5  */
   6
   7 #include <linux/prime_numbers.h>
   8
   9 #include "intel_context.h"
  10 #include "intel_engine_heartbeat.h"
  11 #include "intel_engine_pm.h"
  12 #include "intel_gt.h"
  13 #include "intel_gt_requests.h"
  14 #include "intel_ring.h"
  15 #include "selftest_engine_heartbeat.h"
  16
  17 #include "../selftests/i915_random.h"
  18 #include "../i915_selftest.h"
  19
  20 #include "../selftests/igt_flush_test.h"
  21 #include "../selftests/mock_gem_device.h"
  22 #include "selftests/mock_timeline.h"
  23
  24 static struct page *hwsp_page(struct intel_timeline *tl)
  25 {
  26         struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
  27
  28         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
  29         return sg_page(obj->mm.pages->sgl);
  30 }
  31
  32 static unsigned long hwsp_cacheline(struct intel_timeline *tl)
  33 {
  34         unsigned long address = (unsigned long)page_address(hwsp_page(tl));
  35
  36         return (address + tl->hwsp_offset) / CACHELINE_BYTES;
  37 }
  38
  /* Number of CACHELINE_BYTES-sized slots that fit into a single page. */
  #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
  40
  41 struct mock_hwsp_freelist {
  42         struct intel_gt *gt;
  43         struct radix_tree_root cachelines;
  44         struct intel_timeline **history;
  45         unsigned long count, max;
  46         struct rnd_state prng;
  47 };
  48
  49 enum {
  50         SHUFFLE = BIT(0),
  51 };
  52
  53 static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
  54                                unsigned int idx,
  55                                struct intel_timeline *tl)
  56 {
  57         tl = xchg(&state->history[idx], tl);
  58         if (tl) {
  59                 radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
  60                 intel_timeline_put(tl);
  61         }
  62 }
  63
  64 static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
  65                                 unsigned int count,
  66                                 unsigned int flags)
  67 {
  68         struct intel_timeline *tl;
  69         unsigned int idx;
  70
  71         while (count--) {
  72                 unsigned long cacheline;
  73                 int err;
  74
  75                 tl = intel_timeline_create(state->gt);
  76                 if (IS_ERR(tl))
  77                         return PTR_ERR(tl);
  78
  79                 cacheline = hwsp_cacheline(tl);
  80                 err = radix_tree_insert(&state->cachelines, cacheline, tl);
  81                 if (err) {
  82                         if (err == -EEXIST) {
  83                                 pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
  84                                        cacheline);
  85                         }
  86                         intel_timeline_put(tl);
  87                         return err;
  88                 }
  89
  90                 idx = state->count++ % state->max;
  91                 __mock_hwsp_record(state, idx, tl);
  92         }
  93
  94         if (flags & SHUFFLE)
  95                 i915_prandom_shuffle(state->history,
  96                                      sizeof(*state->history),
  97                                      min(state->count, state->max),
  98                                      &state->prng);
  99
 100         count = i915_prandom_u32_max_state(min(state->count, state->max),
 101                                            &state->prng);
 102         while (count--) {
 103                 idx = --state->count % state->max;
 104                 __mock_hwsp_record(state, idx, NULL);
 105         }
 106
 107         return 0;
 108 }
 109
 110 static int mock_hwsp_freelist(void *arg)
 111 {
 112         struct mock_hwsp_freelist state;
 113         struct drm_i915_private *i915;
 114         const struct {
 115                 const char *name;
 116                 unsigned int flags;
 117         } phases[] = {
 118                 { "linear", 0 },
 119                 { "shuffled", SHUFFLE },
 120                 { },
 121         }, *p;
 122         unsigned int na;
 123         int err = 0;
 124
 125         i915 = mock_gem_device();
 126         if (!i915)
 127                 return -ENOMEM;
 128
 129         INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
 130         state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
 131
 132         state.gt = &i915->gt;
 133
 134         /*
 135          * Create a bunch of timelines and check that their HWSP do not overlap.
 136          * Free some, and try again.
 137          */
 138
 139         state.max = PAGE_SIZE / sizeof(*state.history);
 140         state.count = 0;
 141         state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
 142         if (!state.history) {
 143                 err = -ENOMEM;
 144                 goto err_put;
 145         }
 146
 147         for (p = phases; p->name; p++) {
 148                 pr_debug("%s(%s)\n", __func__, p->name);
 149                 for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
 150                         err = __mock_hwsp_timeline(&state, na, p->flags);
 151                         if (err)
 152                                 goto out;
 153                 }
 154         }
 155
 156 out:
 157         for (na = 0; na < state.max; na++)
 158                 __mock_hwsp_record(&state, na, NULL);
 159         kfree(state.history);
 160 err_put:
 161         mock_destroy_device(i915);
 162         return err;
 163 }
 164
 165 struct __igt_sync {
 166         const char *name;
 167         u32 seqno;
 168         bool expected;
 169         bool set;
 170 };
 171
 172 static int __igt_sync(struct intel_timeline *tl,
 173                       u64 ctx,
 174                       const struct __igt_sync *p,
 175                       const char *name)
 176 {
 177         int ret;
 178
 179         if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
 180                 pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
 181                        name, p->name, ctx, p->seqno, yesno(p->expected));
 182                 return -EINVAL;
 183         }
 184
 185         if (p->set) {
 186                 ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
 187                 if (ret)
 188                         return ret;
 189         }
 190
 191         return 0;
 192 }
 193
 194 static int igt_sync(void *arg)
 195 {
 196         const struct __igt_sync pass[] = {
 197                 { "unset", 0, false, false },
 198                 { "new", 0, false, true },
 199                 { "0a", 0, true, true },
 200                 { "1a", 1, false, true },
 201                 { "1b", 1, true, true },
 202                 { "0b", 0, true, false },
 203                 { "2a", 2, false, true },
 204                 { "4", 4, false, true },
 205                 { "INT_MAX", INT_MAX, false, true },
 206                 { "INT_MAX-1", INT_MAX-1, true, false },
 207                 { "INT_MAX+1", (u32)INT_MAX+1, false, true },
 208                 { "INT_MAX", INT_MAX, true, false },
 209                 { "UINT_MAX", UINT_MAX, false, true },
 210                 { "wrap", 0, false, true },
 211                 { "unwrap", UINT_MAX, true, false },
 212                 {},
 213         }, *p;
 214         struct intel_timeline tl;
 215         int order, offset;
 216         int ret = -ENODEV;
 217
 218         mock_timeline_init(&tl, 0);
 219         for (p = pass; p->name; p++) {
 220                 for (order = 1; order < 64; order++) {
 221                         for (offset = -1; offset <= (order > 1); offset++) {
 222                                 u64 ctx = BIT_ULL(order) + offset;
 223
 224                                 ret = __igt_sync(&tl, ctx, p, "1");
 225                                 if (ret)
 226                                         goto out;
 227                         }
 228                 }
 229         }
 230         mock_timeline_fini(&tl);
 231
 232         mock_timeline_init(&tl, 0);
 233         for (order = 1; order < 64; order++) {
 234                 for (offset = -1; offset <= (order > 1); offset++) {
 235                         u64 ctx = BIT_ULL(order) + offset;
 236
 237                         for (p = pass; p->name; p++) {
 238                                 ret = __igt_sync(&tl, ctx, p, "2");
 239                                 if (ret)
 240                                         goto out;
 241                         }
 242                 }
 243         }
 244
 245 out:
 246         mock_timeline_fini(&tl);
 247         return ret;
 248 }
 249
 250 static unsigned int random_engine(struct rnd_state *rnd)
 251 {
 252         return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
 253 }
 254
 255 static int bench_sync(void *arg)
 256 {
 257         struct rnd_state prng;
 258         struct intel_timeline tl;
 259         unsigned long end_time, count;
 260         u64 prng32_1M;
 261         ktime_t kt;
 262         int order, last_order;
 263
 264         mock_timeline_init(&tl, 0);
 265
 266         /* Lookups from cache are very fast and so the random number generation
 267          * and the loop itself becomes a significant factor in the per-iteration
 268          * timings. We try to compensate the results by measuring the overhead
 269          * of the prng and subtract it from the reported results.
 270          */
 271         prandom_seed_state(&prng, i915_selftest.random_seed);
 272         count = 0;
 273         kt = ktime_get();
 274         end_time = jiffies + HZ/10;
 275         do {
 276                 u32 x;
 277
 278                 /* Make sure the compiler doesn't optimise away the prng call */
 279                 WRITE_ONCE(x, prandom_u32_state(&prng));
 280
 281                 count++;
 282         } while (!time_after(jiffies, end_time));
 283         kt = ktime_sub(ktime_get(), kt);
 284         pr_debug("%s: %lu random evaluations, %lluns/prng\n",
 285                  __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 286         prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
 287
 288         /* Benchmark (only) setting random context ids */
 289         prandom_seed_state(&prng, i915_selftest.random_seed);
 290         count = 0;
 291         kt = ktime_get();
 292         end_time = jiffies + HZ/10;
 293         do {
 294                 u64 id = i915_prandom_u64_state(&prng);
 295
 296                 __intel_timeline_sync_set(&tl, id, 0);
 297                 count++;
 298         } while (!time_after(jiffies, end_time));
 299         kt = ktime_sub(ktime_get(), kt);
 300         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 301         pr_info("%s: %lu random insertions, %lluns/insert\n",
 302                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 303
 304         /* Benchmark looking up the exact same context ids as we just set */
 305         prandom_seed_state(&prng, i915_selftest.random_seed);
 306         end_time = count;
 307         kt = ktime_get();
 308         while (end_time--) {
 309                 u64 id = i915_prandom_u64_state(&prng);
 310
 311                 if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
 312                         mock_timeline_fini(&tl);
 313                         pr_err("Lookup of %llu failed\n", id);
 314                         return -EINVAL;
 315                 }
 316         }
 317         kt = ktime_sub(ktime_get(), kt);
 318         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 319         pr_info("%s: %lu random lookups, %lluns/lookup\n",
 320                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 321
 322         mock_timeline_fini(&tl);
 323         cond_resched();
 324
 325         mock_timeline_init(&tl, 0);
 326
 327         /* Benchmark setting the first N (in order) contexts */
 328         count = 0;
 329         kt = ktime_get();
 330         end_time = jiffies + HZ/10;
 331         do {
 332                 __intel_timeline_sync_set(&tl, count++, 0);
 333         } while (!time_after(jiffies, end_time));
 334         kt = ktime_sub(ktime_get(), kt);
 335         pr_info("%s: %lu in-order insertions, %lluns/insert\n",
 336                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 337
 338         /* Benchmark looking up the exact same context ids as we just set */
 339         end_time = count;
 340         kt = ktime_get();
 341         while (end_time--) {
 342                 if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
 343                         pr_err("Lookup of %lu failed\n", end_time);
 344                         mock_timeline_fini(&tl);
 345                         return -EINVAL;
 346                 }
 347         }
 348         kt = ktime_sub(ktime_get(), kt);
 349         pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
 350                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 351
 352         mock_timeline_fini(&tl);
 353         cond_resched();
 354
 355         mock_timeline_init(&tl, 0);
 356
 357         /* Benchmark searching for a random context id and maybe changing it */
 358         prandom_seed_state(&prng, i915_selftest.random_seed);
 359         count = 0;
 360         kt = ktime_get();
 361         end_time = jiffies + HZ/10;
 362         do {
 363                 u32 id = random_engine(&prng);
 364                 u32 seqno = prandom_u32_state(&prng);
 365
 366                 if (!__intel_timeline_sync_is_later(&tl, id, seqno))
 367                         __intel_timeline_sync_set(&tl, id, seqno);
 368
 369                 count++;
 370         } while (!time_after(jiffies, end_time));
 371         kt = ktime_sub(ktime_get(), kt);
 372         kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
 373         pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
 374                 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
 375         mock_timeline_fini(&tl);
 376         cond_resched();
 377
 378         /* Benchmark searching for a known context id and changing the seqno */
 379         for (last_order = 1, order = 1; order < 32;
 380              ({ int tmp = last_order; last_order = order; order += tmp; })) {
 381                 unsigned int mask = BIT(order) - 1;
 382
 383                 mock_timeline_init(&tl, 0);
 384
 385                 count = 0;
 386                 kt = ktime_get();
 387                 end_time = jiffies + HZ/10;
 388                 do {
 389                         /* Without assuming too many details of the underlying
 390                          * implementation, try to identify its phase-changes
 391                          * (if any)!
 392                          */
 393                         u64 id = (u64)(count & mask) << order;
 394
 395                         __intel_timeline_sync_is_later(&tl, id, 0);
 396                         __intel_timeline_sync_set(&tl, id, 0);
 397
 398                         count++;
 399                 } while (!time_after(jiffies, end_time));
 400                 kt = ktime_sub(ktime_get(), kt);
 401                 pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
 402                         __func__, count, order,
 403                         (long long)div64_ul(ktime_to_ns(kt), count));
 404                 mock_timeline_fini(&tl);
 405                 cond_resched();
 406         }
 407
 408         return 0;
 409 }
 410
 411 int intel_timeline_mock_selftests(void)
 412 {
 413         static const struct i915_subtest tests[] = {
 414                 SUBTEST(mock_hwsp_freelist),
 415                 SUBTEST(igt_sync),
 416                 SUBTEST(bench_sync),
 417         };
 418
 419         return i915_subtests(tests, NULL);
 420 }
 421
 422 static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
 423 {
 424         u32 *cs;
 425
 426         cs = intel_ring_begin(rq, 4);
 427         if (IS_ERR(cs))
 428                 return PTR_ERR(cs);
 429
 430         if (INTEL_GEN(rq->engine->i915) >= 8) {
 431                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 432                 *cs++ = addr;
 433                 *cs++ = 0;
 434                 *cs++ = value;
 435         } else if (INTEL_GEN(rq->engine->i915) >= 4) {
 436                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 437                 *cs++ = 0;
 438                 *cs++ = addr;
 439                 *cs++ = value;
 440         } else {
 441                 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 442                 *cs++ = addr;
 443                 *cs++ = value;
 444                 *cs++ = MI_NOOP;
 445         }
 446
 447         intel_ring_advance(rq, cs);
 448
 449         return 0;
 450 }
 451
 452 static struct i915_request *
 453 tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 454 {
 455         struct i915_request *rq;
 456         int err;
 457
 458         err = intel_timeline_pin(tl, NULL);
 459         if (err) {
 460                 rq = ERR_PTR(err);
 461                 goto out;
 462         }
 463
 464         rq = intel_engine_create_kernel_request(engine);
 465         if (IS_ERR(rq))
 466                 goto out_unpin;
 467
 468         i915_request_get(rq);
 469
 470         err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 471         i915_request_add(rq);
 472         if (err) {
 473                 i915_request_put(rq);
 474                 rq = ERR_PTR(err);
 475         }
 476
 477 out_unpin:
 478         intel_timeline_unpin(tl);
 479 out:
 480         if (IS_ERR(rq))
 481                 pr_err("Failed to write to timeline!\n");
 482         return rq;
 483 }
 484
 485 static struct intel_timeline *
 486 checked_intel_timeline_create(struct intel_gt *gt)
 487 {
 488         struct intel_timeline *tl;
 489
 490         tl = intel_timeline_create(gt);
 491         if (IS_ERR(tl))
 492                 return tl;
 493
 494         if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
 495                 pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
 496                        *tl->hwsp_seqno, tl->seqno);
 497                 intel_timeline_put(tl);
 498                 return ERR_PTR(-EINVAL);
 499         }
 500
 501         return tl;
 502 }
 503
 504 static int live_hwsp_engine(void *arg)
 505 {
 506 #define NUM_TIMELINES 4096
 507         struct intel_gt *gt = arg;
 508         struct intel_timeline **timelines;
 509         struct intel_engine_cs *engine;
 510         enum intel_engine_id id;
 511         unsigned long count, n;
 512         int err = 0;
 513
 514         /*
 515          * Create a bunch of timelines and check we can write
 516          * independently to each of their breadcrumb slots.
 517          */
 518
 519         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
 520                                    sizeof(*timelines),
 521                                    GFP_KERNEL);
 522         if (!timelines)
 523                 return -ENOMEM;
 524
 525         count = 0;
 526         for_each_engine(engine, gt, id) {
 527                 if (!intel_engine_can_store_dword(engine))
 528                         continue;
 529
 530                 intel_engine_pm_get(engine);
 531
 532                 for (n = 0; n < NUM_TIMELINES; n++) {
 533                         struct intel_timeline *tl;
 534                         struct i915_request *rq;
 535
 536                         tl = checked_intel_timeline_create(gt);
 537                         if (IS_ERR(tl)) {
 538                                 err = PTR_ERR(tl);
 539                                 break;
 540                         }
 541
 542                         rq = tl_write(tl, engine, count);
 543                         if (IS_ERR(rq)) {
 544                                 intel_timeline_put(tl);
 545                                 err = PTR_ERR(rq);
 546                                 break;
 547                         }
 548
 549                         timelines[count++] = tl;
 550                         i915_request_put(rq);
 551                 }
 552
 553                 intel_engine_pm_put(engine);
 554                 if (err)
 555                         break;
 556         }
 557
 558         if (igt_flush_test(gt->i915))
 559                 err = -EIO;
 560
 561         for (n = 0; n < count; n++) {
 562                 struct intel_timeline *tl = timelines[n];
 563
 564                 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
 565                         GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
 566                                       n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
 567                         GEM_TRACE_DUMP();
 568                         err = -EINVAL;
 569                 }
 570                 intel_timeline_put(tl);
 571         }
 572
 573         kvfree(timelines);
 574         return err;
 575 #undef NUM_TIMELINES
 576 }
 577
 578 static int live_hwsp_alternate(void *arg)
 579 {
 580 #define NUM_TIMELINES 4096
 581         struct intel_gt *gt = arg;
 582         struct intel_timeline **timelines;
 583         struct intel_engine_cs *engine;
 584         enum intel_engine_id id;
 585         unsigned long count, n;
 586         int err = 0;
 587
 588         /*
 589          * Create a bunch of timelines and check we can write
 590          * independently to each of their breadcrumb slots with adjacent
 591          * engines.
 592          */
 593
 594         timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
 595                                    sizeof(*timelines),
 596                                    GFP_KERNEL);
 597         if (!timelines)
 598                 return -ENOMEM;
 599
 600         count = 0;
 601         for (n = 0; n < NUM_TIMELINES; n++) {
 602                 for_each_engine(engine, gt, id) {
 603                         struct intel_timeline *tl;
 604                         struct i915_request *rq;
 605
 606                         if (!intel_engine_can_store_dword(engine))
 607                                 continue;
 608
 609                         tl = checked_intel_timeline_create(gt);
 610                         if (IS_ERR(tl)) {
 611                                 err = PTR_ERR(tl);
 612                                 goto out;
 613                         }
 614
 615                         intel_engine_pm_get(engine);
 616                         rq = tl_write(tl, engine, count);
 617                         intel_engine_pm_put(engine);
 618                         if (IS_ERR(rq)) {
 619                                 intel_timeline_put(tl);
 620                                 err = PTR_ERR(rq);
 621                                 goto out;
 622                         }
 623
 624                         timelines[count++] = tl;
 625                         i915_request_put(rq);
 626                 }
 627         }
 628
 629 out:
 630         if (igt_flush_test(gt->i915))
 631                 err = -EIO;
 632
 633         for (n = 0; n < count; n++) {
 634                 struct intel_timeline *tl = timelines[n];
 635
 636                 if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
 637                         GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
 638                                       n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
 639                         GEM_TRACE_DUMP();
 640                         err = -EINVAL;
 641                 }
 642                 intel_timeline_put(tl);
 643         }
 644
 645         kvfree(timelines);
 646         return err;
 647 #undef NUM_TIMELINES
 648 }
 649
 650 static int live_hwsp_wrap(void *arg)
 651 {
 652         struct intel_gt *gt = arg;
 653         struct intel_engine_cs *engine;
 654         struct intel_timeline *tl;
 655         enum intel_engine_id id;
 656         int err = 0;
 657
 658         /*
 659          * Across a seqno wrap, we need to keep the old cacheline alive for
 660          * foreign GPU references.
 661          */
 662
 663         tl = intel_timeline_create(gt);
 664         if (IS_ERR(tl))
 665                 return PTR_ERR(tl);
 666
 667         if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 668                 goto out_free;
 669
 670         err = intel_timeline_pin(tl, NULL);
 671         if (err)
 672                 goto out_free;
 673
 674         for_each_engine(engine, gt, id) {
 675                 const u32 *hwsp_seqno[2];
 676                 struct i915_request *rq;
 677                 u32 seqno[2];
 678
 679                 if (!intel_engine_can_store_dword(engine))
 680                         continue;
 681
 682                 rq = intel_engine_create_kernel_request(engine);
 683                 if (IS_ERR(rq)) {
 684                         err = PTR_ERR(rq);
 685                         goto out;
 686                 }
 687
 688                 tl->seqno = -4u;
 689
 690                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 691                 err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
 692                 mutex_unlock(&tl->mutex);
 693                 if (err) {
 694                         i915_request_add(rq);
 695                         goto out;
 696                 }
 697                 pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
 698                          seqno[0], tl->hwsp_offset);
 699
 700                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
 701                 if (err) {
 702                         i915_request_add(rq);
 703                         goto out;
 704                 }
 705                 hwsp_seqno[0] = tl->hwsp_seqno;
 706
 707                 mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 708                 err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
 709                 mutex_unlock(&tl->mutex);
 710                 if (err) {
 711                         i915_request_add(rq);
 712                         goto out;
 713                 }
 714                 pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
 715                          seqno[1], tl->hwsp_offset);
 716
 717                 err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
 718                 if (err) {
 719                         i915_request_add(rq);
 720                         goto out;
 721                 }
 722                 hwsp_seqno[1] = tl->hwsp_seqno;
 723
 724                 /* With wrap should come a new hwsp */
 725                 GEM_BUG_ON(seqno[1] >= seqno[0]);
 726                 GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
 727
 728                 i915_request_add(rq);
 729
 730                 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 731                         pr_err("Wait for timeline writes timed out!\n");
 732                         err = -EIO;
 733                         goto out;
 734                 }
 735
 736                 if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
 737                     READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
 738                         pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
 739                                *hwsp_seqno[0], *hwsp_seqno[1],
 740                                seqno[0], seqno[1]);
 741                         err = -EINVAL;
 742                         goto out;
 743                 }
 744
 745                 intel_gt_retire_requests(gt); /* recycle HWSP */
 746         }
 747
 748 out:
 749         if (igt_flush_test(gt->i915))
 750                 err = -EIO;
 751
 752         intel_timeline_unpin(tl);
 753 out_free:
 754         intel_timeline_put(tl);
 755         return err;
 756 }
 757
 758 static int live_hwsp_rollover_kernel(void *arg)
 759 {
 760         struct intel_gt *gt = arg;
 761         struct intel_engine_cs *engine;
 762         enum intel_engine_id id;
 763         int err = 0;
 764
 765         /*
 766          * Run the host for long enough, and even the kernel context will
 767          * see a seqno rollover.
 768          */
 769
 770         for_each_engine(engine, gt, id) {
 771                 struct intel_context *ce = engine->kernel_context;
 772                 struct intel_timeline *tl = ce->timeline;
 773                 struct i915_request *rq[3] = {};
 774                 int i;
 775
 776                 st_engine_heartbeat_disable(engine);
 777                 if (intel_gt_wait_for_idle(gt, HZ / 2)) {
 778                         err = -EIO;
 779                         goto out;
 780                 }
 781
 782                 GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
 783                 tl->seqno = 0;
 784                 timeline_rollback(tl);
 785                 timeline_rollback(tl);
 786                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
 787
 788                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 789                         struct i915_request *this;
 790
 791                         this = i915_request_create(ce);
 792                         if (IS_ERR(this)) {
 793                                 err = PTR_ERR(this);
 794                                 goto out;
 795                         }
 796
 797                         pr_debug("%s: create fence.seqnp:%d\n",
 798                                  engine->name,
 799                                  lower_32_bits(this->fence.seqno));
 800
 801                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
 802
 803                         rq[i] = i915_request_get(this);
 804                         i915_request_add(this);
 805                 }
 806
 807                 /* We expected a wrap! */
 808                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
 809
 810                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
 811                         pr_err("Wait for timeline wrap timed out!\n");
 812                         err = -EIO;
 813                         goto out;
 814                 }
 815
 816                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 817                         if (!i915_request_completed(rq[i])) {
 818                                 pr_err("Pre-wrap request not completed!\n");
 819                                 err = -EINVAL;
 820                                 goto out;
 821                         }
 822                 }
 823
 824 out:
 825                 for (i = 0; i < ARRAY_SIZE(rq); i++)
 826                         i915_request_put(rq[i]);
 827                 st_engine_heartbeat_enable(engine);
 828                 if (err)
 829                         break;
 830         }
 831
 832         if (igt_flush_test(gt->i915))
 833                 err = -EIO;
 834
 835         return err;
 836 }
 837
 838 static int live_hwsp_rollover_user(void *arg)
 839 {
 840         struct intel_gt *gt = arg;
 841         struct intel_engine_cs *engine;
 842         enum intel_engine_id id;
 843         int err = 0;
 844
 845         /*
 846          * Simulate a long running user context, and force the seqno wrap
 847          * on the user's timeline.
 848          */
 849
 850         for_each_engine(engine, gt, id) {
 851                 struct i915_request *rq[3] = {};
 852                 struct intel_timeline *tl;
 853                 struct intel_context *ce;
 854                 int i;
 855
 856                 ce = intel_context_create(engine);
 857                 if (IS_ERR(ce))
 858                         return PTR_ERR(ce);
 859
 860                 err = intel_context_alloc_state(ce);
 861                 if (err)
 862                         goto out;
 863
 864                 tl = ce->timeline;
 865                 if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 866                         goto out;
 867
 868                 timeline_rollback(tl);
 869                 timeline_rollback(tl);
 870                 WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
 871
 872                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 873                         struct i915_request *this;
 874
 875                         this = intel_context_create_request(ce);
 876                         if (IS_ERR(this)) {
 877                                 err = PTR_ERR(this);
 878                                 goto out;
 879                         }
 880
 881                         pr_debug("%s: create fence.seqnp:%d\n",
 882                                  engine->name,
 883                                  lower_32_bits(this->fence.seqno));
 884
 885                         GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);
 886
 887                         rq[i] = i915_request_get(this);
 888                         i915_request_add(this);
 889                 }
 890
 891                 /* We expected a wrap! */
 892                 GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);
 893
 894                 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
 895                         pr_err("Wait for timeline wrap timed out!\n");
 896                         err = -EIO;
 897                         goto out;
 898                 }
 899
 900                 for (i = 0; i < ARRAY_SIZE(rq); i++) {
 901                         if (!i915_request_completed(rq[i])) {
 902                                 pr_err("Pre-wrap request not completed!\n");
 903                                 err = -EINVAL;
 904                                 goto out;
 905                         }
 906                 }
 907
 908 out:
 909                 for (i = 0; i < ARRAY_SIZE(rq); i++)
 910                         i915_request_put(rq[i]);
 911                 intel_context_put(ce);
 912                 if (err)
 913                         break;
 914         }
 915
 916         if (igt_flush_test(gt->i915))
 917                 err = -EIO;
 918
 919         return err;
 920 }
 921
 922 static int live_hwsp_recycle(void *arg)
 923 {
 924         struct intel_gt *gt = arg;
 925         struct intel_engine_cs *engine;
 926         enum intel_engine_id id;
 927         unsigned long count;
 928         int err = 0;
 929
 930         /*
 931          * Check seqno writes into one timeline at a time. We expect to
 932          * recycle the breadcrumb slot between iterations and neither
 933          * want to confuse ourselves or the GPU.
 934          */
 935
 936         count = 0;
 937         for_each_engine(engine, gt, id) {
 938                 IGT_TIMEOUT(end_time);
 939
 940                 if (!intel_engine_can_store_dword(engine))
 941                         continue;
 942
 943                 intel_engine_pm_get(engine);
 944
 945                 do {
 946                         struct intel_timeline *tl;
 947                         struct i915_request *rq;
 948
 949                         tl = checked_intel_timeline_create(gt);
 950                         if (IS_ERR(tl)) {
 951                                 err = PTR_ERR(tl);
 952                                 break;
 953                         }
 954
 955                         rq = tl_write(tl, engine, count);
 956                         if (IS_ERR(rq)) {
 957                                 intel_timeline_put(tl);
 958                                 err = PTR_ERR(rq);
 959                                 break;
 960                         }
 961
 962                         if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 963                                 pr_err("Wait for timeline writes timed out!\n");
 964                                 i915_request_put(rq);
 965                                 intel_timeline_put(tl);
 966                                 err = -EIO;
 967                                 break;
 968                         }
 969
 970                         if (READ_ONCE(*tl->hwsp_seqno) != count) {
 971                                 GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
 972                                               count, tl->fence_context,
 973                                               tl->hwsp_offset, *tl->hwsp_seqno);
 974                                 GEM_TRACE_DUMP();
 975                                 err = -EINVAL;
 976                         }
 977
 978                         i915_request_put(rq);
 979                         intel_timeline_put(tl);
 980                         count++;
 981
 982                         if (err)
 983                                 break;
 984                 } while (!__igt_timeout(end_time, NULL));
 985
 986                 intel_engine_pm_put(engine);
 987                 if (err)
 988                         break;
 989         }
 990
 991         return err;
 992 }
 993
 994 int intel_timeline_live_selftests(struct drm_i915_private *i915)
 995 {
 996         static const struct i915_subtest tests[] = {
 997                 SUBTEST(live_hwsp_recycle),
 998                 SUBTEST(live_hwsp_engine),
 999                 SUBTEST(live_hwsp_alternate),
1000                 SUBTEST(live_hwsp_wrap),
1001                 SUBTEST(live_hwsp_rollover_kernel),
1002                 SUBTEST(live_hwsp_rollover_user),
1003         };
1004
1005         if (intel_gt_is_wedged(&i915->gt))
1006                 return 0;
1007
1008         return intel_gt_live_subtests(tests, &i915->gt);
1009 }