GNU Linux-libre 5.10.215-gnu1
[releases.git] / drivers / gpu / drm / i915 / selftests / i915_perf.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6
7 #include <linux/kref.h>
8
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_gt.h"
11
12 #include "i915_selftest.h"
13
14 #include "igt_flush_test.h"
15 #include "lib_sw_fence.h"
16
17 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
18
19 static int
20 alloc_empty_config(struct i915_perf *perf)
21 {
22         struct i915_oa_config *oa_config;
23
24         oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
25         if (!oa_config)
26                 return -ENOMEM;
27
28         oa_config->perf = perf;
29         kref_init(&oa_config->ref);
30
31         strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
32
33         mutex_lock(&perf->metrics_lock);
34
35         oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
36         if (oa_config->id < 0)  {
37                 mutex_unlock(&perf->metrics_lock);
38                 i915_oa_config_put(oa_config);
39                 return -ENOMEM;
40         }
41
42         mutex_unlock(&perf->metrics_lock);
43
44         return 0;
45 }
46
47 static void
48 destroy_empty_config(struct i915_perf *perf)
49 {
50         struct i915_oa_config *oa_config = NULL, *tmp;
51         int id;
52
53         mutex_lock(&perf->metrics_lock);
54
55         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
56                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
57                         oa_config = tmp;
58                         break;
59                 }
60         }
61
62         if (oa_config)
63                 idr_remove(&perf->metrics_idr, oa_config->id);
64
65         mutex_unlock(&perf->metrics_lock);
66
67         if (oa_config)
68                 i915_oa_config_put(oa_config);
69 }
70
71 static struct i915_oa_config *
72 get_empty_config(struct i915_perf *perf)
73 {
74         struct i915_oa_config *oa_config = NULL, *tmp;
75         int id;
76
77         mutex_lock(&perf->metrics_lock);
78
79         idr_for_each_entry(&perf->metrics_idr, tmp, id) {
80                 if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
81                         oa_config = i915_oa_config_get(tmp);
82                         break;
83                 }
84         }
85
86         mutex_unlock(&perf->metrics_lock);
87
88         return oa_config;
89 }
90
91 static struct i915_perf_stream *
92 test_stream(struct i915_perf *perf)
93 {
94         struct drm_i915_perf_open_param param = {};
95         struct i915_oa_config *oa_config = get_empty_config(perf);
96         struct perf_open_properties props = {
97                 .engine = intel_engine_lookup_user(perf->i915,
98                                                    I915_ENGINE_CLASS_RENDER,
99                                                    0),
100                 .sample_flags = SAMPLE_OA_REPORT,
101                 .oa_format = IS_GEN(perf->i915, 12) ?
102                 I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
103         };
104         struct i915_perf_stream *stream;
105
106         if (!oa_config)
107                 return NULL;
108
109         props.metrics_set = oa_config->id;
110
111         stream = kzalloc(sizeof(*stream), GFP_KERNEL);
112         if (!stream) {
113                 i915_oa_config_put(oa_config);
114                 return NULL;
115         }
116
117         stream->perf = perf;
118
119         mutex_lock(&perf->lock);
120         if (i915_oa_stream_init(stream, &param, &props)) {
121                 kfree(stream);
122                 stream =  NULL;
123         }
124         mutex_unlock(&perf->lock);
125
126         i915_oa_config_put(oa_config);
127
128         return stream;
129 }
130
/*
 * Tear down a stream created by test_stream().
 *
 * Note: perf is cached up front because i915_perf_destroy_locked() is
 * expected to free the stream, so stream->perf must not be dereferenced
 * after that call — TODO confirm against i915_perf_destroy_locked().
 */
static void stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	mutex_lock(&perf->lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&perf->lock);
}
139
140 static int live_sanitycheck(void *arg)
141 {
142         struct drm_i915_private *i915 = arg;
143         struct i915_perf_stream *stream;
144
145         /* Quick check we can create a perf stream */
146
147         stream = test_stream(&i915->perf);
148         if (!stream)
149                 return -EINVAL;
150
151         stream_destroy(stream);
152         return 0;
153 }
154
/*
 * Emit a PIPE_CONTROL that writes the GPU timestamp into the engine's
 * status page at the given dword slot.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* PIPE_CONTROL grew by one dword on gen8+ */
	len = 5;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	/* STORE_DATA_INDEX: address is a byte offset into the status page */
	*cs++ = slot * sizeof(u32);
	*cs++ = 0;
	*cs++ = 0;
	/*
	 * 6 dwords are always reserved/emitted; on pre-gen8 the command only
	 * consumes 5, so the trailing zero presumably pads as a no-op —
	 * NOTE(review): confirm 0 encodes MI_NOOP here.
	 */
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}
181
182 static ktime_t poll_status(struct i915_request *rq, int slot)
183 {
184         while (!intel_read_status_page(rq->engine, slot) &&
185                !i915_request_completed(rq))
186                 cpu_relax();
187
188         return ktime_get();
189 }
190
/*
 * Measure the duration of the NOA wait batch (stream->noa_wait) by
 * bracketing it with GPU timestamp writes, and check it falls within
 * [3/4, 3/2] of the programmed delay.
 */
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delays matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* The delay the noa_wait batch was programmed with, in ns */
	expected = atomic64_read(&stream->perf->noa_programming_delay);

	/* The noa_wait batch is only emitted for the render engine */
	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	/* Clear the status-page slots used for the two timestamps below */
	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	/* Timestamp before the delay batch */
	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* Execute the NOA delay batch between the two timestamps */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	/* Timestamp after the delay batch */
	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* Keep the CPU-side measurement tight around the GPU writes */
	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	/* GPU-side delta, converted from CS timestamp ticks to ns */
	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = i915_cs_timestamp_ticks_to_ns(i915, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	/* Accept delays within [3/4 * expected, 3/2 * expected] */
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
282
/*
 * Verify that executing the NOA wait batch does not clobber a user
 * context's CS general-purpose registers (GPR), nor spill into the
 * context's scratch page.
 */
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	/* MMIO offset of the first GPR on this engine */
	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	/* A fresh user context, distinct from the stream's kernel context */
	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = kmap(__px_page(ce->vm->scratch[0]));
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	/* Hold a reference so we can wait on rq after i915_request_add() */
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	/* Pad to the even dword count reserved above */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		cmd = MI_STORE_REGISTER_MEM;
		/* cmd++ bumps the length field; SRM is one dword longer on gen8+ */
		if (INTEL_GEN(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		/* GPU is stuck; declare the gt wedged so other tests bail */
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPR contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	kunmap(__px_page(ce->vm->scratch[0]));
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}
414
415 int i915_perf_live_selftests(struct drm_i915_private *i915)
416 {
417         static const struct i915_subtest tests[] = {
418                 SUBTEST(live_sanitycheck),
419                 SUBTEST(live_noa_delay),
420                 SUBTEST(live_noa_gpr),
421         };
422         struct i915_perf *perf = &i915->perf;
423         int err;
424
425         if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
426                 return 0;
427
428         if (intel_gt_is_wedged(&i915->gt))
429                 return 0;
430
431         err = alloc_empty_config(&i915->perf);
432         if (err)
433                 return err;
434
435         err = i915_subtests(tests, i915);
436
437         destroy_empty_config(&i915->perf);
438
439         return err;
440 }