drivers/gpu/drm/i915/gt/intel_ring.c

   1 /*
   2  * SPDX-License-Identifier: MIT
   3  *
   4  * Copyright © 2019 Intel Corporation
   5  */
   6
   7 #include "gem/i915_gem_object.h"
   8 #include "i915_drv.h"
   9 #include "i915_vma.h"
  10 #include "intel_engine.h"
  11 #include "intel_ring.h"
  12 #include "intel_timeline.h"
  13
  14 unsigned int intel_ring_update_space(struct intel_ring *ring)
  15 {
  16         unsigned int space;
  17
  18         space = __intel_ring_space(ring->head, ring->emit, ring->size);
  19
  20         ring->space = space;
  21         return space;
  22 }
  23
  24 void __intel_ring_pin(struct intel_ring *ring)
  25 {
  26         GEM_BUG_ON(!atomic_read(&ring->pin_count));
  27         atomic_inc(&ring->pin_count);
  28 }
  29
  30 int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
  31 {
  32         struct i915_vma *vma = ring->vma;
  33         unsigned int flags;
  34         void *addr;
  35         int ret;
  36
  37         if (atomic_fetch_inc(&ring->pin_count))
  38                 return 0;
  39
  40         /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
  41         flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
  42
  43         if (vma->obj->stolen)
  44                 flags |= PIN_MAPPABLE;
  45         else
  46                 flags |= PIN_HIGH;
  47
  48         ret = i915_ggtt_pin(vma, ww, 0, flags);
  49         if (unlikely(ret))
  50                 goto err_unpin;
  51
  52         if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
  53                 addr = (void __force *)i915_vma_pin_iomap(vma);
  54         else
  55                 addr = i915_gem_object_pin_map(vma->obj,
  56                                                i915_coherent_map_type(vma->vm->i915));
  57         if (IS_ERR(addr)) {
  58                 ret = PTR_ERR(addr);
  59                 goto err_ring;
  60         }
  61
  62         i915_vma_make_unshrinkable(vma);
  63
  64         /* Discard any unused bytes beyond that submitted to hw. */
  65         intel_ring_reset(ring, ring->emit);
  66
  67         ring->vaddr = addr;
  68         return 0;
  69
  70 err_ring:
  71         i915_vma_unpin(vma);
  72 err_unpin:
  73         atomic_dec(&ring->pin_count);
  74         return ret;
  75 }
  76
  77 void intel_ring_reset(struct intel_ring *ring, u32 tail)
  78 {
  79         tail = intel_ring_wrap(ring, tail);
  80         ring->tail = tail;
  81         ring->head = tail;
  82         ring->emit = tail;
  83         intel_ring_update_space(ring);
  84 }
  85
  86 void intel_ring_unpin(struct intel_ring *ring)
  87 {
  88         struct i915_vma *vma = ring->vma;
  89
  90         if (!atomic_dec_and_test(&ring->pin_count))
  91                 return;
  92
  93         i915_vma_unset_ggtt_write(vma);
  94         if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915))
  95                 i915_vma_unpin_iomap(vma);
  96         else
  97                 i915_gem_object_unpin_map(vma->obj);
  98
  99         i915_vma_make_purgeable(vma);
 100         i915_vma_unpin(vma);
 101 }
 102
 103 static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
 104 {
 105         struct i915_address_space *vm = &ggtt->vm;
 106         struct drm_i915_private *i915 = vm->i915;
 107         struct drm_i915_gem_object *obj;
 108         struct i915_vma *vma;
 109
 110         obj = ERR_PTR(-ENODEV);
 111         if (i915_ggtt_has_aperture(ggtt) && !HAS_LLC(i915))
 112                 obj = i915_gem_object_create_stolen(i915, size);
 113         if (IS_ERR(obj))
 114                 obj = i915_gem_object_create_internal(i915, size);
 115         if (IS_ERR(obj))
 116                 return ERR_CAST(obj);
 117
 118         /*
 119          * Mark ring buffers as read-only from GPU side (so no stray overwrites)
 120          * if supported by the platform's GGTT.
 121          */
 122         if (vm->has_read_only)
 123                 i915_gem_object_set_readonly(obj);
 124
 125         vma = i915_vma_instance(obj, vm, NULL);
 126         if (IS_ERR(vma))
 127                 goto err;
 128
 129         return vma;
 130
 131 err:
 132         i915_gem_object_put(obj);
 133         return vma;
 134 }
 135
 136 struct intel_ring *
 137 intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 138 {
 139         struct drm_i915_private *i915 = engine->i915;
 140         struct intel_ring *ring;
 141         struct i915_vma *vma;
 142
 143         GEM_BUG_ON(!is_power_of_2(size));
 144         GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
 145
 146         ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 147         if (!ring)
 148                 return ERR_PTR(-ENOMEM);
 149
 150         kref_init(&ring->ref);
 151         ring->size = size;
 152         ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);
 153
 154         /*
 155          * Workaround an erratum on the i830 which causes a hang if
 156          * the TAIL pointer points to within the last 2 cachelines
 157          * of the buffer.
 158          */
 159         ring->effective_size = size;
 160         if (IS_I830(i915) || IS_I845G(i915))
 161                 ring->effective_size -= 2 * CACHELINE_BYTES;
 162
 163         intel_ring_update_space(ring);
 164
 165         vma = create_ring_vma(engine->gt->ggtt, size);
 166         if (IS_ERR(vma)) {
 167                 kfree(ring);
 168                 return ERR_CAST(vma);
 169         }
 170         ring->vma = vma;
 171
 172         return ring;
 173 }
 174
 175 void intel_ring_free(struct kref *ref)
 176 {
 177         struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
 178
 179         i915_vma_put(ring->vma);
 180         kfree(ring);
 181 }
 182
 183 static noinline int
 184 wait_for_space(struct intel_ring *ring,
 185                struct intel_timeline *tl,
 186                unsigned int bytes)
 187 {
 188         struct i915_request *target;
 189         long timeout;
 190
 191         if (intel_ring_update_space(ring) >= bytes)
 192                 return 0;
 193
 194         GEM_BUG_ON(list_empty(&tl->requests));
 195         list_for_each_entry(target, &tl->requests, link) {
 196                 if (target->ring != ring)
 197                         continue;
 198
 199                 /* Would completion of this request free enough space? */
 200                 if (bytes <= __intel_ring_space(target->postfix,
 201                                                 ring->emit, ring->size))
 202                         break;
 203         }
 204
 205         if (GEM_WARN_ON(&target->link == &tl->requests))
 206                 return -ENOSPC;
 207
 208         timeout = i915_request_wait(target,
 209                                     I915_WAIT_INTERRUPTIBLE,
 210                                     MAX_SCHEDULE_TIMEOUT);
 211         if (timeout < 0)
 212                 return timeout;
 213
 214         i915_request_retire_upto(target);
 215
 216         intel_ring_update_space(ring);
 217         GEM_BUG_ON(ring->space < bytes);
 218         return 0;
 219 }
 220
 221 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 222 {
 223         struct intel_ring *ring = rq->ring;
 224         const unsigned int remain_usable = ring->effective_size - ring->emit;
 225         const unsigned int bytes = num_dwords * sizeof(u32);
 226         unsigned int need_wrap = 0;
 227         unsigned int total_bytes;
 228         u32 *cs;
 229
 230         /* Packets must be qword aligned. */
 231         GEM_BUG_ON(num_dwords & 1);
 232
 233         total_bytes = bytes + rq->reserved_space;
 234         GEM_BUG_ON(total_bytes > ring->effective_size);
 235
 236         if (unlikely(total_bytes > remain_usable)) {
 237                 const int remain_actual = ring->size - ring->emit;
 238
 239                 if (bytes > remain_usable) {
 240                         /*
 241                          * Not enough space for the basic request. So need to
 242                          * flush out the remainder and then wait for
 243                          * base + reserved.
 244                          */
 245                         total_bytes += remain_actual;
 246                         need_wrap = remain_actual | 1;
 247                 } else  {
 248                         /*
 249                          * The base request will fit but the reserved space
 250                          * falls off the end. So we don't need an immediate
 251                          * wrap and only need to effectively wait for the
 252                          * reserved size from the start of ringbuffer.
 253                          */
 254                         total_bytes = rq->reserved_space + remain_actual;
 255                 }
 256         }
 257
 258         if (unlikely(total_bytes > ring->space)) {
 259                 int ret;
 260
 261                 /*
 262                  * Space is reserved in the ringbuffer for finalising the
 263                  * request, as that cannot be allowed to fail. During request
 264                  * finalisation, reserved_space is set to 0 to stop the
 265                  * overallocation and the assumption is that then we never need
 266                  * to wait (which has the risk of failing with EINTR).
 267                  *
 268                  * See also i915_request_alloc() and i915_request_add().
 269                  */
 270                 GEM_BUG_ON(!rq->reserved_space);
 271
 272                 ret = wait_for_space(ring,
 273                                      i915_request_timeline(rq),
 274                                      total_bytes);
 275                 if (unlikely(ret))
 276                         return ERR_PTR(ret);
 277         }
 278
 279         if (unlikely(need_wrap)) {
 280                 need_wrap &= ~1;
 281                 GEM_BUG_ON(need_wrap > ring->space);
 282                 GEM_BUG_ON(ring->emit + need_wrap > ring->size);
 283                 GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
 284
 285                 /* Fill the tail with MI_NOOP */
 286                 memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
 287                 ring->space -= need_wrap;
 288                 ring->emit = 0;
 289         }
 290
 291         GEM_BUG_ON(ring->emit > ring->size - bytes);
 292         GEM_BUG_ON(ring->space < bytes);
 293         cs = ring->vaddr + ring->emit;
 294         GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
 295         ring->emit += bytes;
 296         ring->space -= bytes;
 297
 298         return cs;
 299 }
 300
 301 /* Align the ring tail to a cacheline boundary */
 302 int intel_ring_cacheline_align(struct i915_request *rq)
 303 {
 304         int num_dwords;
 305         void *cs;
 306
 307         num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
 308         if (num_dwords == 0)
 309                 return 0;
 310
 311         num_dwords = CACHELINE_DWORDS - num_dwords;
 312         GEM_BUG_ON(num_dwords & 1);
 313
 314         cs = intel_ring_begin(rq, num_dwords);
 315         if (IS_ERR(cs))
 316                 return PTR_ERR(cs);
 317
 318         memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
 319         intel_ring_advance(rq, cs + num_dwords);
 320
 321         GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
 322         return 0;
 323 }
 324
 325 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 326 #include "selftest_ring.c"
 327 #endif