// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */
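/*
 * Helpers for filling and copying GEM objects with the blitter engine.
 * Batch buffers are built in nodes borrowed from the gt buffer pool and
 * split into blocks, with an MI_ARB_CHECK between blocks, so that long
 * blits remain preemptible.
 */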

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

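/*
 * intel_emit_vma_fill_blt - build a batch that fills @vma with @value
 *
 * Allocates a buffer-pool node, pins it into @ce->vm and emits a chain of
 * XY_COLOR_BLT commands, one per block, each followed by MI_ARB_CHECK so
 * the fill can be preempted between blocks. The returned batch is pinned,
 * carries the pool node in ->private and holds an engine wakeref; both are
 * released by intel_emit_vma_release().
 */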
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
                                         struct i915_vma *vma,
                                         struct i915_gem_ww_ctx *ww,
                                         u32 value)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
        struct intel_gt_buffer_pool_node *pool;
        struct i915_vma *batch;
        u64 offset;
        u64 count;
        u64 rem;
        u32 size;
        u32 *cmd;
        int err;

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

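        /*
         * Worst case per block is 8 dwords (a 7-dword gen8+ XY_COLOR_BLT
         * plus MI_ARB_CHECK), with a single MI_BATCH_BUFFER_END terminating
         * the whole batch.
         */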
        count = div_u64(round_up(vma->size, block_size), block_size);
        size = (1 + 8 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        err = i915_gem_object_lock(pool->obj, ww);
        if (err)
                goto out_put;

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_unpin;
        }

        rem = vma->size;
        offset = vma->node.start;

        do {
                u32 size = min_t(u64, rem, block_size);

                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

                if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(offset);
                        *cmd++ = upper_32_bits(offset);
                        *cmd++ = value;
                } else {
                        *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = offset;
                        *cmd++ = value;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;

        i915_gem_object_flush_map(pool->obj);
        i915_gem_object_unpin_map(pool->obj);

        intel_gt_chipset_flush(ce->vm->gt);

        batch->private = pool;
        return batch;

out_unpin:
        i915_vma_unpin(batch);
out_put:
        intel_gt_buffer_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

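/*
 * intel_emit_vma_mark_active - keep a batch vma alive for the request
 *
 * Order @rq after prior work on the batch object, mark the vma as active
 * on the request and keep the underlying buffer-pool node busy until @rq
 * retires, so the node is not recycled while the blit is still in flight.
 */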
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
        int err;

        err = i915_request_await_object(rq, vma->obj, false);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, 0);
        if (unlikely(err))
                return err;

        return intel_gt_buffer_pool_mark_active(vma->private, rq);
}

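/*
 * intel_emit_vma_release - undo intel_emit_vma_fill_blt/copy_blt
 *
 * Unpin the batch, return its node to the buffer pool and drop the engine
 * wakeref taken when the batch was emitted.
 */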
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
        i915_vma_unpin(vma);
        intel_gt_buffer_pool_put(vma->private);
        intel_engine_pm_put(ce->engine);
}

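/*
 * Flush any CPU cache dirt that the GPU cannot snoop before it touches
 * the object, then order @rq after the work already queued against it.
 */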
static int
move_obj_to_gpu(struct drm_i915_gem_object *obj,
                struct i915_request *rq,
                bool write)
{
        if (obj->cache_dirty & ~obj->cache_coherent)
                i915_gem_clflush_object(obj, 0);

        return i915_request_await_object(rq, obj, write);
}

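/*
 * i915_gem_object_fill_blt - fill an entire object with @value on @ce
 *
 * Takes the ww locks, pins the object and the context, emits the fill
 * batch and submits a request to run it on @ce's engine. Returns 0 once
 * the request has been submitted; completion is asynchronous.
 */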
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
                             struct intel_context *ce,
                             u32 value)
{
        struct i915_gem_ww_ctx ww;
        struct i915_request *rq;
        struct i915_vma *batch;
        struct i915_vma *vma;
        int err;

        vma = i915_vma_instance(obj, ce->vm, NULL);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        i915_gem_ww_ctx_init(&ww, true);
        intel_engine_pm_get(ce->engine);
retry:
        err = i915_gem_object_lock(obj, &ww);
        if (err)
                goto out;

        err = intel_context_pin_ww(ce, &ww);
        if (err)
                goto out;

        err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
        if (err)
                goto out_ctx;

        batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_vma;
        }

        rq = i915_request_create(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

        err = move_obj_to_gpu(vma->obj, rq, true);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
        if (unlikely(err))
                goto out_request;

        if (ce->engine->emit_init_breadcrumb)
                err = ce->engine->emit_init_breadcrumb(rq);

        if (likely(!err))
                err = ce->engine->emit_bb_start(rq,
                                                batch->node.start,
                                                batch->node.size,
                                                0);
out_request:
        if (unlikely(err))
                i915_request_set_error_once(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_vma:
        i915_vma_unpin(vma);
out_ctx:
        intel_context_unpin(ce);
out:
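        /*
         * -EDEADLK from the ww machinery means we lost an ordering race
         * against another locker: drop our locks, back off and retry the
         * whole sequence from the top.
         */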
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);
        intel_engine_pm_put(ce->engine);
        return err;
}

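/*
 * On gen11 the fast-copy blit must be avoided for certain small block
 * heights; intel_emit_vma_copy_blt() falls back to the legacy
 * XY_SRC_COPY_BLT path whenever this check fires.
 */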
/* Wa_1209644611:icl,ehl */
static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
{
        u32 height = size >> PAGE_SHIFT;

        if (!IS_GEN(i915, 11))
                return false;

        return height % 4 == 3 && height <= 8;
}

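/*
 * intel_emit_vma_copy_blt - build a batch that copies @src into @dst
 *
 * The counterpart of intel_emit_vma_fill_blt(): emits one copy blit per
 * block, separated by MI_ARB_CHECK, using the gen9+ fast-copy command
 * where permitted and the older XY_SRC_COPY/SRC_COPY forms otherwise.
 * The returned batch carries its buffer-pool node in ->private and an
 * engine wakeref, both released by intel_emit_vma_release().
 */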
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
                                         struct i915_gem_ww_ctx *ww,
                                         struct i915_vma *src,
                                         struct i915_vma *dst)
{
        struct drm_i915_private *i915 = ce->vm->i915;
        const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
        struct intel_gt_buffer_pool_node *pool;
        struct i915_vma *batch;
        u64 src_offset, dst_offset;
        u64 count, rem;
        u32 size, *cmd;
        int err;

        GEM_BUG_ON(src->size != dst->size);

        GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
        intel_engine_pm_get(ce->engine);

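        /*
         * Worst case per block is 11 dwords (a 10-dword copy blit plus
         * MI_ARB_CHECK), terminated by a single MI_BATCH_BUFFER_END.
         */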
        count = div_u64(round_up(dst->size, block_size), block_size);
        size = (1 + 11 * count) * sizeof(u32);
        size = round_up(size, PAGE_SIZE);
        pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
        if (IS_ERR(pool)) {
                err = PTR_ERR(pool);
                goto out_pm;
        }

        err = i915_gem_object_lock(pool->obj, ww);
        if (err)
                goto out_put;

        batch = i915_vma_instance(pool->obj, ce->vm, NULL);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_put;
        }

        err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_put;

        cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto out_unpin;
        }

        rem = src->size;
        src_offset = src->node.start;
        dst_offset = dst->node.start;

        do {
                size = min_t(u64, rem, block_size);
                GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

                if (INTEL_GEN(i915) >= 9 &&
                    !wa_1209644611_applies(i915, size)) {
                        *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else if (INTEL_GEN(i915) >= 8) {
                        *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = 0;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
                        *cmd++ = lower_32_bits(dst_offset);
                        *cmd++ = upper_32_bits(dst_offset);
                        *cmd++ = 0;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = lower_32_bits(src_offset);
                        *cmd++ = upper_32_bits(src_offset);
                } else {
                        *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
                        *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
                        *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
                        *cmd++ = dst_offset;
                        *cmd++ = PAGE_SIZE;
                        *cmd++ = src_offset;
                }

                /* Allow ourselves to be preempted in between blocks. */
                *cmd++ = MI_ARB_CHECK;

                src_offset += size;
                dst_offset += size;
                rem -= size;
        } while (rem);

        *cmd = MI_BATCH_BUFFER_END;

        i915_gem_object_flush_map(pool->obj);
        i915_gem_object_unpin_map(pool->obj);

        intel_gt_chipset_flush(ce->vm->gt);
        batch->private = pool;
        return batch;

out_unpin:
        i915_vma_unpin(batch);
out_put:
        intel_gt_buffer_pool_put(pool);
out_pm:
        intel_engine_pm_put(ce->engine);
        return ERR_PTR(err);
}

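/*
 * i915_gem_object_copy_blt - copy the full contents of @src into @dst
 *
 * Both objects are instantiated in @ce->vm, locked and pinned under a
 * single ww context, and a request executing the copy batch is submitted
 * to @ce's engine. Returns 0 once the request has been submitted;
 * completion is asynchronous.
 */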
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
                             struct drm_i915_gem_object *dst,
                             struct intel_context *ce)
{
        struct i915_address_space *vm = ce->vm;
        struct i915_vma *vma[2], *batch;
        struct i915_gem_ww_ctx ww;
        struct i915_request *rq;
        int err, i;

        vma[0] = i915_vma_instance(src, vm, NULL);
        if (IS_ERR(vma[0]))
                return PTR_ERR(vma[0]);

        vma[1] = i915_vma_instance(dst, vm, NULL);
        if (IS_ERR(vma[1]))
                return PTR_ERR(vma[1]);

        i915_gem_ww_ctx_init(&ww, true);
        intel_engine_pm_get(ce->engine);
retry:
        err = i915_gem_object_lock(src, &ww);
        if (!err)
                err = i915_gem_object_lock(dst, &ww);
        if (!err)
                err = intel_context_pin_ww(ce, &ww);
        if (err)
                goto out;

        err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
        if (err)
                goto out_ctx;

        err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
        if (unlikely(err))
                goto out_unpin_src;

        batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unpin_dst;
        }

        rq = i915_request_create(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto out_batch;
        }

        err = intel_emit_vma_mark_active(batch, rq);
        if (unlikely(err))
                goto out_request;

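        /* src (vma[0]) is only read; dst (vma[1]) is written by the blit. */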
        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                err = move_obj_to_gpu(vma[i]->obj, rq, i);
                if (unlikely(err))
                        goto out_request;
        }

        for (i = 0; i < ARRAY_SIZE(vma); i++) {
                unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

                err = i915_vma_move_to_active(vma[i], rq, flags);
                if (unlikely(err))
                        goto out_request;
        }

        if (rq->engine->emit_init_breadcrumb) {
                err = rq->engine->emit_init_breadcrumb(rq);
                if (unlikely(err))
                        goto out_request;
        }

        err = rq->engine->emit_bb_start(rq,
                                        batch->node.start, batch->node.size,
                                        0);

out_request:
        if (unlikely(err))
                i915_request_set_error_once(rq, err);

        i915_request_add(rq);
out_batch:
        intel_emit_vma_release(ce, batch);
out_unpin_dst:
        i915_vma_unpin(vma[1]);
out_unpin_src:
        i915_vma_unpin(vma[0]);
out_ctx:
        intel_context_unpin(ce);
out:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);
        intel_engine_pm_put(ce->engine);
        return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif