1 // SPDX-License-Identifier: MIT
3 * Copyright © 2020 Intel Corporation
6 #include "gen2_engine_cs.h"
8 #include "intel_engine.h"
9 #include "intel_gpu_commands.h"
11 #include "intel_gt_irq.h"
12 #include "intel_ring.h"
14 int gen2_emit_flush(struct i915_request *rq, u32 mode)
/*
 * Emit a flush for the gen2-era command streamer: an MI_FLUSH followed by
 * a run of dword stores to the HWSP scratch slot which act as a settling
 * delay for the flush.
 *
 * NOTE(review): this view of the file is sampled -- braces, early-exit
 * paths and some emitted dwords are elided between the numbered fragments
 * below; comments describe only what the visible statements show.
 */
16 	unsigned int num_store_dw = 12;
/* Invalidation presumably adjusts the command word -- body elided here. */
20 	if (mode & EMIT_INVALIDATE)
/* Ring space: 2 dwords for the flush plus 4 per scratch-store loop pass. */
23 	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
/* Repeated stores to the same scratch slot; the stored value is not
 * read back -- the writes exist only to consume time after the flush. */
28 	while (num_store_dw--) {
29 		*cs++ = MI_STORE_DWORD_INDEX;
30 		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
32 	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
36 	intel_ring_advance(rq, cs);
41 int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
/*
 * Render-engine flush for gen4/gen5: build an MI_FLUSH command word from
 * the requested mode, then pad invalidations with PIPE_CONTROL scratch
 * writes to give the invalidate time to take effect (see comment below).
 *
 * NOTE(review): sampled view -- the declarations of cmd/cs/i, the
 * EMIT_FLUSH handling, error checks on intel_ring_begin() and several
 * emitted dwords are elided between the numbered fragments.
 */
49 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
50 	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
51 	 * also flushed at 2d versus 3d pipeline switches.
55 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
56 	 * MI_READ_FLUSH is set, and is always flushed on 965.
58 	 * I915_GEM_DOMAIN_COMMAND may not exist?
60 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
61 	 * invalidated when MI_EXE_FLUSH is set.
63 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
64 	 * invalidated with every MI_FLUSH.
68 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
69 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
70 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
71 	 * are flushed at any MI_FLUSH.
/* G4X and gen5 additionally need the sampler prefetch (ISP) invalidated. */
75 	if (mode & EMIT_INVALIDATE) {
77 		if (IS_G4X(rq->engine->i915) || IS_GEN(rq->engine->i915, 5))
78 			cmd |= MI_INVALIDATE_ISP;
/* 'i' (computed in elided lines) sizes the ring reservation; it is
 * presumably larger when the invalidate padding below is emitted. */
82 	if (mode & EMIT_INVALIDATE)
85 	cs = intel_ring_begin(rq, i);
92 	 * A random delay to let the CS invalidate take effect? Without this
93 	 * delay, the GPU relocation path fails as the CS does not see
94 	 * the updated contents. Just as important, if we apply the flushes
95 	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
96 	 * write and before the invalidate on the next batch), the relocations
97 	 * still fail. This implies that there is a delay following invalidation
98 	 * that is required to reset the caches as opposed to a delay to
99 	 * ensure the memory is written.
/* Post-invalidate settle: a QW-write PIPE_CONTROL to the GT scratch page,
 * repeated (the loop of 12 below), then one final scratch write. */
101 	if (mode & EMIT_INVALIDATE) {
102 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
103 		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
104 						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
105 			PIPE_CONTROL_GLOBAL_GTT;
109 		for (i = 0; i < 12; i++)
112 		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
113 		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
114 						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
115 			PIPE_CONTROL_GLOBAL_GTT;
122 	intel_ring_advance(rq, cs);
127 int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
/*
 * Video (BSD) engine flush: a minimal two-dword emission.
 * NOTE(review): the emitted command dwords and the intel_ring_begin()
 * error check are elided from this sampled view.
 */
131 	cs = intel_ring_begin(rq, 2);
137 	intel_ring_advance(rq, cs);
142 static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
/*
 * Common breadcrumb emission for gen2-gen5: write the request's seqno
 * repeatedly to the HWSP scratch slot (as a flush/settle delay), then to
 * the canonical HWS seqno slot, and finish with MI_USER_INTERRUPT.
 * The two elided trailing parameters presumably control the iteration
 * counts used by the loops around the stores below (cf. the 16/8 and 8/8
 * values passed by the gen3/gen5 wrappers) -- TODO confirm.
 */
/* The breadcrumb must land in the engine's status page. */
145 	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
146 	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
/* Delay writes: seqno into the scratch slot (value unused by readers). */
151 		*cs++ = MI_STORE_DWORD_INDEX;
152 		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
153 		*cs++ = rq->fence.seqno;
/* The real breadcrumb: seqno into the HWS seqno address. */
157 	*cs++ = MI_STORE_DWORD_INDEX;
158 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
159 	*cs++ = rq->fence.seqno;
/* Signal the CPU that the request has completed. */
162 	*cs++ = MI_USER_INTERRUPT;
/* Record the ring tail for this request and sanity-check it. */
164 	rq->tail = intel_ring_offset(rq, cs);
165 	assert_ring_tail_valid(rq->ring, rq->tail);
170 u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
/* gen3 tuning of the common breadcrumb: 16/8 loop counts (see helper). */
172 	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
175 u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
/* gen5 tuning of the common breadcrumb: 8/8 loop counts (see helper). */
177 	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
180 /* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
181 #define I830_BATCH_LIMIT SZ_256K
/* Number of scratch pages blitted through to evict stale TLB entries. */
182 #define I830_TLB_ENTRIES (2)
/* Workaround buffer must hold the TLB-evict pages plus the batch copy. */
183 #define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
184 int i830_emit_bb_start(struct i915_request *rq,
186 		       unsigned int dispatch_flags)
/*
 * Start a batch buffer on i830, working around its CS TLB invalidation
 * bug: first evict stale TLB entries with a dummy colour blit, then (for
 * unpinned batches) blit the batch into the stable GT scratch area and
 * execute it from there.
 *
 * NOTE(review): sampled view -- the 'offset'/'len'/'cs' declarations,
 * intel_ring_begin() error checks and several emitted dwords are elided
 * between the numbered fragments.
 */
/* cs_offset (assigned in an elided line) is the GT scratch base used as
 * both the blit target and the relocated batch address. */
189 		intel_gt_scratch_offset(rq->engine->gt,
190 					INTEL_GT_SCRATCH_FIELD_DEFAULT);
192 	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
194 	cs = intel_ring_begin(rq, 6);
198 	/* Evict the invalid PTE TLBs */
199 	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
200 	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
201 	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
205 	intel_ring_advance(rq, cs);
/* Unpinned batches must fit the workaround buffer and are copied into
 * the scratch area before execution. */
207 	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
208 		if (len > I830_BATCH_LIMIT)
211 		cs = intel_ring_begin(rq, 6 + 2);
216 		 * Blit the batch (which has now all relocs applied) to the
217 		 * stable batch scratch bo area (so that the CS never
218 		 * stumbles over its tlb invalidation bug) ...
220 		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
221 		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
/* Height = pages of the batch, width = one 4096-byte row per page. */
222 		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
229 		intel_ring_advance(rq, cs);
231 		/* ... and execute it. */
/* Non-secure batches are tagged so the CS checks their commands. */
235 	if (!(dispatch_flags & I915_DISPATCH_SECURE))
236 		offset |= MI_BATCH_NON_SECURE;
238 	cs = intel_ring_begin(rq, 2);
242 	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
244 	intel_ring_advance(rq, cs);
249 int gen3_emit_bb_start(struct i915_request *rq,
251 		       unsigned int dispatch_flags)
/*
 * Start a batch on gen3: MI_BATCH_BUFFER_START with the GTT-relative
 * batch address, marking non-secure batches so the CS validates them.
 * NOTE(review): sampled view -- the offset emission dword and error
 * checks are elided between the fragments below.
 */
255 	if (!(dispatch_flags & I915_DISPATCH_SECURE))
256 		offset |= MI_BATCH_NON_SECURE;
258 	cs = intel_ring_begin(rq, 2);
262 	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
264 	intel_ring_advance(rq, cs);
269 int gen4_emit_bb_start(struct i915_request *rq,
270 		       u64 offset, u32 length,
271 		       unsigned int dispatch_flags)
/*
 * Start a batch on gen4/gen5. Unlike gen3, the security bit lives in the
 * MI_BATCH_BUFFER_START command itself (MI_BATCH_NON_SECURE_I965): the
 * default is non-secure and the elided branch body presumably clears
 * 'security' for I915_DISPATCH_SECURE batches -- TODO confirm.
 */
276 	security = MI_BATCH_NON_SECURE_I965;
277 	if (dispatch_flags & I915_DISPATCH_SECURE)
280 	cs = intel_ring_begin(rq, 2);
284 	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
286 	intel_ring_advance(rq, cs);
291 void gen2_irq_enable(struct intel_engine_cs *engine)
/*
 * Unmask this engine's interrupt in the cached IMR and push it to the
 * 16-bit gen2 IMR register, with a posting read to flush the write.
 */
293 	struct drm_i915_private *i915 = engine->i915;
295 	i915->irq_mask &= ~engine->irq_enable_mask;
296 	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
297 	ENGINE_POSTING_READ16(engine, RING_IMR);
300 void gen2_irq_disable(struct intel_engine_cs *engine)
/*
 * Mask this engine's interrupt in the cached IMR and push it to the
 * 16-bit gen2 IMR register (no posting read on the disable path).
 */
302 	struct drm_i915_private *i915 = engine->i915;
304 	i915->irq_mask |= engine->irq_enable_mask;
305 	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
308 void gen3_irq_enable(struct intel_engine_cs *engine)
/*
 * Unmask this engine's interrupt; gen3 uses full 32-bit IMR accesses,
 * flushed with a non-locking posting read.
 */
310 	engine->i915->irq_mask &= ~engine->irq_enable_mask;
311 	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
312 	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
315 void gen3_irq_disable(struct intel_engine_cs *engine)
/* Mask this engine's interrupt via a 32-bit IMR write (no posting read). */
317 	engine->i915->irq_mask |= engine->irq_enable_mask;
318 	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
321 void gen5_irq_enable(struct intel_engine_cs *engine)
/* gen5 routes engine interrupts through the GT interrupt helpers. */
323 	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
326 void gen5_irq_disable(struct intel_engine_cs *engine)
/* Counterpart of gen5_irq_enable(): mask via the GT interrupt helpers. */
328 	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);