drivers/gpu/drm/i915/gt/intel_gtt.c (GNU Linux-libre 5.10.215-gnu1)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
        if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
                i915_gem_shrink_all(vm->i915);

        return i915_gem_object_create_internal(vm->i915, sz);
}

int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
        int err;

        err = i915_gem_object_pin_pages(obj);
        if (err)
                return err;

        i915_gem_object_make_unshrinkable(obj);
        return 0;
}
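
/*
 * Illustrative sketch (not part of the upstream file; obj and err are
 * placeholders): callers such as setup_scratch_page() below pair the two
 * helpers above and drop the object again if pinning fails:
 *
 *      obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 *      if (IS_ERR(obj))
 *              return PTR_ERR(obj);
 *
 *      err = pin_pt_dma(vm, obj);
 *      if (err) {
 *              i915_gem_object_put(obj);
 *              return err;
 *      }
 */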

void __i915_vm_close(struct i915_address_space *vm)
{
        struct i915_vma *vma, *vn;

        if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
                return;

        list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;

                /* Keep the obj (and hence the vma) alive as _we_ destroy it */
                if (!kref_get_unless_zero(&obj->base.refcount))
                        continue;

                atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
                WARN_ON(__i915_vma_unbind(vma));
                __i915_vma_put(vma);

                i915_gem_object_put(obj);
        }
        GEM_BUG_ON(!list_empty(&vm->bound_list));

        mutex_unlock(&vm->mutex);
}

void i915_address_space_fini(struct i915_address_space *vm)
{
        drm_mm_takedown(&vm->mm);
        mutex_destroy(&vm->mutex);
}

static void __i915_vm_release(struct work_struct *work)
{
        struct i915_address_space *vm =
                container_of(work, struct i915_address_space, rcu.work);

        vm->cleanup(vm);
        i915_address_space_fini(vm);

        kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
        struct i915_address_space *vm =
                container_of(kref, struct i915_address_space, ref);

        GEM_BUG_ON(i915_is_ggtt(vm));
        trace_i915_ppgtt_release(vm);

        queue_rcu_work(vm->i915->wq, &vm->rcu);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
        kref_init(&vm->ref);
        INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
        atomic_set(&vm->open, 1);

        /*
         * The vm->mutex must be reclaim safe (for use in the shrinker).
         * Do a dummy acquire now under fs_reclaim so that any allocation
         * attempt holding the lock is immediately reported by lockdep.
         */
        mutex_init(&vm->mutex);
        lockdep_set_subclass(&vm->mutex, subclass);
        i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

        GEM_BUG_ON(!vm->total);
        drm_mm_init(&vm->mm, 0, vm->total);
        vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

        INIT_LIST_HEAD(&vm->bound_list);
}
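
/*
 * Call-order sketch (the field list below is an assumption drawn from
 * ppgtt_init()-style callers; this file only enforces GEM_BUG_ON(!vm->total)):
 * vm->i915 and vm->total must be populated before this init runs, e.g.
 *
 *      ppgtt->vm.gt = gt;
 *      ppgtt->vm.i915 = gt->i915;
 *      ppgtt->vm.total = BIT_ULL(INTEL_INFO(gt->i915)->ppgtt_size);
 *      i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
 */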

void clear_pages(struct i915_vma *vma)
{
        GEM_BUG_ON(!vma->pages);

        if (vma->pages != vma->obj->mm.pages) {
                sg_free_table(vma->pages);
                kfree(vma->pages);
        }
        vma->pages = NULL;

        memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
        GEM_BUG_ON(!i915_gem_object_has_pages(p));
        return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
        GEM_BUG_ON(!i915_gem_object_has_pages(p));
        return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
        struct page *page = __px_page(p);
        void *vaddr;

        vaddr = kmap(page);
        memset64(vaddr, val, count);
        clflush_cache_range(vaddr, PAGE_SIZE);
        kunmap(page);
}
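
/*
 * Usage sketch (the variable names here are placeholders, and the fill_px()
 * wrapper is assumed to live in intel_gtt.h): count is the number of u64
 * entries to write, so filling a whole 4K page of PTEs means
 * PAGE_SIZE / sizeof(u64) == 512 entries, along the lines of:
 *
 *      fill_page_dma(px_base(pt), scratch_pte, PAGE_SIZE / sizeof(u64));
 */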

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
        struct sgt_iter sgt;
        struct page *page;
        u8 val;

        val = 0;
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                val = POISON_FREE;

        for_each_sgt_page(page, sgt, scratch->mm.pages) {
                void *vaddr;

                vaddr = kmap(page);
                memset(vaddr, val, PAGE_SIZE);
                kunmap(page);
        }
}

int setup_scratch_page(struct i915_address_space *vm)
{
        unsigned long size;

        /*
         * In order to utilize 64K pages for an object with a size < 2M, we will
         * need to support a 64K scratch page, given that every 16th entry for a
         * page-table operating in 64K mode must point to a properly aligned 64K
         * region, including any PTEs which happen to point to scratch.
         *
         * This is only relevant for the 48b PPGTT where we support
         * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
         * scratch (read-only) between all vms, we create one 64K scratch page
         * for all.
         */
        size = I915_GTT_PAGE_SIZE_4K;
        if (i915_vm_is_4lvl(vm) &&
            HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
                size = I915_GTT_PAGE_SIZE_64K;

        do {
                struct drm_i915_gem_object *obj;

                obj = vm->alloc_pt_dma(vm, size);
                if (IS_ERR(obj))
                        goto skip;

                if (pin_pt_dma(vm, obj))
                        goto skip_obj;

                /* We need a single contiguous page for our scratch */
                if (obj->mm.page_sizes.sg < size)
                        goto skip_obj;

                /* And it needs to be correspondingly aligned */
                if (__px_dma(obj) & (size - 1))
                        goto skip_obj;

                /*
                 * Use a non-zero scratch page for debugging.
                 *
                 * We want a value that should be reasonably obvious
                 * to spot in the error state, while also causing a GPU hang
                 * if executed. We prefer using a clear page in production, so
                 * should it ever be accidentally used, the effect should be
                 * fairly benign.
                 */
                poison_scratch_page(obj);

                vm->scratch[0] = obj;
                vm->scratch_order = get_order(size);
                return 0;

skip_obj:
                i915_gem_object_put(obj);
skip:
                if (size == I915_GTT_PAGE_SIZE_4K)
                        return -ENOMEM;

                size = I915_GTT_PAGE_SIZE_4K;
        } while (1);
}
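
/*
 * Descriptive note (the scratch[] layout is inferred from the PPGTT/GGTT
 * setup code elsewhere, so treat the exact indices as an assumption):
 * scratch[0] is the data page created above, while scratch[1..vm->top] hold
 * one scratch page table per level, so free_scratch() below releases one
 * object per page-table level.
 */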

void free_scratch(struct i915_address_space *vm)
{
        int i;

        for (i = 0; i <= vm->top; i++)
                i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;

        /*
         * This function is for GTT-related workarounds. It is called on
         * driver load and after a GPU reset, so workarounds can be placed
         * here even if they get overwritten by a GPU reset.
         */
        /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
        if (IS_BROADWELL(i915))
                intel_uncore_write(uncore,
                                   GEN8_L3_LRA_1_GPGPU,
                                   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
        else if (IS_CHERRYVIEW(i915))
                intel_uncore_write(uncore,
                                   GEN8_L3_LRA_1_GPGPU,
                                   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
        else if (IS_GEN9_LP(i915))
                intel_uncore_write(uncore,
                                   GEN8_L3_LRA_1_GPGPU,
                                   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
        else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
                intel_uncore_write(uncore,
                                   GEN8_L3_LRA_1_GPGPU,
                                   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

        /*
         * To support 64K PTEs we need to first enable the use of the
         * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
         * mmio, otherwise the page-walker will simply ignore the IPS bit. This
         * shouldn't be needed after GEN10.
         *
         * 64K pages were first introduced on BDW+, although technically they
         * only *work* from gen9+. For pre-BDW we instead have the option of
         * 32K pages, but we don't currently have any support for them in our
         * driver.
         */
        if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
            INTEL_GEN(i915) <= 10)
                intel_uncore_rmw(uncore,
                                 GEN8_GAMW_ECO_DEV_RW_IA,
                                 0,
                                 GAMW_ECO_ENABLE_64K_IPS_FIELD);

        if (IS_GEN_RANGE(i915, 8, 11)) {
                bool can_use_gtt_cache = true;

                /*
                 * According to the BSpec, if we use 2M/1G pages then we also
                 * need to disable the GTT cache. At least on BDW we can see
                 * visual corruption when using 2M pages without disabling the
                 * GTT cache.
                 */
                if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
                        can_use_gtt_cache = false;

                /* WaGttCachingOffByDefault */
                intel_uncore_write(uncore,
                                   HSW_GTT_CACHE_EN,
                                   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
                drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
                                 intel_uncore_read(uncore,
                                                   HSW_GTT_CACHE_EN) == 0);
        }
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
        /* TGL doesn't support LLC or AGE settings */
        intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
        intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void cnl_setup_private_ppat(struct intel_uncore *uncore)
{
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(0),
                           GEN8_PPAT_WB | GEN8_PPAT_LLC);
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(1),
                           GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(2),
                           GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(3),
                           GEN8_PPAT_UC);
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(4),
                           GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(5),
                           GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(6),
                           GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
        intel_uncore_write(uncore,
                           GEN10_PAT_INDEX(7),
                           GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
        u64 pat;

        pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |      /* for normal objects, no eLLC */
              GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |  /* for something pointing to ptes? */
              GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |  /* for scanout with eLLC */
              GEN8_PPAT(3, GEN8_PPAT_UC) |                      /* Uncached objects, mostly for scanout */
              GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
              GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
              GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
              GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

        intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
        intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
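
/*
 * Layout sketch (inferred from the GEN8_PPAT() usage above; the macro is
 * assumed to be defined in intel_gtt.h): each of the eight PAT entries
 * occupies one byte of the 64-bit value, i.e.
 *
 *      GEN8_PPAT(i, x) == (u64)(x) << (i * 8)
 *
 * so e.g. GEN8_PPAT(3, GEN8_PPAT_UC) lands in bits 31:24, and the combined
 * value is split across the LO/HI registers with
 * lower_32_bits()/upper_32_bits().
 */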

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
        u64 pat;

        /*
         * Map WB on BDW to snooped on CHV.
         *
         * Only the snoop bit has meaning for CHV, the rest is
         * ignored.
         *
         * The hardware will never snoop for certain types of accesses:
         * - CPU GTT (GMADR->GGTT->no snoop->memory)
         * - PPGTT page tables
         * - some other special cycles
         *
         * As with BDW, we also need to consider the following for GT accesses:
         * "For GGTT, there is NO pat_sel[2:0] from the entry,
         * so RTL will always use the value corresponding to
         * pat_sel = 000".
         * Which means we must set the snoop bit in PAT entry 0
         * in order to keep the global status page working.
         */

        pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
              GEN8_PPAT(1, 0) |
              GEN8_PPAT(2, 0) |
              GEN8_PPAT(3, 0) |
              GEN8_PPAT(4, CHV_PPAT_SNOOP) |
              GEN8_PPAT(5, CHV_PPAT_SNOOP) |
              GEN8_PPAT(6, CHV_PPAT_SNOOP) |
              GEN8_PPAT(7, CHV_PPAT_SNOOP);

        intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
        intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_uncore *uncore)
{
        struct drm_i915_private *i915 = uncore->i915;

        GEM_BUG_ON(INTEL_GEN(i915) < 8);

        if (INTEL_GEN(i915) >= 12)
                tgl_setup_private_ppat(uncore);
        else if (INTEL_GEN(i915) >= 10)
                cnl_setup_private_ppat(uncore);
        else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
                chv_setup_private_ppat(uncore);
        else
                bdw_setup_private_ppat(uncore);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif