// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"

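/*
 * Check that a GPU/engine reset does not trample stolen memory that is not
 * ours to use: fill the unused stolen pages with a known pattern, CRC every
 * page of stolen before and after a reset issued while spinners keep the
 * selected engines busy, and report any unused page whose contents changed.
 * Corruption above I915_GEM_STOLEN_BIAS is treated as a failure.
 */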
static int
__igt_reset_stolen(struct intel_gt *gt,
                   intel_engine_mask_t mask,
                   const char *msg)
{
        struct i915_ggtt *ggtt = &gt->i915->ggtt;
        const struct resource *dsm = &gt->i915->dsm;
        resource_size_t num_pages, page;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        enum intel_engine_id id;
        struct igt_spinner spin;
        long max, count;
        void *tmp;
        u32 *crc;
        int err;

        if (!drm_mm_node_allocated(&ggtt->error_capture))
                return 0;

        num_pages = resource_size(dsm) >> PAGE_SHIFT;
        if (!num_pages)
                return 0;

        crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
        if (!crc)
                return -ENOMEM;

        tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto err_crc;
        }

        igt_global_reset_lock(gt);
        wakeref = intel_runtime_pm_get(gt->uncore->rpm);

        err = igt_spinner_init(&spin, gt);
        if (err)
                goto err_lock;

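        /* Keep each selected engine busy with a spinner across the reset */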
        for_each_engine(engine, gt, id) {
                struct intel_context *ce;
                struct i915_request *rq;

                if (!(mask & engine->mask))
                        continue;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                ce = intel_context_create(engine);
                if (IS_ERR(ce)) {
                        err = PTR_ERR(ce);
                        goto err_spin;
                }
                rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
                intel_context_put(ce);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        goto err_spin;
                }
                i915_request_add(rq);
        }

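        /*
         * Fill the unused stolen pages with a known pattern and record a
         * CRC of every page of stolen before the reset.
         */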
        for (page = 0; page < num_pages; page++) {
                dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
                void __iomem *s;
                void *in;

                ggtt->vm.insert_page(&ggtt->vm, dma,
                                     ggtt->error_capture.start,
                                     I915_CACHE_NONE, 0);
                mb();

                s = io_mapping_map_wc(&ggtt->iomap,
                                      ggtt->error_capture.start,
                                      PAGE_SIZE);

                if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
                                             page << PAGE_SHIFT,
                                             ((page + 1) << PAGE_SHIFT) - 1))
                        memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));

                in = s;
                if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
                        in = tmp;
                crc[page] = crc32_le(0, in, PAGE_SIZE);

                io_mapping_unmap(s);
        }
        mb();
        ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

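        /* Reset the whole GT or just the selected engines */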
        if (mask == ALL_ENGINES) {
                intel_gt_reset(gt, mask, NULL);
        } else {
                for_each_engine(engine, gt, id) {
                        if (mask & engine->mask)
                                intel_engine_reset(engine, NULL);
                }
        }

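        /*
         * Read back every page of stolen and flag any unused page whose
         * contents changed across the reset.
         */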
        max = -1;
        count = 0;
        for (page = 0; page < num_pages; page++) {
                dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
                void __iomem *s;
                void *in;
                u32 x;

                ggtt->vm.insert_page(&ggtt->vm, dma,
                                     ggtt->error_capture.start,
                                     I915_CACHE_NONE, 0);
                mb();

                s = io_mapping_map_wc(&ggtt->iomap,
                                      ggtt->error_capture.start,
                                      PAGE_SIZE);

                in = s;
                if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
                        in = tmp;
                x = crc32_le(0, in, PAGE_SIZE);

                if (x != crc[page] &&
                    !__drm_mm_interval_first(&gt->i915->mm.stolen,
                                             page << PAGE_SHIFT,
                                             ((page + 1) << PAGE_SHIFT) - 1)) {
                        pr_debug("unused stolen page %pa modified by GPU reset\n",
                                 &page);
                        if (count++ == 0)
                                igt_hexdump(in, PAGE_SIZE);
                        max = page;
                }

                io_mapping_unmap(s);
        }
        mb();
        ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

        if (count > 0) {
                pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
                        msg, count, max);
        }
        if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
                pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
                       msg, I915_GEM_STOLEN_BIAS);
                err = -EINVAL;
        }

err_spin:
        igt_spinner_fini(&spin);

err_lock:
        intel_runtime_pm_put(gt->uncore->rpm, wakeref);
        igt_global_reset_unlock(gt);

        kfree(tmp);
err_crc:
        kfree(crc);
        return err;
}

static int igt_reset_device_stolen(void *arg)
{
        return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

static int igt_reset_engines_stolen(void *arg)
{
        struct intel_gt *gt = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err;

        if (!intel_has_reset_engine(gt))
                return 0;

        for_each_engine(engine, gt, id) {
                err = __igt_reset_stolen(gt, engine->mask, engine->name);
                if (err)
                        return err;
        }

        return 0;
}

static int igt_global_reset(void *arg)
{
        struct intel_gt *gt = arg;
        unsigned int reset_count;
        intel_wakeref_t wakeref;
        int err = 0;

        /* Check that we can issue a global GPU reset */

        igt_global_reset_lock(gt);
        wakeref = intel_runtime_pm_get(gt->uncore->rpm);

        reset_count = i915_reset_count(&gt->i915->gpu_error);

        intel_gt_reset(gt, ALL_ENGINES, NULL);

        if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
        }

        intel_runtime_pm_put(gt->uncore->rpm, wakeref);
        igt_global_reset_unlock(gt);

        if (intel_gt_is_wedged(gt))
                err = -EIO;

        return err;
}

static int igt_wedged_reset(void *arg)
{
        struct intel_gt *gt = arg;
        intel_wakeref_t wakeref;

        /* Check that we can recover a wedged device with a GPU reset */

        igt_global_reset_lock(gt);
        wakeref = intel_runtime_pm_get(gt->uncore->rpm);

        intel_gt_set_wedged(gt);

        GEM_BUG_ON(!intel_gt_is_wedged(gt));
        intel_gt_reset(gt, ALL_ENGINES, NULL);

        intel_runtime_pm_put(gt->uncore->rpm, wakeref);
        igt_global_reset_unlock(gt);

        return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
        struct intel_gt *gt = arg;
        const typeof(*igt_atomic_phases) *p;
        int err = 0;

        /* Check that the resets are usable from atomic context */

        intel_gt_pm_get(gt);
        igt_global_reset_lock(gt);

        /* Flush any requests before we get started and check basics */
        if (!igt_force_reset(gt))
                goto unlock;

        for (p = igt_atomic_phases; p->name; p++) {
                intel_engine_mask_t awake;

                GEM_TRACE("__intel_gt_reset under %s\n", p->name);

                awake = reset_prepare(gt);
                p->critical_section_begin();

                err = __intel_gt_reset(gt, ALL_ENGINES);

                p->critical_section_end();
                reset_finish(gt, awake);

                if (err) {
                        pr_err("__intel_gt_reset failed under %s\n", p->name);
                        break;
                }
        }

        /* As we poke around the guts, do a full reset before continuing. */
        igt_force_reset(gt);

unlock:
        igt_global_reset_unlock(gt);
        intel_gt_pm_put(gt);

        return err;
}

static int igt_atomic_engine_reset(void *arg)
{
        struct intel_gt *gt = arg;
        const typeof(*igt_atomic_phases) *p;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        int err = 0;

        /* Check that the resets are usable from atomic context */

        if (!intel_has_reset_engine(gt))
                return 0;

        if (intel_uc_uses_guc_submission(&gt->uc))
                return 0;

        intel_gt_pm_get(gt);
        igt_global_reset_lock(gt);

        /* Flush any requests before we get started and check basics */
        if (!igt_force_reset(gt))
                goto out_unlock;

        for_each_engine(engine, gt, id) {
                tasklet_disable(&engine->execlists.tasklet);
                intel_engine_pm_get(engine);

                for (p = igt_atomic_phases; p->name; p++) {
                        GEM_TRACE("intel_engine_reset(%s) under %s\n",
                                  engine->name, p->name);

                        p->critical_section_begin();
                        err = intel_engine_reset(engine, NULL);
                        p->critical_section_end();

                        if (err) {
                                pr_err("intel_engine_reset(%s) failed under %s\n",
                                       engine->name, p->name);
                                break;
                        }
                }

                intel_engine_pm_put(engine);
                tasklet_enable(&engine->execlists.tasklet);
                if (err)
                        break;
        }

        /* As we poke around the guts, do a full reset before continuing. */
        igt_force_reset(gt);

out_unlock:
        igt_global_reset_unlock(gt);
        intel_gt_pm_put(gt);

        return err;
}

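/*
 * Entry point for the live reset selftests: skipped when the device has no
 * reset support, and aborted with -EIO if the GT is already wedged beyond
 * recovery.
 */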
int intel_reset_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_global_reset), /* attempt to recover GPU first */
                SUBTEST(igt_reset_device_stolen),
                SUBTEST(igt_reset_engines_stolen),
                SUBTEST(igt_wedged_reset),
                SUBTEST(igt_atomic_reset),
                SUBTEST(igt_atomic_engine_reset),
        };
        struct intel_gt *gt = &i915->gt;

        if (!intel_has_gpu_reset(gt))
                return 0;

        if (intel_gt_is_wedged(gt))
                return -EIO; /* we're long past hope of a successful reset */

        return intel_gt_live_subtests(tests, gt);
}