GNU Linux-libre 5.19-rc6-gnu: drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

struct a6xx_gpu_state_obj {
        const void *handle;
        u32 *data;
};

struct a6xx_gpu_state {
        struct msm_gpu_state base;

        struct a6xx_gpu_state_obj *gmu_registers;
        int nr_gmu_registers;

        struct a6xx_gpu_state_obj *registers;
        int nr_registers;

        struct a6xx_gpu_state_obj *shaders;
        int nr_shaders;

        struct a6xx_gpu_state_obj *clusters;
        int nr_clusters;

        struct a6xx_gpu_state_obj *dbgahb_clusters;
        int nr_dbgahb_clusters;

        struct a6xx_gpu_state_obj *indexed_regs;
        int nr_indexed_regs;

        struct a6xx_gpu_state_obj *debugbus;
        int nr_debugbus;

        struct a6xx_gpu_state_obj *vbif_debugbus;

        struct a6xx_gpu_state_obj *cx_debugbus;
        int nr_cx_debugbus;

        struct msm_gpu_state_bo *gmu_log;
        struct msm_gpu_state_bo *gmu_hfi;
        struct msm_gpu_state_bo *gmu_debug;

        s32 hfi_queue_history[2][HFI_HISTORY_SZ];

        struct list_head objs;

        bool gpu_initialized;
};

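/*
 * The crashdumper consumes a script of 128-bit records, built with the
 * helpers below. The first u64 carries the data value (for a write) or
 * the target iova (for a read); the second packs the register dword
 * offset into bits [63:44], with bit 21 seemingly distinguishing a
 * write from a read and the low bits giving the dword count. A zeroed
 * record terminates the script. Each helper returns the number of u64s
 * it emitted so callers can advance their script pointer.
 */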
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
        in[0] = val;
        in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

        return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
        in[0] = target;
        in[1] = (((u64) reg) << 44 | dwords);

        return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
        in[0] = 0;
        in[1] = 0;

        return 2;
}

struct a6xx_crashdumper {
        void *ptr;
        struct drm_gem_object *bo;
        u64 iova;
};

struct a6xx_state_memobj {
        struct list_head node;
        unsigned long long data[];
};

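/*
 * Allocations made with state_kcalloc()/state_kmemdup() are chained on
 * the a6xx_gpu_state objs list so that a6xx_gpu_state_destroy() can
 * free them all in a single pass, regardless of which capture path
 * created them.
 */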
static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
        struct a6xx_state_memobj *obj =
                kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        list_add_tail(&obj->node, &a6xx_state->objs);
        return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
                size_t size)
{
        void *dst = state_kcalloc(a6xx_state, 1, size);

        if (dst)
                memcpy(dst, src, size);
        return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        dumper->ptr = msm_gem_kernel_new(gpu->dev,
                SZ_1M, MSM_BO_WC, gpu->aspace,
                &dumper->bo, &dumper->iova);

        if (!IS_ERR(dumper->ptr))
                msm_gem_object_set_name(dumper->bo, "crashdump");

        return PTR_ERR_OR_ZERO(dumper->ptr);
}

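/*
 * Point the CP at the script, kick the dumper and poll
 * CP_CRASH_DUMP_STATUS for completion. This only works while SPTPRAC
 * is powered up, hence the check below; on a timeout the partially
 * written data is simply not copied out by the callers.
 */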
static int a6xx_crashdumper_run(struct msm_gpu *gpu,
                struct a6xx_crashdumper *dumper)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        u32 val;
        int ret;

        if (IS_ERR_OR_NULL(dumper->ptr))
                return -EINVAL;

        if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
                return -EINVAL;

        /* Make sure all pending memory writes are posted */
        wmb();

        gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
                REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

        ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
                val & 0x02, 100, 10000);

        gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

        return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
                A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

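/*
 * The CX debug bus registers are addressed by dword offset, so shift
 * left by two to get the byte offset into the temporary cx_dbgc
 * ioremap.
 */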
#define cxdbg_write(ptr, offset, val) \
        msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
        msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
                u32 *data)
{
        u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
                A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
        cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

        /* Wait 1 us to make sure the data is flowing */
        udelay(1);

        data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
        data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

        return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
                u32 reg, int count, u32 *data)
{
        int i;

        gpu_write(gpu, ctrl0, reg);

        for (i = 0; i < count; i++) {
                gpu_write(gpu, ctrl1, i);
                data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
        }

        return count;
}

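/*
 * Sample counts for the VBIF debug bus: each AXI arbiter block yields
 * 16 dwords, each XIN AXI block 18 and each XIN core block 12, which
 * is exactly what VBIF_DEBUGBUS_BLOCK_SIZE adds up to.
 */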
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
        ((16 * AXI_ARB_BLOCKS) + \
         (18 * XIN_AXI_BLOCKS) + \
         (12 * XIN_CORE_BLOCKS))

static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_gpu_state_obj *obj)
{
        u32 clk, *ptr;
        int i;

        obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
                sizeof(u32));
        if (!obj->data)
                return;

        obj->handle = NULL;

        /* Get the current clock setting */
        clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

        /* Force on the bus so we can read it */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON,
                clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

        /* We will read from BUS2 first, so disable BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

        /* Enable the VBIF bus for reading */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

        ptr = obj->data;

        for (i = 0; i < AXI_ARB_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << (i + 16), 16, ptr);

        for (i = 0; i < XIN_AXI_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS2_CTRL1,
                        1 << i, 18, ptr);

        /* Stop BUS2 so we can turn on BUS1 */
        gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

        for (i = 0; i < XIN_CORE_BLOCKS; i++)
                ptr += vbif_debugbus_read(gpu,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL0,
                        REG_A6XX_VBIF_TEST_BUS1_CTRL1,
                        1 << i, 12, ptr);

        /* Restore the VBIF clock setting */
        gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

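/*
 * GX and CX debug bus samples are 64 bits wide (two TRACE_BUF dwords
 * per read), so the two helpers below size their buffers in u64s even
 * though the samples are collected as pairs of u32s.
 */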
static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_debugbus_block *block,
                struct a6xx_gpu_state_obj *obj)
{
        int i;
        u32 *ptr;

        obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
        if (!obj->data)
                return;

        obj->handle = block;

        for (ptr = obj->data, i = 0; i < block->count; i++)
                ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

static void a6xx_get_debugbus(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct resource *res;
        void __iomem *cxdbg = NULL;
        int nr_debugbus_blocks;

        /* Set up the GX debug bus */

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
                A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
                A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
        gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

        /*
         * Set up the CX debug bus - it lives elsewhere in the system so do a
         * temporary ioremap for the registers
         */
        res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
                        "cx_dbgc");

        if (res)
                cxdbg = ioremap(res->start, resource_size(res));

        if (cxdbg) {
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
                        A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
                        A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
                        0x76543210);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
                        0xFEDCBA98);

                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
                cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
        }

        nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
                (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

        a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
                        sizeof(*a6xx_state->debugbus));

        if (a6xx_state->debugbus) {
                int i;

                for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
                        a6xx_get_debugbus_block(gpu,
                                a6xx_state,
                                &a6xx_debugbus_blocks[i],
                                &a6xx_state->debugbus[i]);

                a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

                /*
                 * GBIF has the same debugbus as the other GPU blocks, so
                 * fall back to the default path when the GPU uses GBIF;
                 * GBIF also responds on exactly the same bus ID as VBIF.
                 */
                if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
                        a6xx_get_debugbus_block(gpu, a6xx_state,
                                &a6xx_gbif_debugbus_block,
                                &a6xx_state->debugbus[i]);

                        a6xx_state->nr_debugbus += 1;
                }
        }

        /* Dump the VBIF debugbus on applicable targets */
        if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
                a6xx_state->vbif_debugbus =
                        state_kcalloc(a6xx_state, 1,
                                        sizeof(*a6xx_state->vbif_debugbus));

                if (a6xx_state->vbif_debugbus)
                        a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
                                        a6xx_state->vbif_debugbus);
        }

        if (cxdbg) {
                a6xx_state->cx_debugbus =
                        state_kcalloc(a6xx_state,
                        ARRAY_SIZE(a6xx_cx_debugbus_blocks),
                        sizeof(*a6xx_state->cx_debugbus));

                if (a6xx_state->cx_debugbus) {
                        int i;

                        for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
                                a6xx_get_cx_debugbus_block(cxdbg,
                                        a6xx_state,
                                        &a6xx_cx_debugbus_blocks[i],
                                        &a6xx_state->cx_debugbus[i]);

                        a6xx_state->nr_cx_debugbus =
                                ARRAY_SIZE(a6xx_cx_debugbus_blocks);
                }

                iounmap(cxdbg);
        }
}

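/*
 * Register lists are flat arrays of { start, end } pairs of dword
 * offsets; RANGE() turns one pair into an inclusive register count.
 */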
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_dbgahb_cluster *dbgahb,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (dbgahb->statetype + i * 2) << 8);

                for (j = 0; j < dbgahb->count; j += 2) {
                        int count = RANGE(dbgahb->registers, j);
                        u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                                dbgahb->registers[j] - (dbgahb->base >> 2);

                        in += CRASHDUMP_READ(in, offset, count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = dbgahb;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_dbgahb_clusters),
                sizeof(*a6xx_state->dbgahb_clusters));

        if (!a6xx_state->dbgahb_clusters)
                return;

        a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
                a6xx_get_dbgahb_cluster(gpu, a6xx_state,
                        &a6xx_dbgahb_clusters[i],
                        &a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_cluster *cluster,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize;
        int i, regcount = 0;

        /* Some clusters need a selector register to be programmed too */
        if (cluster->sel_reg)
                in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

        for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
                int j;

                in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
                        (cluster->id << 8) | (i << 4) | i);

                for (j = 0; j < cluster->count; j += 2) {
                        int count = RANGE(cluster->registers, j);

                        in += CRASHDUMP_READ(in, cluster->registers[j],
                                count, out);

                        out += count * sizeof(u32);

                        if (i == 0)
                                regcount += count;
                }
        }

        CRASHDUMP_FINI(in);

        datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = cluster;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->clusters = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

        if (!a6xx_state->clusters)
                return;

        a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

        for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
                a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
                        &a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_shader_block *block,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
        int i;

        if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
                return;

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
                        (block->type << 8) | i);

                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
                        block->size, out);

                /* Each bank gets its own slice of the scratch buffer */
                out += block->size * sizeof(u32);
        }

        CRASHDUMP_FINI(in);

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = block;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i;

        a6xx_state->shaders = state_kcalloc(a6xx_state,
                ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

        if (!a6xx_state->shaders)
                return;

        a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

        for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
                a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
                        &a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
                        regs->registers[i] - (regs->val0 >> 2);

                in += CRASHDUMP_READ(in, offset, count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                struct a6xx_crashdumper *dumper)
{
        u64 *in = dumper->ptr;
        u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
        int i, regcount = 0;

        /* Some blocks might need to program a selector register first */
        if (regs->val0)
                in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);

                in += CRASHDUMP_READ(in, regs->registers[i], count, out);

                out += count * sizeof(u32);
                regcount += count;
        }

        CRASHDUMP_FINI(in);

        if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
                return;

        if (a6xx_crashdumper_run(gpu, dumper))
                return;

        obj->handle = regs;
        obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
                regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj)
{
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++)
                        obj->data[index++] = gpu_read(gpu,
                                regs->registers[i] + j);
        }
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_registers *regs,
                struct a6xx_gpu_state_obj *obj,
                bool rscc)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        int i, regcount = 0, index = 0;

        for (i = 0; i < regs->count; i += 2)
                regcount += RANGE(regs->registers, i);

        obj->handle = (const void *) regs;
        obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
        if (!obj->data)
                return;

        for (i = 0; i < regs->count; i += 2) {
                u32 count = RANGE(regs->registers, i);
                int j;

                for (j = 0; j < count; j++) {
                        u32 offset = regs->registers[i] + j;
                        u32 val;

                        if (rscc)
                                val = gmu_read_rscc(gmu, offset);
                        else
                                val = gmu_read(gmu, offset);

                        obj->data[index++] = val;
                }
        }
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

        a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
                3, sizeof(*a6xx_state->gmu_registers));

        if (!a6xx_state->gmu_registers)
                return;

        a6xx_state->nr_gmu_registers = 3;

        /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
                &a6xx_state->gmu_registers[0], false);
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
                &a6xx_state->gmu_registers[1], true);

        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return;

        /* Set the fence to ALLOW mode so we can access the registers */
        gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
                &a6xx_state->gmu_registers[2], false);
}

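/*
 * Take a CPU-side copy of a GMU buffer object; the copy lives in
 * kvzalloc'd memory (the buffers may be too large for kmalloc) and is
 * released again in a6xx_gpu_state_destroy().
 */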
static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
                struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
{
        struct msm_gpu_state_bo *snapshot;

        snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
        if (!snapshot)
                return NULL;

        snapshot->iova = bo->iova;
        snapshot->size = bo->size;
        snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
        if (!snapshot->data)
                return NULL;

        memcpy(snapshot->data, bo->virt, bo->size);

        return snapshot;
}

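/*
 * Unwrap each HFI queue's circular command history into a linear
 * snapshot, starting the copy at history_idx so wraparound is handled.
 */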
static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
                                          struct a6xx_gpu_state *a6xx_state)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
        unsigned i, j;

        BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));

        for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
                struct a6xx_hfi_queue *queue = &gmu->queues[i];
                for (j = 0; j < HFI_HISTORY_SZ; j++) {
                        unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
                        a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
                }
        }
}

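/*
 * Only one of the GBIF or VBIF register lists is captured for a given
 * target, so a single extra slot is reserved for whichever applies.
 */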
#define A6XX_GBIF_REGLIST_SIZE   1
static void a6xx_get_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                struct a6xx_crashdumper *dumper)
{
        int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
                ARRAY_SIZE(a6xx_reglist) +
                ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
        int index = 0;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

        a6xx_state->registers = state_kcalloc(a6xx_state,
                count, sizeof(*a6xx_state->registers));

        if (!a6xx_state->registers)
                return;

        a6xx_state->nr_registers = count;

        for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
                a6xx_get_ahb_gpu_registers(gpu,
                        a6xx_state, &a6xx_ahb_reglist[i],
                        &a6xx_state->registers[index++]);

        if (a6xx_has_gbif(adreno_gpu))
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_gbif_reglist,
                                &a6xx_state->registers[index++]);
        else
                a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_vbif_reglist,
                                &a6xx_state->registers[index++]);
        if (!dumper) {
                /*
                 * We can't use the crashdumper when the SMMU is stalled,
                 * because the GPU has no memory access until we resume
                 * translation (but we don't want to do that until after
                 * we have captured as much useful GPU state as possible).
                 * So instead collect registers via the CPU:
                 */
                for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                        a6xx_get_ahb_gpu_registers(gpu,
                                a6xx_state, &a6xx_reglist[i],
                                &a6xx_state->registers[index++]);
                return;
        }

        for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
                a6xx_get_crashdumper_registers(gpu,
                        a6xx_state, &a6xx_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);

        for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
                a6xx_get_crashdumper_hlsq_registers(gpu,
                        a6xx_state, &a6xx_hlsq_reglist[i],
                        &a6xx_state->registers[index++],
                        dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state,
                const struct a6xx_indexed_registers *indexed,
                struct a6xx_gpu_state_obj *obj)
{
        int i;

        obj->handle = (const void *) indexed;
        obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
        if (!obj->data)
                return;

        /* All the indexed banks start at address 0 */
        gpu_write(gpu, indexed->addr, 0);

        /* Read the data - each read increments the internal address by 1 */
        for (i = 0; i < indexed->count; i++)
                obj->data[i] = gpu_read(gpu, indexed->data);
}

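/*
 * The "+ 1" below reserves a slot for the CP mempool dump, which needs
 * the stop/restore dance around CP_MEM_POOL_SIZE and therefore isn't
 * part of a6xx_indexed_reglist.
 */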
static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
                struct a6xx_gpu_state *a6xx_state)
{
        u32 mempool_size;
        int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
        int i;

        a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
                sizeof(*a6xx_state->indexed_regs));
        if (!a6xx_state->indexed_regs)
                return;

        for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
                a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
                        &a6xx_state->indexed_regs[i]);

        /* Set the CP mempool size to 0 to stabilize it while dumping */
        mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

        /* Get the contents of the CP mempool */
        a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
                &a6xx_state->indexed_regs[i]);

        /*
         * Offset 0x2000 in the mempool is the size - copy the saved size over
         * so the data is consistent
         */
        a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

        /* Restore the size in the hardware */
        gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

        a6xx_state->nr_indexed_regs = count;
}

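/*
 * Top-level capture entry point. GMU state is always captured; GPU-side
 * state additionally needs GX power, and anything that goes through the
 * crashdumper also requires that the SMMU is not stalled on a fault,
 * since the dumper writes its results through the IOMMU.
 */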
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
        struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
                GFP_KERNEL);
        bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
                        A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);

        if (!a6xx_state)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&a6xx_state->objs);

        /* Get the generic state from the adreno core */
        adreno_gpu_state_get(gpu, &a6xx_state->base);

        a6xx_get_gmu_registers(gpu, a6xx_state);

        a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
        a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
        a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);

        a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

        /* If GX isn't on the rest of the data isn't going to be accessible */
        if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
                return &a6xx_state->base;

        /* Get the banks of indexed registers */
        a6xx_get_indexed_registers(gpu, a6xx_state);

        /*
         * Try to initialize the crashdumper, if we are not dumping state
         * with the SMMU stalled.  The crashdumper needs memory access to
         * write out GPU state, so we need to skip this when the SMMU is
         * stalled in response to an iova fault
         */
        if (!stalled && !gpu->needs_hw_init &&
            !a6xx_crashdumper_init(gpu, &_dumper)) {
                dumper = &_dumper;
        }

        a6xx_get_registers(gpu, a6xx_state, dumper);

        if (dumper) {
                a6xx_get_shaders(gpu, a6xx_state, dumper);
                a6xx_get_clusters(gpu, a6xx_state, dumper);
                a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);

                msm_gem_kernel_put(dumper->bo, gpu->aspace);
        }

        if (snapshot_debugbus)
                a6xx_get_debugbus(gpu, a6xx_state);

        a6xx_state->gpu_initialized = !gpu->needs_hw_init;

        return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
        struct a6xx_state_memobj *obj, *tmp;
        struct msm_gpu_state *state = container_of(kref,
                        struct msm_gpu_state, ref);
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);

        if (a6xx_state->gmu_log)
                kvfree(a6xx_state->gmu_log->data);

        if (a6xx_state->gmu_hfi)
                kvfree(a6xx_state->gmu_hfi->data);

        if (a6xx_state->gmu_debug)
                kvfree(a6xx_state->gmu_debug->data);

        list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
                kfree(obj);

        adreno_gpu_state_destroy(state);
        kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
        if (IS_ERR_OR_NULL(state))
                return 1;

        return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

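/*
 * Print a register block as offset/value pairs. Slots still holding
 * the 0xdeafbead pattern are skipped; that value appears to mark
 * registers the crashdumper could not read.
 */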
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
                struct drm_printer *p)
{
        int i, index = 0;

        if (!data)
                return;

        for (i = 0; i < count; i += 2) {
                u32 count = RANGE(registers, i);
                u32 offset = registers[i];
                int j;

                for (j = 0; j < count; index++, offset++, j++) {
                        if (data[index] == 0xdeafbead)
                                continue;

                        drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
                                offset << 2, data[index]);
                }
        }
}

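/*
 * ascii85-encode a buffer for the dump, first trimming trailing zero
 * dwords so empty or sparsely used buffers don't bloat the output.
 */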
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
        char out[ASCII85_BUFSZ];
        long i, l, datalen = 0;

        for (i = 0; i < len >> 2; i++) {
                if (data[i])
                        datalen = (i + 1) << 2;
        }

        if (datalen == 0)
                return;

        drm_puts(p, "    data: !!ascii85 |\n");
        drm_puts(p, "      ");

        l = ascii85_encode_len(datalen);

        for (i = 0; i < l; i++)
                drm_puts(p, ascii85_encode(data[i], out));

        drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
        drm_puts(p, fmt);
        drm_puts(p, name);
        drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_shader_block *block = obj->handle;
        int i;

        if (!obj->handle)
                return;

        print_name(p, "  - type: ", block->name);

        for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
                drm_printf(p, "    - bank: %d\n", i);
                drm_printf(p, "      size: %d\n", block->size);

                if (!obj->data)
                        continue;

                print_ascii85(p, block->size << 2,
                        obj->data + (block->size * i));
        }
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
                struct drm_printer *p)
{
        int ctx, index = 0;

        for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
                int j;

                drm_printf(p, "    - context: %d\n", ctx);

                for (j = 0; j < size; j += 2) {
                        u32 count = RANGE(registers, j);
                        u32 offset = registers[j];
                        int k;

                        for (k = 0; k < count; index++, offset++, k++) {
                                if (data[index] == 0xdeafbead)
                                        continue;

                                drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
                                        offset << 2, data[index]);
                        }
                }
        }
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

        if (dbgahb) {
                print_name(p, "  - cluster-name: ", dbgahb->name);
                a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
                        obj->data, p);
        }
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_cluster *cluster = obj->handle;

        if (cluster) {
                print_name(p, "  - cluster-name: ", cluster->name);
                a6xx_show_cluster_data(cluster->registers, cluster->count,
                        obj->data, p);
        }
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
                struct drm_printer *p)
{
        const struct a6xx_indexed_registers *indexed = obj->handle;

        if (!indexed)
                return;

        print_name(p, "  - regs-name: ", indexed->name);
        drm_printf(p, "    dwords: %d\n", indexed->count);

        print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
                u32 *data, struct drm_printer *p)
{
        if (block) {
                print_name(p, "  - debugbus-block: ", block->name);

                /*
                 * count for regular debugbus data is in quadwords,
                 * but print the size in dwords for consistency
                 */
                drm_printf(p, "    count: %d\n", block->count << 1);

                print_ascii85(p, block->count << 3, data);
        }
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
                struct drm_printer *p)
{
        int i;

        for (i = 0; i < a6xx_state->nr_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }

        if (a6xx_state->vbif_debugbus) {
                struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

                drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
                drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

                /* vbif debugbus data is in dwords. Confusing, huh? */
                print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
        }

        for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

                a6xx_show_debugbus_block(obj->handle, obj->data, p);
        }
}

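/*
 * Top-level devcoredump printer: emits the captured state as YAML-style
 * text on top of the common adreno_show() output.
 */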
void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
                struct drm_printer *p)
{
        struct a6xx_gpu_state *a6xx_state = container_of(state,
                        struct a6xx_gpu_state, base);
        int i;

        if (IS_ERR_OR_NULL(state))
                return;

        drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);

        adreno_show(gpu, state, p);

        drm_puts(p, "gmu-log:\n");
        if (a6xx_state->gmu_log) {
                struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
                drm_printf(p, "    size: %zu\n", gmu_log->size);
                adreno_show_object(p, &gmu_log->data, gmu_log->size,
                                &gmu_log->encoded);
        }

        drm_puts(p, "gmu-hfi:\n");
        if (a6xx_state->gmu_hfi) {
                struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
                unsigned i, j;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
                drm_printf(p, "    size: %zu\n", gmu_hfi->size);
                for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
                        drm_printf(p, "    queue-history[%u]:", i);
                        for (j = 0; j < HFI_HISTORY_SZ; j++) {
                                drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
                        }
                        drm_printf(p, "\n");
                }
                adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
                                &gmu_hfi->encoded);
        }

        drm_puts(p, "gmu-debug:\n");
        if (a6xx_state->gmu_debug) {
                struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;

                drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
                drm_printf(p, "    size: %zu\n", gmu_debug->size);
                adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
                                &gmu_debug->encoded);
        }

        drm_puts(p, "registers:\n");
        for (i = 0; i < a6xx_state->nr_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "registers-gmu:\n");
        for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
                struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
                const struct a6xx_registers *regs = obj->handle;

                if (!obj->handle)
                        continue;

                a6xx_show_registers(regs->registers, obj->data, regs->count, p);
        }

        drm_puts(p, "indexed-registers:\n");
        for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
                a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

        drm_puts(p, "shader-blocks:\n");
        for (i = 0; i < a6xx_state->nr_shaders; i++)
                a6xx_show_shader(&a6xx_state->shaders[i], p);

        drm_puts(p, "clusters:\n");
        for (i = 0; i < a6xx_state->nr_clusters; i++)
                a6xx_show_cluster(&a6xx_state->clusters[i], p);

        for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
                a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

        drm_puts(p, "debugbus:\n");
        a6xx_show_debugbus(a6xx_state, p);
}