GNU Linux-libre 5.19-rc6-gnu - drivers/gpu/drm/msm/adreno/a3xx_gpu.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

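/* Interrupt sources unmasked at hw_init time (written to
 * REG_A3XX_RBBM_INT_0_MASK below): mainly CP/RBBM error conditions, plus
 * CACHE_FLUSH_TS, which a3xx_submit() uses to get an IRQ once a submit's
 * fence seqno has been written back.
 */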
#define A3XX_INT0_MASK \
        (A3XX_INT0_RBBM_AHB_ERROR |        \
         A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
         A3XX_INT0_CP_T0_PACKET_IN_IB |    \
         A3XX_INT0_CP_OPCODE_ERROR |       \
         A3XX_INT0_CP_RESERVED_BIT_ERROR | \
         A3XX_INT0_CP_HW_FAULT |           \
         A3XX_INT0_CP_IB1_INT |            \
         A3XX_INT0_CP_IB2_INT |            \
         A3XX_INT0_CP_RB_INT |             \
         A3XX_INT0_CP_REG_PROTECT_FAULT |  \
         A3XX_INT0_CP_AHB_ERROR_HALT |     \
         A3XX_INT0_CACHE_FLUSH_TS |        \
         A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
        struct msm_ringbuffer *ring = submit->ring;
        unsigned int i;

        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        /* ignore IB-targets */
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        /* ignore if there has not been a ctx switch: */
                        if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
                                break;
                        fallthrough;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        OUT_PKT2(ring);
                        break;
                }
        }

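        /* Stash the seqno in CP_SCRATCH_REG2 as well; a3xx_recover() dumps
         * the CP scratch registers, so this is handy when debugging hangs.
         */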
        OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
        OUT_RING(ring, submit->seqno);

        /* Flush HLSQ lazy updates to make sure there is nothing
         * pending for indirect loads after the timestamp has
         * passed:
         */
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);

        /* wait for idle before cache flush/interrupt */
        OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
        OUT_RING(ring, 0x00000000);

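        /* Write back the fence value: the CP copies submit->seqno into the
         * per-ring fence slot in the memptrs buffer, and (because of BIT(31)
         * below) raises CACHE_FLUSH_TS, which a3xx_irq() services by calling
         * msm_gpu_retire().
         */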
        /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
        OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
        OUT_RING(ring, rbmemptr(ring, fence));
        OUT_RING(ring, submit->seqno);

#if 0
        /* Dummy set-constant to trigger context rollover */
        OUT_PKT3(ring, CP_SET_CONSTANT, 2);
        OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
        OUT_RING(ring, 0x00000000);
#endif

        adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a3xx_me_init(struct msm_gpu *gpu)
{
        struct msm_ringbuffer *ring = gpu->rb[0];

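        /* CP_ME_INIT: one-time micro-engine initialization.  The 17 payload
         * dwords are hardware-defined defaults; their individual meanings are
         * not documented here (the values presumably track the downstream
         * vendor driver).
         */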
        OUT_PKT3(ring, CP_ME_INIT, 17);
        OUT_RING(ring, 0x000003f7);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000080);
        OUT_RING(ring, 0x00000100);
        OUT_RING(ring, 0x00000180);
        OUT_RING(ring, 0x00006600);
        OUT_RING(ring, 0x00000150);
        OUT_RING(ring, 0x0000014e);
        OUT_RING(ring, 0x00000154);
        OUT_RING(ring, 0x00000001);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
        return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
        uint32_t *ptr, len;
        int i, ret;

        DBG("%s", gpu->name);

        if (adreno_is_a305(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
        } else if (adreno_is_a306(adreno_gpu)) {
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
        } else if (adreno_is_a320(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
                /* Enable 1K sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

        } else if (adreno_is_a330v2(adreno_gpu)) {
                /*
                 * Most of the VBIF registers on 8974v2 have the correct
                 * values at power on, so we won't modify those if we don't
                 * need to
                 */
                /* Enable 1k sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

        } else if (adreno_is_a330(adreno_gpu)) {
                /* Set up 16 deep read/write request queues: */
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
                /* Enable WR-REQ: */
                gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
                /* Set up round robin arbitration between both AXI ports: */
                gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
                /* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
                gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
                /* Set up AOOO: */
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
                gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
                /* Enable 1K sort: */
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
                gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
                /* Disable VBIF clock gating, to allow AXI to run at a
                 * higher frequency than the GPU:
                 */
                gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

        } else {
                BUG();
        }

        /* Make all blocks contribute to the GPU BUSY perf counter: */
        gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

        /* Tune the hysteresis counters for SP and CP idle detection: */
        gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
        gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

        /* Enable the RBBM error reporting bits.  This lets us get
         * useful information on failure:
         */
        gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

        /* Enable AHB error reporting: */
        gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

        /* Turn on the power counters: */
        gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

        /* Turn on hang detection - this spews a lot of useful information
         * into the RBBM registers on a hang:
         */
        gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

        /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
        gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

        /* Enable Clock gating: */
        if (adreno_is_a306(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
        else if (adreno_is_a320(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
        else if (adreno_is_a330v2(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
        else if (adreno_is_a330(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

        if (adreno_is_a330v2(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
        else if (adreno_is_a330(adreno_gpu))
                gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

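        /* RB_GMEM_BASE_ADDR appears to take the base address in 16 KiB
         * units, hence the >> 14 shift below.
         */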
        /* Set the OCMEM base address for A330, etc */
        if (a3xx_gpu->ocmem.hdl) {
                gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
                        (unsigned int)(a3xx_gpu->ocmem.base >> 14));
        }

        /* Turn on performance counters: */
        gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

        /* Enable the perfcntrs that we use.. */
        for (i = 0; i < gpu->num_perfcntrs; i++) {
                const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
                gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
        }

        gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

        ret = adreno_hw_init(gpu);
        if (ret)
                return ret;

        /*
         * Use the default ringbuffer size and block size but disable the RPTR
         * shadow
         */
        gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

        /* Set the ringbuffer address */
        gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

        /* setup access protection: */
        gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

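        /* Each CP_PROTECT entry below packs a base register offset and a
         * range length; command-stream accesses that hit a protected range
         * trap with the CP_REG_PROTECT_FAULT interrupt enabled in
         * A3XX_INT0_MASK.  The exact bit layout of these values is assumed
         * to follow the downstream driver and is not documented here.
         */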
        /* RBBM registers */
        gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

        /* CP registers */
        gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
        gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

        /* RB registers */
        gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

        /* VBIF registers */
        gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

        /* NOTE: PM4/micro-engine firmware registers look to be the same
         * for a2xx and a3xx.. we could possibly push that part down to
         * adreno_gpu base class.  Or push both PM4 and PFP but
         * parameterize the pfp ucode addr/data registers..
         */

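        /* Both ucode images are written one dword at a time through the
         * ME_RAM / PFP_UCODE data ports.  Note that the loops start at
         * i = 1: word 0 of each image is skipped (assumed to be a header
         * rather than ucode proper).
         */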
        /* Load PM4: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
        len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
        DBG("loading PM4 ucode version: %x", ptr[1]);

        gpu_write(gpu, REG_AXXX_CP_DEBUG,
                        AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
                        AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
        gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

        /* Load PFP: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
        len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
        DBG("loading PFP ucode version: %x", ptr[5]);

        gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

        /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
        if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
                        adreno_is_a320(adreno_gpu)) {
                gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
                                AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
        } else if (adreno_is_a330(adreno_gpu)) {
                /* NOTE: this value (taken from the downstream Android driver)
                 * includes some bits outside of the known bitfields.  But
                 * A330 has this "MERCIU queue" thing too, which might
                 * explain a new bitfield or reshuffling:
                 */
                gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
        }

        /* clear ME_HALT to start micro engine */
        gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

        return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
        int i;

        adreno_dump_info(gpu);

        for (i = 0; i < 8; i++) {
                printk("CP_SCRATCH_REG%d: %u\n", i,
                        gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
        }

        /* dump registers before resetting gpu, if enabled: */
        if (hang_debug)
                a3xx_dump(gpu);

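        /* Pulse the RBBM software reset.  The read-back in between is
         * presumably there to ensure the write has posted before the reset
         * bit is cleared again.
         */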
        gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
        gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
        gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
        adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

        DBG("%s", gpu->name);

        adreno_gpu_cleanup(adreno_gpu);

        adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

        kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
        /* wait for ringbuffer to drain: */
        if (!adreno_idle(gpu, gpu->rb[0]))
                return false;

        /* then wait for GPU to finish: */
        if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
                        A3XX_RBBM_STATUS_GPU_BUSY))) {
                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

                /* TODO maybe we need to reset GPU here to recover from hang? */
                return false;
        }

        return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
        uint32_t status;

        status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
        DBG("%s: %08x", gpu->name, status);

        // TODO

        gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

        msm_gpu_retire(gpu);

        return IRQ_HANDLED;
}

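/* Register ranges (pairs of first/last offsets, terminated by the ~0
 * sentinel) dumped by adreno_show()/adreno_dump() and captured in the GPU
 * crash state.
 */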
static const unsigned int a3xx_registers[] = {
        0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
        0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
        0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
        0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
        0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
        0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
        0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
        0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
        0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
        0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
        0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
        0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
        0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
        0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
        0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
        0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
        0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
        0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
        0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
        0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
        0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
        0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
        0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
        0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
        0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
        0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
        0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
        0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
        0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
        0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
        0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
        0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
        0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
        0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
        ~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
        printk("status:   %08x\n",
                        gpu_read(gpu, REG_A3XX_RBBM_STATUS));
        adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

        if (!state)
                return ERR_PTR(-ENOMEM);

        adreno_gpu_state_get(gpu, state);

        state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

        return state;
}

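/* The RPTR shadow is disabled in a3xx_hw_init() (AXXX_CP_RB_CNTL_NO_UPDATE),
 * so the read pointer has to be read back from the register itself.
 */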
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
        return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .set_param = adreno_set_param,
                .hw_init = a3xx_hw_init,
                .pm_suspend = msm_gpu_pm_suspend,
                .pm_resume = msm_gpu_pm_resume,
                .recover = a3xx_recover,
                .submit = a3xx_submit,
                .active_ring = adreno_active_ring,
                .irq = a3xx_irq,
                .destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
                .show = adreno_show,
#endif
                .gpu_state_get = a3xx_gpu_state_get,
                .gpu_state_put = adreno_gpu_state_put,
                .create_address_space = adreno_iommu_create_address_space,
                .get_rptr = a3xx_get_rptr,
        },
};

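/* Perf counters exposed through the msm_gpu perfcounter interface; the
 * select registers are programmed in a3xx_hw_init().
 */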
static const struct msm_gpu_perfcntr perfcntrs[] = {
        { REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
                        SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
        { REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
                        SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
        struct a3xx_gpu *a3xx_gpu = NULL;
        struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
        struct msm_drm_private *priv = dev->dev_private;
        struct platform_device *pdev = priv->gpu_pdev;
        struct icc_path *ocmem_icc_path;
        struct icc_path *icc_path;
        int ret;

        if (!pdev) {
                DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
                ret = -ENXIO;
                goto fail;
        }

        a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
        if (!a3xx_gpu) {
                ret = -ENOMEM;
                goto fail;
        }

        adreno_gpu = &a3xx_gpu->base;
        gpu = &adreno_gpu->base;

        gpu->perfcntrs = perfcntrs;
        gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

        adreno_gpu->registers = a3xx_registers;

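        /* a3xx uses a single ringbuffer (the trailing 1 is the ring count). */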
        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret)
                goto fail;

        /* if needed, allocate gmem: */
        if (adreno_is_a330(adreno_gpu)) {
                ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
                                            adreno_gpu, &a3xx_gpu->ocmem);
                if (ret)
                        goto fail;
        }

        if (!gpu->aspace) {
                /* TODO we think it is possible to configure the GPU to
                 * restrict access to VRAM carveout.  But the required
                 * registers are unknown.  For now just bail out and
                 * limp along with just modesetting.  If it turns out
                 * to not be possible to restrict access, then we must
                 * implement a cmdstream validator.
                 */
                DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
                if (!allow_vram_carveout) {
                        ret = -ENXIO;
                        goto fail;
                }
        }

        icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
        if (IS_ERR(icc_path)) {
                ret = PTR_ERR(icc_path);
                goto fail;
        }

        ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
        if (IS_ERR(ocmem_icc_path)) {
                ret = PTR_ERR(ocmem_icc_path);
                /* allow -ENODATA, ocmem icc is optional */
                if (ret != -ENODATA)
                        goto fail;
                ocmem_icc_path = NULL;
        }

        /*
         * Set the ICC path to maximum speed for now by multiplying the fastest
         * frequency by the bus width (8). We'll want to scale this later on to
         * improve battery life.
         */
        icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
        icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

        return gpu;

fail:
        if (a3xx_gpu)
                a3xx_destroy(&a3xx_gpu->base.base);

        return ERR_PTR(ret);
}