// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

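/*
 * Interrupt sources unmasked at init.  Note that CACHE_FLUSH_TS also
 * serves as the "retire" interrupt: a4xx_submit() arms it via
 * CP_EVENT_WRITE so the IRQ fires once the fence value has been written
 * back, and a4xx_irq() then calls msm_gpu_retire().
 */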
#define A4XX_INT0_MASK \
        (A4XX_INT0_RBBM_AHB_ERROR |        \
         A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
         A4XX_INT0_CP_T0_PACKET_IN_IB |    \
         A4XX_INT0_CP_OPCODE_ERROR |       \
         A4XX_INT0_CP_RESERVED_BIT_ERROR | \
         A4XX_INT0_CP_HW_FAULT |           \
         A4XX_INT0_CP_IB1_INT |            \
         A4XX_INT0_CP_IB2_INT |            \
         A4XX_INT0_CP_RB_INT |             \
         A4XX_INT0_CP_REG_PROTECT_FAULT |  \
         A4XX_INT0_CP_AHB_ERROR_HALT |     \
         A4XX_INT0_CACHE_FLUSH_TS |        \
         A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
        struct msm_ringbuffer *ring = submit->ring;
        unsigned int i;

        for (i = 0; i < submit->nr_cmds; i++) {
                switch (submit->cmd[i].type) {
                case MSM_SUBMIT_CMD_IB_TARGET_BUF:
                        /* ignore IB-targets */
                        break;
                case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
                        /* ignore if there has not been a ctx switch: */
                        if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
                                break;
                        fallthrough;
                case MSM_SUBMIT_CMD_BUF:
                        OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
                        OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
                        OUT_RING(ring, submit->cmd[i].size);
                        OUT_PKT2(ring);
                        break;
                }
        }

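        /*
         * Stash the seqno in a CP scratch register; a4xx_recover() dumps
         * the scratch registers, so this records how far the CP got
         * before a hang.
         */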
        OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
        OUT_RING(ring, submit->seqno);

        /* Flush HLSQ lazy updates to make sure there is nothing
         * pending for indirect loads after the timestamp has
         * passed:
         */
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);

        /* wait for idle before cache flush/interrupt */
        OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
        OUT_RING(ring, 0x00000000);

        /* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
        OUT_PKT3(ring, CP_EVENT_WRITE, 3);
        OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
        OUT_RING(ring, rbmemptr(ring, fence));
        OUT_RING(ring, submit->seqno);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: the GPU whose clock control registers to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        unsigned int i;

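        /*
         * The magic values below look to be lifted from the downstream
         * (kgsl) clock-gating tables; treat them as opaque per-block CGC
         * settings rather than documented register fields.
         */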
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
        for (i = 0; i < 4; i++)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

        /* Disable L1 clocking in A420 due to CCU issues with it */
        for (i = 0; i < 4; i++) {
                if (adreno_is_a420(adreno_gpu)) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00002020);
                } else {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
                                        0x00022020);
                }
        }

        /* No CCU for A405 */
        if (!adreno_is_a405(adreno_gpu)) {
                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
                                        0x00000922);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
                                        0x00000000);
                }

                for (i = 0; i < 4; i++) {
                        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
                                        0x00000001);
                }
        }

        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
        /*
         * Early A430's have a timing issue with SP/TP power collapse;
         * disabling HW clock gating prevents it.
         */
        if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
        else
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
        gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
        struct msm_ringbuffer *ring = gpu->rb[0];

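        /*
         * CP_ME_INIT: one-time micro engine setup.  The first dword looks
         * to be a mask of which of the following fields are valid; the
         * payload values appear to match the downstream driver and are
         * otherwise opaque here.
         */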
        OUT_PKT3(ring, CP_ME_INIT, 17);
        OUT_RING(ring, 0x000003f7);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000080);
        OUT_RING(ring, 0x00000100);
        OUT_RING(ring, 0x00000180);
        OUT_RING(ring, 0x00006600);
        OUT_RING(ring, 0x00000150);
        OUT_RING(ring, 0x0000014e);
        OUT_RING(ring, 0x00000154);
        OUT_RING(ring, 0x00000001);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);

        adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
        return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
        uint32_t *ptr, len;
        int i, ret;

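        /*
         * VBIF (bus interface) QoS/arbitration setup; the sort, read/write
         * limit and round-robin values are per-variant tuning constants.
         */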
        if (adreno_is_a405(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
                gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else if (adreno_is_a430(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
                gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
                gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
        } else {
                BUG();
        }

        /* Make all blocks contribute to the GPU BUSY perf counter */
        gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

        /* Tune the hysteresis counters for SP and CP idle detection */
        gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
        gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

        if (adreno_is_a430(adreno_gpu))
                gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

        /* Enable the RBBM error reporting bits */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

        /* Enable AHB error reporting */
        gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

        /* Enable power counters */
        gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

        /*
         * Turn on hang detection - this spews a lot of useful information
         * into the RBBM registers on a hang:
         */
        gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
                        (1 << 30) | 0xFFFF);

        gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
                        (unsigned int)(a4xx_gpu->ocmem.base >> 14));

        /* Turn on performance counters: */
        gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

        /* use the first CP counter for timestamp queries.. userspace may set
         * this as well but it selects the same counter/countable:
         */
        gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

        if (adreno_is_a430(adreno_gpu))
                gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

        /* Disable L2 bypass to avoid UCHE out of bounds errors */
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
        gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

        gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
                        (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

        /* On A430 enable SP regfile sleep for power savings */
        /* TODO downstream does this for !420, so maybe applies for 405 too? */
        if (!adreno_is_a420(adreno_gpu)) {
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
                        0x00000441);
                gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
                        0x00000441);
        }

        a4xx_enable_hwcg(gpu);

        /*
         * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
         * due to timing issue with HLSQ_TP_CLK_EN
         */
        if (adreno_is_a420(adreno_gpu)) {
                unsigned int val;

                val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
                val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
                val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
                gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
        }

        /* setup access protection: */
        gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

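        /*
         * Each CP_PROTECT entry appears to encode a base register offset
         * in the low bits and a range size in the upper bits.  CP accesses
         * that hit a protected range raise the REG_PROTECT_FAULT interrupt
         * decoded in a4xx_irq().
         */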
        /* RBBM registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

        /* CP registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
        gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

        /* RB registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

        /* HLSQ registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

        /* VPC registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

        /* SMMU registers */
        gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

        gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

        ret = adreno_hw_init(gpu);
        if (ret)
                return ret;

        /*
         * Use the default ringbuffer size and block size but disable the RPTR
         * shadow
         */
        gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
                MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

        /* Set the ringbuffer address */
        gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

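        /*
         * Word 0 of each firmware image holds the ucode version (only
         * logged here), so the actual upload starts at word 1.
         */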
        /* Load PM4: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
        len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
        DBG("loading PM4 ucode version: %u", ptr[0]);
        gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

        /* Load PFP: */
        ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
        len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
        DBG("loading PFP ucode version: %u", ptr[0]);

        gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
        for (i = 1; i < len; i++)
                gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

        /* clear ME_HALT to start micro engine */
        gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

        return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
        int i;

        adreno_dump_info(gpu);

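        /*
         * CP_SCRATCH_REG2 holds the seqno written at the end of each
         * submit (see a4xx_submit()), so this dump gives a hint of how
         * far the CP got before the hang.
         */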
        for (i = 0; i < 8; i++) {
                printk("CP_SCRATCH_REG%d: %u\n", i,
                        gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
        }

        /* dump registers before resetting gpu, if enabled: */
        if (hang_debug)
                a4xx_dump(gpu);

        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
        gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
        gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
        adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

        DBG("%s", gpu->name);

        adreno_gpu_cleanup(adreno_gpu);

        adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

        kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
        /* wait for ringbuffer to drain: */
        if (!adreno_idle(gpu, gpu->rb[0]))
                return false;

        /* then wait for GPU to finish: */
        if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
                                        A4XX_RBBM_STATUS_GPU_BUSY))) {
                DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
                /* TODO maybe we need to reset GPU here to recover from hang? */
                return false;
        }

        return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
        uint32_t status;

        status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
        DBG("%s: Int status %08x", gpu->name, status);

        if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
                uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

                printk("CP | Protected mode error | %s | addr=%x\n",
                        reg & (1 << 24) ? "WRITE" : "READ",
                        (reg & 0xFFFFF) >> 2);
        }

        gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

        msm_gpu_retire(gpu);

        return IRQ_HANDLED;
}

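/*
 * Register ranges to include in GPU state snapshots, as inclusive
 * [start, end] pairs terminated by the ~0 sentinel.
 */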
static const unsigned int a4xx_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* VMIDMT */
        0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
        0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
        0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
        0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
        0x1380, 0x1380,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* XPU */
        0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
        0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
        0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
        /* VBIF */
        0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
        0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
        0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
        0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
        0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
        0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
        0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
        0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
        0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
        0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
        0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
        0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
        0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
        0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
        0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
        0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
        0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
        0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
        0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
        0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
        0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
        0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
        0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
        0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
        0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
        0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
        0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
        0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
        0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
        0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
        0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
        0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
        0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
        0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
        ~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
        /* RBBM */
        0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
        0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
        0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
        /* CP */
        0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
        0x0578, 0x058F,
        /* VSC */
        0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
        /* GRAS */
        0x0C80, 0x0C81, 0x0C88, 0x0C8F,
        /* RB */
        0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
        /* PC */
        0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
        /* VFD */
        0x0E40, 0x0E4A,
        /* VPC */
        0x0E60, 0x0E61, 0x0E63, 0x0E68,
        /* UCHE */
        0x0E80, 0x0E84, 0x0E88, 0x0E95,
        /* GRAS CTX 0 */
        0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
        /* PC CTX 0 */
        0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
        /* VFD CTX 0 */
        0x2200, 0x2204, 0x2208, 0x22A9,
        /* GRAS CTX 1 */
        0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
        /* PC CTX 1 */
        0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
        /* VFD CTX 1 */
        0x2600, 0x2604, 0x2608, 0x26A9,
        /* VBIF version 0x20050000 */
        0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
        0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
        0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
        0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
        0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
        0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
        0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
        0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
        ~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
        struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

        if (!state)
                return ERR_PTR(-ENOMEM);

        adreno_gpu_state_get(gpu, state);

        state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

        return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
        printk("status:   %08x\n",
                        gpu_read(gpu, REG_A4XX_RBBM_STATUS));
        adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_resume(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                unsigned int reg;

                /* Set the default register values; set SW_COLLAPSE to 0 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
                do {
                        udelay(5);
                        reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
                } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
        }
        return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        int ret;

        ret = msm_gpu_pm_suspend(gpu);
        if (ret)
                return ret;

        if (adreno_is_a430(adreno_gpu)) {
                /* Set the default register values; set SW_COLLAPSE to 1 */
                gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
        }
        return 0;
}

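/*
 * The CP_0 perfcounter is programmed to CP_ALWAYS_COUNT in a4xx_hw_init(),
 * so reading it back here yields a free-running GPU timestamp.
 */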
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
        *value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
                REG_A4XX_RBBM_PERFCTR_CP_0_HI);

        return 0;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
        ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
        return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
                .set_param = adreno_set_param,
                .hw_init = a4xx_hw_init,
                .pm_suspend = a4xx_pm_suspend,
                .pm_resume = a4xx_pm_resume,
                .recover = a4xx_recover,
                .submit = a4xx_submit,
                .active_ring = adreno_active_ring,
                .irq = a4xx_irq,
                .destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
                .show = adreno_show,
#endif
                .gpu_state_get = a4xx_gpu_state_get,
                .gpu_state_put = adreno_gpu_state_put,
                .create_address_space = adreno_iommu_create_address_space,
                .get_rptr = a4xx_get_rptr,
        },
        .get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
        struct a4xx_gpu *a4xx_gpu = NULL;
        struct adreno_gpu *adreno_gpu;
        struct msm_gpu *gpu;
        struct msm_drm_private *priv = dev->dev_private;
        struct platform_device *pdev = priv->gpu_pdev;
        struct icc_path *ocmem_icc_path;
        struct icc_path *icc_path;
        int ret;

        if (!pdev) {
                DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
                ret = -ENXIO;
                goto fail;
        }

        a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
        if (!a4xx_gpu) {
                ret = -ENOMEM;
                goto fail;
        }

        adreno_gpu = &a4xx_gpu->base;
        gpu = &adreno_gpu->base;

        gpu->perfcntrs = NULL;
        gpu->num_perfcntrs = 0;

        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret)
                goto fail;

        adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
                                                             a4xx_registers;

        /* if needed, allocate gmem: */
        ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
                                    &a4xx_gpu->ocmem);
        if (ret)
                goto fail;

        if (!gpu->aspace) {
                /* TODO we think it is possible to configure the GPU to
                 * restrict access to VRAM carveout.  But the required
                 * registers are unknown.  For now just bail out and
                 * limp along with just modesetting.  If it turns out
                 * to not be possible to restrict access, then we must
                 * implement a cmdstream validator.
                 */
                DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
                if (!allow_vram_carveout) {
                        ret = -ENXIO;
                        goto fail;
                }
        }

        icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
        if (IS_ERR(icc_path)) {
                ret = PTR_ERR(icc_path);
                goto fail;
        }

        ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
        if (IS_ERR(ocmem_icc_path)) {
                ret = PTR_ERR(ocmem_icc_path);
                /* allow -ENODATA, ocmem icc is optional */
                if (ret != -ENODATA)
                        goto fail;
                ocmem_icc_path = NULL;
        }

        /*
         * Set the ICC path to maximum speed for now by multiplying the fastest
         * frequency by the bus width (8). We'll want to scale this later on to
         * improve battery life.
         */
        icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
        icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

        return gpu;

fail:
        if (a4xx_gpu)
                a4xx_destroy(&a4xx_gpu->base.base);

        return ERR_PTR(ret);
}