/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/iopoll.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"
extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13
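/*
 * Rough sketch of the device-tree layout the loader below expects,
 * inferred from the of_get_child_by_name()/of_parse_phandle() lookups it
 * performs; the reserved-memory node itself is platform specific and is
 * only shown here as an illustration:
 *
 *	reserved-memory {
 *		zap_shader_region: zap-shader@0 {
 *			reg = <...>;
 *		};
 *	};
 *
 *	gpu {
 *		zap-shader {
 *			memory-region = <&zap_shader_region>;
 *		};
 *	};
 */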
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np, *mem_np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	mem_np = of_parse_phandle(np, "memory-region", 0);
	if (!mem_np)
		return -EINVAL;

	ret = of_address_to_resource(mem_np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten thru adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}
static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet. But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
}
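/*
 * A note on the packet helpers used throughout the submit path (general
 * Adreno CP convention rather than anything defined in this file):
 * OUT_PKT4() emits a type-4 packet whose payload dwords are written to
 * consecutive GPU registers starting at the given offset, while
 * OUT_PKT7() emits a type-7 packet that hands a CP_* opcode plus payload
 * to the command processor for it to interpret.
 */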
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit, ctx);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
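/*
 * Prime the hardware for preemption before any real work is submitted:
 * the function below points the CP at this ring's preemption save record
 * and issues an initial yield so the first preemption switch has valid
 * state to save into. (The save records themselves are allocated by the
 * preemption setup code elsewhere in the driver.)
 */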
static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}
#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}
static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	A5XX_RBBM_INT_0_MASK_CP_SW | \
	A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xF0000000);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}
static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}
static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}
static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}
static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */
		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write", "", "" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}
static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}
static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}
static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}
static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	~0,
};
static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);

	return 0;
}
struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};
#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
	readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
		interval, timeout)
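/*
 * gpu_poll_timeout() simply wraps readl_poll_timeout(): the register
 * offset is a dword offset, so it is shifted left by 2 to get the byte
 * offset into the MMIO region, and the interval/timeout arguments are
 * passed through in microseconds.
 */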
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (IS_ERR(dumper->ptr))
		return PTR_ERR(dumper->ptr);

	return 0;
}
static void a5xx_crashdumper_free(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	msm_gem_put_iova(dumper->bo, gpu->aspace);
	msm_gem_put_vaddr(dumper->bo);

	drm_gem_object_unreference(dumper->bo);
}
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}
/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper. These are not nominally accessible from
 * the CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
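/*
 * Sketch of the crashdumper script format as used by the builder below
 * (inferred from that code rather than from hardware documentation):
 * each operation is a pair of 64-bit words. A register write is encoded
 * as { value, (register << 44) | (1 << 21) | count } and is used here to
 * select the HLSQ aperture bank; a read-back is encoded as
 * { target iova, (register << 44) | count }, which copies "count" dwords
 * from the aperture into the target buffer. Two zero words terminate the
 * script.
 */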
static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		a5xx_crashdumper_free(gpu, &dumper);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	a5xx_crashdumper_free(gpu, &dumper);
}
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
		GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}
int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j, pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif
static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}
static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	return 0;
}
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};
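/*
 * Background for the helper below (generic OPP framework behaviour,
 * stated here as context rather than taken from this file): when an OPP
 * table carries opp-supported-hw values, each OPP is only enabled if its
 * mask intersects the value passed to dev_pm_opp_set_supported_hw(). A
 * fused speed bin of N would therefore typically be turned into the
 * bitmask BIT(N) so that only the OPPs rated for that bin are used.
 */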
static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */
	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}
struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	/* Restricting nr_rings to 1 to temporarily disable preemption */
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}