/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/iopoll.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"
extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13
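/*
 * Rough sketch of the device-tree layout the loader below expects,
 * inferred from the of_get_child_by_name()/of_parse_phandle() lookups it
 * performs; the reserved-memory node itself is platform specific and is
 * only shown here as an illustration:
 *
 *	reserved-memory {
 *		zap_shader_region: zap-shader@0 {
 *			reg = <...>;
 *		};
 *	};
 *
 *	gpu {
 *		zap-shader {
 *			memory-region = <&zap_shader_region>;
 *		};
 *	};
 */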
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np, *mem_np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	mem_np = of_parse_phandle(np, "memory-region", 0);
	if (!mem_np)
		return -EINVAL;

	ret = of_address_to_resource(mem_np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten thru adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}
static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet. But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
}
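/*
 * A note on the packet helpers used throughout the submit path (general
 * Adreno CP convention rather than anything defined in this file):
 * OUT_PKT4() emits a type-4 packet whose payload dwords are written to
 * consecutive GPU registers starting at the given offset, while
 * OUT_PKT7() emits a type-7 packet that hands a CP_* opcode plus payload
 * to the command processor for it to interpret.
 */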
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit, ctx);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
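/*
 * Prime the hardware for preemption before any real work is submitted:
 * the function below points the CP at this ring's preemption save record
 * and issues an initial yield so the first preemption switch has valid
 * state to save into. (The save records themselves are allocated by the
 * preemption setup code elsewhere in the driver.)
 */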
static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}
#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}
static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	A5XX_RBBM_INT_0_MASK_CP_SW | \
	A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xF0000000);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}
static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}
static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}
static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;

	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}
static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */
		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write", "", "" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}
static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}
static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}
static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}
static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	~0,
};
static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);

	return 0;
}
struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};
#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
	readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
		interval, timeout)
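/*
 * gpu_poll_timeout() simply wraps readl_poll_timeout(): the register
 * offset is a dword offset, so it is shifted left by 2 to get the byte
 * offset into the MMIO region, and the interval/timeout arguments are
 * passed through in microseconds.
 */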
static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (IS_ERR(dumper->ptr))
		return PTR_ERR(dumper->ptr);

	return 0;
}
static void a5xx_crashdumper_free(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	msm_gem_put_iova(dumper->bo, gpu->aspace);
	msm_gem_put_vaddr(dumper->bo);

	drm_gem_object_unreference(dumper->bo);
}
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}
/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture through the crashdumper. These are not nominally accessible from
 * the CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
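/*
 * Sketch of the crashdumper script format as used by the builder below
 * (inferred from that code rather than from hardware documentation):
 * each operation is a pair of 64-bit words. A register write is encoded
 * as { value, (register << 44) | (1 << 21) | count } and is used here to
 * select the HLSQ aperture bank; a read-back is encoded as
 * { target iova, (register << 44) | count }, which copies "count" dwords
 * from the aperture into the target buffer. Two zero words terminate the
 * script.
 */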
static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		a5xx_crashdumper_free(gpu, &dumper);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	a5xx_crashdumper_free(gpu, &dumper);
}
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
		GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}
int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j, pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif
static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}
static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	return 0;
}
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};
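/*
 * Background for the helper below (generic OPP framework behaviour,
 * stated here as context rather than taken from this file): when an OPP
 * table carries opp-supported-hw values, each OPP is only enabled if its
 * mask intersects the value passed to dev_pm_opp_set_supported_hw(). A
 * fused speed bin of N would therefore typically be turned into the
 * bitmask BIT(N) so that only the OPPs rated for that bin are used.
 */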
static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 val;

	/*
	 * If the OPP table specifies an opp-supported-hw property then we have
	 * to set something with dev_pm_opp_set_supported_hw() or the table
	 * doesn't get populated so pick an arbitrary value that should
	 * ensure the default frequencies are selected but not conflict with any
	 * actual bins
	 */
	cell = nvmem_cell_get(dev, "speed_bin");

	if (!IS_ERR(cell)) {
		void *buf = nvmem_cell_read(cell, NULL);

		if (!IS_ERR(buf)) {
			u8 bin = *((u8 *) buf);

			val = (1 << bin);
			kfree(buf);
		}

		nvmem_cell_put(cell);
	}

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}
struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	/* Restricting nr_rings to 1 to temporarily disable preemption */
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}