GNU Linux-libre 4.14.295-gnu1
[releases.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <drm/drmP.h>
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_MEC_HPD_SIZE 2048
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address*/
/* Logical BPM register indices, paired with SET_BPM_SERDES_CMD /
 * CLE_BPM_SERDES_CMD above when issuing BPM SERDES commands. */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength        14
93
94 /*(DEBLOBBED)*/
95
/* Per-VMID GDS register offsets, one row per VMID 0-15:
 * {base, size, gws, oa} (field order per struct amdgpu_gds_reg_offset). */
96 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
97 {
98         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
99         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
100         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
101         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
102         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
103         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
104         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
105         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
106         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
107         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
108         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
109         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
110         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
111         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
112         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
113         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
114 };
115
/* Tonga A11 golden register fixups: {register, mask, data} triplets applied
 * by amdgpu_program_register_sequence() during golden-register init
 * (NOTE(review): mask/data semantics defined by that helper — confirm there). */
116 static const u32 golden_settings_tonga_a11[] =
117 {
118         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
119         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
120         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
121         mmGB_GPU_ID, 0x0000000f, 0x00000000,
122         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
123         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
124         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
125         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
126         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
127         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
128         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
129         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
130         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
131         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
132         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
133         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
134 };
135
/* Tonga common golden settings ({register, mask, data} triplets for
 * amdgpu_program_register_sequence()): raster config, GB_ADDR_CONFIG
 * (matches TONGA_GB_ADDR_CONFIG_GOLDEN) and SPI CU resource reserves. */
136 static const u32 tonga_golden_common_all[] =
137 {
138         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
139         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
140         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
141         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
142         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
143         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
144         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
145         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
146 };
147
/* Tonga MGCG/CGCG clock-gating init sequence ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()): CGTT clock controls,
 * then per-CU CGTS settings for CU0-CU7 (broadcast via GRBM_GFX_INDEX). */
148 static const u32 tonga_mgcg_cgcg_init[] =
149 {
150         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
151         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
152         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
153         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
154         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
155         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
156         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
157         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
158         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
159         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
160         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
161         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
162         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
163         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
164         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
165         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
166         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
167         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
168         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
169         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
170         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
171         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
172         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
173         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
174         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
175         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
176         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
177         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
178         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
179         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
180         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
181         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
182         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
183         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
184         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
185         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
186         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
187         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
188         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
189         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
190         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
191         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
192         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
193         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
194         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
195         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
196         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
197         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
198         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
199         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
200         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
201         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
202         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
203         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
204         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
205         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
206         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
207         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
208         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
209         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
210         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
211         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
212         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
213         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
214         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
215         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
216         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
217         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
218         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
219         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
220         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
221         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
222         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
223         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
224         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
225 };
226
/* Polaris11/Polaris12 A11 golden register fixups ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()). */
227 static const u32 golden_settings_polaris11_a11[] =
228 {
229         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
230         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
231         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
232         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
233         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
234         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
235         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
236         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
237         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
238         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
239         mmSQ_CONFIG, 0x07f80000, 0x01180000,
240         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
241         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
242         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
243         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
244         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
245         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
246 };
247
/* Polaris11 common golden settings ({register, mask, data} triplets):
 * GB_ADDR_CONFIG (matches POLARIS11_GB_ADDR_CONFIG_GOLDEN) and SPI CU
 * resource reserves. */
248 static const u32 polaris11_golden_common_all[] =
249 {
250         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
251         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
252         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
253         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
254         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
255         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
256 };
257
/* Polaris10 A11 golden register fixups ({register, mask, data} triplets
 * for amdgpu_program_register_sequence()). */
258 static const u32 golden_settings_polaris10_a11[] =
259 {
260         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
261         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
262         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
263         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
264         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
265         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
266         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
267         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
268         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
269         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
270         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
271         mmSQ_CONFIG, 0x07f80000, 0x07180000,
272         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
275         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
276         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
/* Polaris10 common golden settings ({register, mask, data} triplets):
 * raster config, GB_ADDR_CONFIG and SPI CU resource reserves. */
279 static const u32 polaris10_golden_common_all[] =
280 {
281         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
282         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
283         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
284         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
285         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
286         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
287         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
288         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
289 };
290
/* Fiji common golden settings ({register, mask, data} triplets):
 * raster config, GB_ADDR_CONFIG, SPI CU resource reserves and
 * SPI_CONFIG_CNTL_1. */
291 static const u32 fiji_golden_common_all[] =
292 {
293         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
294         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
295         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
296         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
297         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
298         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
299         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
300         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
301         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
302         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
303 };
304
/* Fiji A10 golden register fixups ({register, mask, data} triplets
 * for amdgpu_program_register_sequence()). */
305 static const u32 golden_settings_fiji_a10[] =
306 {
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
312         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
313         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
314         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
315         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
316         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
317         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
318 };
319
/* Fiji MGCG/CGCG clock-gating init sequence ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()); unlike Tonga, no
 * per-CU CGTS entries. */
320 static const u32 fiji_mgcg_cgcg_init[] =
321 {
322         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
323         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
324         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
325         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
326         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
327         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
328         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
329         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
330         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
331         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
332         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
333         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
334         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
335         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
336         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
337         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
338         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
339         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
340         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
341         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
342         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
343         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
344         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
345         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
346         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
347         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
348         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
349         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
350         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
351         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
352         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
354         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
355         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
356         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
357 };
358
/* Iceland (Topaz) A11 golden register fixups ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()). */
359 static const u32 golden_settings_iceland_a11[] =
360 {
361         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
362         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
363         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
364         mmGB_GPU_ID, 0x0000000f, 0x00000000,
365         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
366         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
367         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
368         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
369         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
370         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
371         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
372         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
373         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
374         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
375         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
376         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
377 };
378
/* Iceland (Topaz) common golden settings ({register, mask, data} triplets):
 * raster config, GB_ADDR_CONFIG (matches TOPAZ_GB_ADDR_CONFIG_GOLDEN) and
 * SPI CU resource reserves. */
379 static const u32 iceland_golden_common_all[] =
380 {
381         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
383         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
384         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
385         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
386         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
387         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
388         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
389 };
390
/* Iceland (Topaz) MGCG/CGCG clock-gating init sequence ({register, mask,
 * data} triplets for amdgpu_program_register_sequence()): CGTT clock
 * controls, then per-CU CGTS settings for CU0-CU5 only. */
391 static const u32 iceland_mgcg_cgcg_init[] =
392 {
393         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
394         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
395         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
396         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
397         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
398         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
399         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
400         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
401         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
402         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
403         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
404         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
405         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
406         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
407         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
408         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
409         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
410         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
411         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
412         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
413         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
414         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
415         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
416         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
417         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
418         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
419         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
420         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
421         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
422         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
423         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
424         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
425         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
426         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
427         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
428         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
429         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
430         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
431         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
432         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
433         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
434         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
435         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
436         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
437         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
438         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
439         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
440         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
441         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
442         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
443         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
444         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
445         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
446         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
447         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
448         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
449         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
450         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
451         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
452         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
453         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
454         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
455         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
456         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
457 };
458
/* Carrizo A11 golden register fixups ({register, mask, data} triplets
 * for amdgpu_program_register_sequence()). */
459 static const u32 cz_golden_settings_a11[] =
460 {
461         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
462         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
463         mmGB_GPU_ID, 0x0000000f, 0x00000000,
464         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
465         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
466         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
467         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
468         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
469         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
470         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
471         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
472         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
473 };
474
/* Carrizo common golden settings ({register, mask, data} triplets):
 * raster config, GB_ADDR_CONFIG (matches CARRIZO_GB_ADDR_CONFIG_GOLDEN)
 * and SPI CU resource reserves. */
475 static const u32 cz_golden_common_all[] =
476 {
477         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
478         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
479         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
480         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
481         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
482         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
483         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
484         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
485 };
486
/* Carrizo MGCG/CGCG clock-gating init sequence ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()): CGTT clock controls,
 * then per-CU CGTS settings for CU0-CU7. */
487 static const u32 cz_mgcg_cgcg_init[] =
488 {
489         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
490         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
491         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
492         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
493         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
494         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
495         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
496         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
497         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
498         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
499         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
500         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
501         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
502         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
503         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
505         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
506         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
507         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
508         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
509         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
510         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
511         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
512         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
514         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
515         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
516         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
517         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
518         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
519         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
520         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
521         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
522         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
523         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
524         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
525         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
526         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
527         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
528         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
529         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
530         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
531         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
532         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
533         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
534         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
535         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
538         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
543         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
548         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
553         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
558         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
561         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
562         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
563         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
564 };
565
/* Stoney A11 golden register fixups ({register, mask, data} triplets
 * for amdgpu_program_register_sequence()). */
566 static const u32 stoney_golden_settings_a11[] =
567 {
568         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
569         mmGB_GPU_ID, 0x0000000f, 0x00000000,
570         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
571         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
572         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
573         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
574         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
575         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
576         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
577         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
578 };
579
/* Stoney common golden settings ({register, mask, data} triplets):
 * raster config, GB_ADDR_CONFIG and SPI CU resource reserves. */
580 static const u32 stoney_golden_common_all[] =
581 {
582         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
583         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
584         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
585         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
586         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
587         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
588         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
589         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
590 };
591
/* Stoney MGCG/CGCG clock-gating init sequence ({register, mask, data}
 * triplets for amdgpu_program_register_sequence()); much shorter than the
 * dGPU variants — only CGCG/CGLS control and memory-sleep settings. */
592 static const u32 stoney_mgcg_cgcg_init[] =
593 {
594         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
595         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
596         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
597         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
598         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
599 };
600
601 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
602 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
603 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
604 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
605 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
606 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
607 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
608 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
609
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the ASIC-specific clockgating (mgcg/cgcg) and golden register
 * tables via amdgpu_program_register_sequence().  ASICs not listed get
 * no programming (default case is a no-op).
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris11 and Polaris12 share the same golden tables */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* NOTE(review): board-specific quirk — revision 0xc7 cards from
		 * these three subsystem vendor/device pairs get two extra I2C
		 * register writes; presumably a vendor workaround, confirm
		 * against the relevant board documentation. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
698
699 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
700 {
701         adev->gfx.scratch.num_reg = 8;
702         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
703         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
704 }
705
/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a small packet
 * asking the CP to write 0xDEADBEEF to that register, and polls (up to
 * adev->usec_timeout microseconds) for the value to appear.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel the CP must overwrite for the test to pass */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword SET_UCONFIG_REG packet: write 0xDEADBEEF to the scratch reg */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to execute the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
749
/*
 * gfx_v8_0_ring_test_ib - test indirect-buffer submission on a ring
 *
 * Same idea as the ring test, but the scratch write is carried in a
 * small IB scheduled through the ring, and completion is detected via
 * the returned fence rather than by polling.
 *
 * @timeout: jiffies to wait for the fence (passed to
 *           dma_fence_wait_timeout(), hence the long return type).
 *
 * Returns 0 on success, negative error code on failure
 * (-ETIMEDOUT if the fence never signals).
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel the IB must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* IB payload: SET_UCONFIG_REG writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		/* dma_fence_wait_timeout() returns 0 on timeout */
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
805
806
807 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
808 {
809         release_firmware(adev->gfx.pfp_fw);
810         adev->gfx.pfp_fw = NULL;
811         release_firmware(adev->gfx.me_fw);
812         adev->gfx.me_fw = NULL;
813         release_firmware(adev->gfx.ce_fw);
814         adev->gfx.ce_fw = NULL;
815         release_firmware(adev->gfx.rlc_fw);
816         adev->gfx.rlc_fw = NULL;
817         release_firmware(adev->gfx.mec_fw);
818         adev->gfx.mec_fw = NULL;
819         if ((adev->asic_type != CHIP_STONEY) &&
820             (adev->asic_type != CHIP_TOPAZ))
821                 release_firmware(adev->gfx.mec2_fw);
822         adev->gfx.mec2_fw = NULL;
823
824         kfree(adev->gfx.rlc.register_list_format);
825 }
826
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
828 {
829         const char *chip_name;
830         char fw_name[30];
831         int err;
832         struct amdgpu_firmware_info *info = NULL;
833         const struct common_firmware_header *header = NULL;
834         const struct gfx_firmware_header_v1_0 *cp_hdr;
835         const struct rlc_firmware_header_v2_0 *rlc_hdr;
836         unsigned int *tmp = NULL, i;
837
838         DRM_DEBUG("\n");
839
840         switch (adev->asic_type) {
841         case CHIP_TOPAZ:
842                 chip_name = "topaz";
843                 break;
844         case CHIP_TONGA:
845                 chip_name = "tonga";
846                 break;
847         case CHIP_CARRIZO:
848                 chip_name = "carrizo";
849                 break;
850         case CHIP_FIJI:
851                 chip_name = "fiji";
852                 break;
853         case CHIP_POLARIS11:
854                 chip_name = "polaris11";
855                 break;
856         case CHIP_POLARIS10:
857                 chip_name = "polaris10";
858                 break;
859         case CHIP_POLARIS12:
860                 chip_name = "polaris12";
861                 break;
862         case CHIP_STONEY:
863                 chip_name = "stoney";
864                 break;
865         default:
866                 BUG();
867         }
868
869         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
870         err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
871         if (err)
872                 goto out;
873         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
874         if (err)
875                 goto out;
876         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
877         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
878         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
879
880         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
881         err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
882         if (err)
883                 goto out;
884         err = amdgpu_ucode_validate(adev->gfx.me_fw);
885         if (err)
886                 goto out;
887         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
888         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
889
890         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
891
892         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
893         err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
894         if (err)
895                 goto out;
896         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
897         if (err)
898                 goto out;
899         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
900         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
901         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
902
903         /*
904          * Support for MCBP/Virtualization in combination with chained IBs is
905          * formal released on feature version #46
906          */
907         if (adev->gfx.ce_feature_version >= 46 &&
908             adev->gfx.pfp_feature_version >= 46) {
909                 adev->virt.chained_ib_support = true;
910                 DRM_INFO("Chained IB support enabled!\n");
911         } else
912                 adev->virt.chained_ib_support = false;
913
914         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
915         err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
916         if (err)
917                 goto out;
918         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
919         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
920         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
921         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
922
923         adev->gfx.rlc.save_and_restore_offset =
924                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
925         adev->gfx.rlc.clear_state_descriptor_offset =
926                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
927         adev->gfx.rlc.avail_scratch_ram_locations =
928                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
929         adev->gfx.rlc.reg_restore_list_size =
930                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
931         adev->gfx.rlc.reg_list_format_start =
932                         le32_to_cpu(rlc_hdr->reg_list_format_start);
933         adev->gfx.rlc.reg_list_format_separate_start =
934                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
935         adev->gfx.rlc.starting_offsets_start =
936                         le32_to_cpu(rlc_hdr->starting_offsets_start);
937         adev->gfx.rlc.reg_list_format_size_bytes =
938                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
939         adev->gfx.rlc.reg_list_size_bytes =
940                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
941
942         adev->gfx.rlc.register_list_format =
943                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
944                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
945
946         if (!adev->gfx.rlc.register_list_format) {
947                 err = -ENOMEM;
948                 goto out;
949         }
950
951         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
952                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
953         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
954                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
955
956         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
957
958         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
959                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
960         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
961                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
962
963         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
964         err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
965         if (err)
966                 goto out;
967         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
968         if (err)
969                 goto out;
970         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
971         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
972         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
973
974         if ((adev->asic_type != CHIP_STONEY) &&
975             (adev->asic_type != CHIP_TOPAZ)) {
976                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
977                 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
978                 if (!err) {
979                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
980                         if (err)
981                                 goto out;
982                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
983                                 adev->gfx.mec2_fw->data;
984                         adev->gfx.mec2_fw_version =
985                                 le32_to_cpu(cp_hdr->header.ucode_version);
986                         adev->gfx.mec2_feature_version =
987                                 le32_to_cpu(cp_hdr->ucode_feature_version);
988                 } else {
989                         err = 0;
990                         adev->gfx.mec2_fw = NULL;
991                 }
992         }
993
994         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
995                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
996                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
997                 info->fw = adev->gfx.pfp_fw;
998                 header = (const struct common_firmware_header *)info->fw->data;
999                 adev->firmware.fw_size +=
1000                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1001
1002                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1003                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1004                 info->fw = adev->gfx.me_fw;
1005                 header = (const struct common_firmware_header *)info->fw->data;
1006                 adev->firmware.fw_size +=
1007                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1008
1009                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1010                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1011                 info->fw = adev->gfx.ce_fw;
1012                 header = (const struct common_firmware_header *)info->fw->data;
1013                 adev->firmware.fw_size +=
1014                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1015
1016                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1017                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1018                 info->fw = adev->gfx.rlc_fw;
1019                 header = (const struct common_firmware_header *)info->fw->data;
1020                 adev->firmware.fw_size +=
1021                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1022
1023                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1024                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1025                 info->fw = adev->gfx.mec_fw;
1026                 header = (const struct common_firmware_header *)info->fw->data;
1027                 adev->firmware.fw_size +=
1028                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029
1030                 /* we need account JT in */
1031                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1032                 adev->firmware.fw_size +=
1033                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1034
1035                 if (amdgpu_sriov_vf(adev)) {
1036                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1037                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1038                         info->fw = adev->gfx.mec_fw;
1039                         adev->firmware.fw_size +=
1040                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1041                 }
1042
1043                 if (adev->gfx.mec2_fw) {
1044                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1045                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1046                         info->fw = adev->gfx.mec2_fw;
1047                         header = (const struct common_firmware_header *)info->fw->data;
1048                         adev->firmware.fw_size +=
1049                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050                 }
1051
1052         }
1053
1054 out:
1055         if (err) {
1056                 dev_err(adev->dev,
1057                         "gfx8: Failed to load firmware \"%s\"\n",
1058                         fw_name);
1059                 release_firmware(adev->gfx.pfp_fw);
1060                 adev->gfx.pfp_fw = NULL;
1061                 release_firmware(adev->gfx.me_fw);
1062                 adev->gfx.me_fw = NULL;
1063                 release_firmware(adev->gfx.ce_fw);
1064                 adev->gfx.ce_fw = NULL;
1065                 release_firmware(adev->gfx.rlc_fw);
1066                 adev->gfx.rlc_fw = NULL;
1067                 release_firmware(adev->gfx.mec_fw);
1068                 adev->gfx.mec_fw = NULL;
1069                 release_firmware(adev->gfx.mec2_fw);
1070                 adev->gfx.mec2_fw = NULL;
1071         }
1072         return err;
1073 }
1074
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state indirect buffer
 *
 * Emits a PM4 packet stream into @buffer: begin-clear-state preamble,
 * context control, every SECT_CONTEXT extent from the cs_data tables,
 * the PA_SC_RASTER_CONFIG pair, end-clear-state preamble, and a final
 * CLEAR_STATE packet.  Bails out early (leaving the buffer truncated)
 * if cs_data or buffer is NULL, or if a non-context section is hit.
 * The caller sizes @buffer via gfx_v8_0_get_csb_size().
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy each context-register extent as a SET_CONTEXT_REG packet */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported here */
				return;
			}
		}
	}

	/* raster config for SE0/RB0 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1121
1122 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1123 {
1124         const __le32 *fw_data;
1125         volatile u32 *dst_ptr;
1126         int me, i, max_me = 4;
1127         u32 bo_offset = 0;
1128         u32 table_offset, table_size;
1129
1130         if (adev->asic_type == CHIP_CARRIZO)
1131                 max_me = 5;
1132
1133         /* write the cp table buffer */
1134         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1135         for (me = 0; me < max_me; me++) {
1136                 if (me == 0) {
1137                         const struct gfx_firmware_header_v1_0 *hdr =
1138                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1139                         fw_data = (const __le32 *)
1140                                 (adev->gfx.ce_fw->data +
1141                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1142                         table_offset = le32_to_cpu(hdr->jt_offset);
1143                         table_size = le32_to_cpu(hdr->jt_size);
1144                 } else if (me == 1) {
1145                         const struct gfx_firmware_header_v1_0 *hdr =
1146                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1147                         fw_data = (const __le32 *)
1148                                 (adev->gfx.pfp_fw->data +
1149                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1150                         table_offset = le32_to_cpu(hdr->jt_offset);
1151                         table_size = le32_to_cpu(hdr->jt_size);
1152                 } else if (me == 2) {
1153                         const struct gfx_firmware_header_v1_0 *hdr =
1154                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1155                         fw_data = (const __le32 *)
1156                                 (adev->gfx.me_fw->data +
1157                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1158                         table_offset = le32_to_cpu(hdr->jt_offset);
1159                         table_size = le32_to_cpu(hdr->jt_size);
1160                 } else if (me == 3) {
1161                         const struct gfx_firmware_header_v1_0 *hdr =
1162                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1163                         fw_data = (const __le32 *)
1164                                 (adev->gfx.mec_fw->data +
1165                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1166                         table_offset = le32_to_cpu(hdr->jt_offset);
1167                         table_size = le32_to_cpu(hdr->jt_size);
1168                 } else  if (me == 4) {
1169                         const struct gfx_firmware_header_v1_0 *hdr =
1170                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1171                         fw_data = (const __le32 *)
1172                                 (adev->gfx.mec2_fw->data +
1173                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1174                         table_offset = le32_to_cpu(hdr->jt_offset);
1175                         table_size = le32_to_cpu(hdr->jt_size);
1176                 }
1177
1178                 for (i = 0; i < table_size; i ++) {
1179                         dst_ptr[bo_offset + i] =
1180                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1181                 }
1182
1183                 bo_offset += table_size;
1184         }
1185 }
1186
/* gfx_v8_0_rlc_fini - free the RLC clear-state and CP table BOs
 * (amdgpu_bo_free_kernel() tolerates NULL, so this is safe to call
 * even if allocation never happened). */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1192
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffers
 *
 * Creates the clear-state buffer in VRAM and fills it via
 * gfx_v8_0_get_csb_buffer().  On Carrizo/Stoney additionally allocates
 * the CP table buffer (jump tables plus GDS area) and fills the jump
 * tables via cz_init_cp_jump_table().
 *
 * Returns 0 on success, negative error code on allocation failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer (BO is still mapped and reserved here) */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1247
/* gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1252
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer
 *
 * Claims the compute queues this driver will own, then allocates (in
 * GTT) and zeroes one GFX8_MEC_HPD_SIZE region per compute ring.
 *
 * Returns 0 on success, negative error code on allocation failure.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	/* start with a clean (all-zero) HPD EOP area */
	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1283
/* Raw GCN machine code for the VGPR-init compute shader.  Copied
 * verbatim into the workaround IB by gfx_v8_0_do_edc_gpr_workarounds()
 * and dispatched to initialize VGPR state before enabling EDC.  Do not
 * modify without re-assembling the shader. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1320
/* Raw GCN machine code for the SGPR-init compute shader.  Dispatched
 * twice by gfx_v8_0_do_edc_gpr_workarounds() (once per SE0 CU-group
 * mask, see sgpr1_init_regs/sgpr2_init_regs) to initialize SGPR state
 * before enabling EDC.  Do not modify without re-assembling. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1345
/* Register/value pairs written via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds(); the table is
 * consumed two u32s at a time (offset, value). */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1366
/* Register/value pairs for the first SGPR-init dispatch; SE0 thread
 * management mask 0x0f selects one CU group (the second dispatch uses
 * the complementary mask, see sgpr2_init_regs).  Consumed two u32s at a
 * time (offset, value). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1387
/* Register/value pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except the SE0 mask 0xf0 targets the other CU group.
 * Consumed two u32s at a time (offset, value). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1408
/* EDC SEC/DED error-counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear their counts. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1437
/**
 * gfx_v8_0_do_edc_gpr_workarounds - prime the GPR EDC machinery (Carrizo)
 * @adev: amdgpu device pointer
 *
 * Builds a single indirect buffer containing three compute dispatches:
 * the VGPR-init shader, then the SGPR-init shader run twice with
 * complementary COMPUTE_STATIC_THREAD_MGMT_SE0 masks (0x0f / 0xf0).
 * After the IB completes, DED_MODE and PROP_FED are enabled in
 * GB_EDC_MODE, EDC is enabled in CC_GC_EDC_CONFIG, and every SEC/DED
 * counter register is read back to clear it.
 *
 * Silently returns 0 on non-Carrizo parts or when the first compute
 * ring is not ready; otherwise returns 0 on success or a negative
 * error code on IB submission/wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and clear it while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* IB size per dispatch: 3 dwords per reg pair, +4 for PGM_LO/HI,
	 * +5 for DISPATCH_DIRECT, +2 for the CS partial flush (x4 bytes) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	/* same shader as SGPR1; only the SE0 thread mask differs */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore GB_EDC_MODE with DED detection and FED propagation on */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1600
1601 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1602 {
1603         u32 gb_addr_config;
1604         u32 mc_shared_chmap, mc_arb_ramcfg;
1605         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1606         u32 tmp;
1607         int ret;
1608
1609         switch (adev->asic_type) {
1610         case CHIP_TOPAZ:
1611                 adev->gfx.config.max_shader_engines = 1;
1612                 adev->gfx.config.max_tile_pipes = 2;
1613                 adev->gfx.config.max_cu_per_sh = 6;
1614                 adev->gfx.config.max_sh_per_se = 1;
1615                 adev->gfx.config.max_backends_per_se = 2;
1616                 adev->gfx.config.max_texture_channel_caches = 2;
1617                 adev->gfx.config.max_gprs = 256;
1618                 adev->gfx.config.max_gs_threads = 32;
1619                 adev->gfx.config.max_hw_contexts = 8;
1620
1621                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1622                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1623                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1624                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1625                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1626                 break;
1627         case CHIP_FIJI:
1628                 adev->gfx.config.max_shader_engines = 4;
1629                 adev->gfx.config.max_tile_pipes = 16;
1630                 adev->gfx.config.max_cu_per_sh = 16;
1631                 adev->gfx.config.max_sh_per_se = 1;
1632                 adev->gfx.config.max_backends_per_se = 4;
1633                 adev->gfx.config.max_texture_channel_caches = 16;
1634                 adev->gfx.config.max_gprs = 256;
1635                 adev->gfx.config.max_gs_threads = 32;
1636                 adev->gfx.config.max_hw_contexts = 8;
1637
1638                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1639                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1640                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1641                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1642                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1643                 break;
1644         case CHIP_POLARIS11:
1645         case CHIP_POLARIS12:
1646                 ret = amdgpu_atombios_get_gfx_info(adev);
1647                 if (ret)
1648                         return ret;
1649                 adev->gfx.config.max_gprs = 256;
1650                 adev->gfx.config.max_gs_threads = 32;
1651                 adev->gfx.config.max_hw_contexts = 8;
1652
1653                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1654                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1655                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1656                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1657                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1658                 break;
1659         case CHIP_POLARIS10:
1660                 ret = amdgpu_atombios_get_gfx_info(adev);
1661                 if (ret)
1662                         return ret;
1663                 adev->gfx.config.max_gprs = 256;
1664                 adev->gfx.config.max_gs_threads = 32;
1665                 adev->gfx.config.max_hw_contexts = 8;
1666
1667                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1668                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1669                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1670                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1671                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1672                 break;
1673         case CHIP_TONGA:
1674                 adev->gfx.config.max_shader_engines = 4;
1675                 adev->gfx.config.max_tile_pipes = 8;
1676                 adev->gfx.config.max_cu_per_sh = 8;
1677                 adev->gfx.config.max_sh_per_se = 1;
1678                 adev->gfx.config.max_backends_per_se = 2;
1679                 adev->gfx.config.max_texture_channel_caches = 8;
1680                 adev->gfx.config.max_gprs = 256;
1681                 adev->gfx.config.max_gs_threads = 32;
1682                 adev->gfx.config.max_hw_contexts = 8;
1683
1684                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1685                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1686                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1687                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1688                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1689                 break;
1690         case CHIP_CARRIZO:
1691                 adev->gfx.config.max_shader_engines = 1;
1692                 adev->gfx.config.max_tile_pipes = 2;
1693                 adev->gfx.config.max_sh_per_se = 1;
1694                 adev->gfx.config.max_backends_per_se = 2;
1695                 adev->gfx.config.max_cu_per_sh = 8;
1696                 adev->gfx.config.max_texture_channel_caches = 2;
1697                 adev->gfx.config.max_gprs = 256;
1698                 adev->gfx.config.max_gs_threads = 32;
1699                 adev->gfx.config.max_hw_contexts = 8;
1700
1701                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1702                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1703                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1704                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1705                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1706                 break;
1707         case CHIP_STONEY:
1708                 adev->gfx.config.max_shader_engines = 1;
1709                 adev->gfx.config.max_tile_pipes = 2;
1710                 adev->gfx.config.max_sh_per_se = 1;
1711                 adev->gfx.config.max_backends_per_se = 1;
1712                 adev->gfx.config.max_cu_per_sh = 3;
1713                 adev->gfx.config.max_texture_channel_caches = 2;
1714                 adev->gfx.config.max_gprs = 256;
1715                 adev->gfx.config.max_gs_threads = 16;
1716                 adev->gfx.config.max_hw_contexts = 8;
1717
1718                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1719                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1720                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1721                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1722                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1723                 break;
1724         default:
1725                 adev->gfx.config.max_shader_engines = 2;
1726                 adev->gfx.config.max_tile_pipes = 4;
1727                 adev->gfx.config.max_cu_per_sh = 2;
1728                 adev->gfx.config.max_sh_per_se = 1;
1729                 adev->gfx.config.max_backends_per_se = 2;
1730                 adev->gfx.config.max_texture_channel_caches = 4;
1731                 adev->gfx.config.max_gprs = 256;
1732                 adev->gfx.config.max_gs_threads = 32;
1733                 adev->gfx.config.max_hw_contexts = 8;
1734
1735                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1736                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1737                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1738                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1739                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1740                 break;
1741         }
1742
1743         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1744         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1745         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1746
1747         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1748         adev->gfx.config.mem_max_burst_length_bytes = 256;
1749         if (adev->flags & AMD_IS_APU) {
1750                 /* Get memory bank mapping mode. */
1751                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1752                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1753                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1754
1755                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1756                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1757                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1758
1759                 /* Validate settings in case only one DIMM installed. */
1760                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1761                         dimm00_addr_map = 0;
1762                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1763                         dimm01_addr_map = 0;
1764                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1765                         dimm10_addr_map = 0;
1766                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1767                         dimm11_addr_map = 0;
1768
1769                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1770                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1771                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1772                         adev->gfx.config.mem_row_size_in_kb = 2;
1773                 else
1774                         adev->gfx.config.mem_row_size_in_kb = 1;
1775         } else {
1776                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1777                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1778                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1779                         adev->gfx.config.mem_row_size_in_kb = 4;
1780         }
1781
1782         adev->gfx.config.shader_engine_tile_size = 32;
1783         adev->gfx.config.num_gpus = 1;
1784         adev->gfx.config.multi_gpu_tile_size = 64;
1785
1786         /* fix up row size */
1787         switch (adev->gfx.config.mem_row_size_in_kb) {
1788         case 1:
1789         default:
1790                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1791                 break;
1792         case 2:
1793                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1794                 break;
1795         case 4:
1796                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1797                 break;
1798         }
1799         adev->gfx.config.gb_addr_config = gb_addr_config;
1800
1801         return 0;
1802 }
1803
1804 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1805                                         int mec, int pipe, int queue)
1806 {
1807         int r;
1808         unsigned irq_type;
1809         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1810
1811         ring = &adev->gfx.compute_ring[ring_id];
1812
1813         /* mec0 is me1 */
1814         ring->me = mec + 1;
1815         ring->pipe = pipe;
1816         ring->queue = queue;
1817
1818         ring->ring_obj = NULL;
1819         ring->use_doorbell = true;
1820         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1821         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1822                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1823         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1824
1825         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1826                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1827                 + ring->pipe;
1828
1829         /* type-2 packets are deprecated on MEC, use type-3 instead */
1830         r = amdgpu_ring_init(adev, ring, 1024,
1831                         &adev->gfx.eop_irq, irq_type);
1832         if (r)
1833                 return r;
1834
1835
1836         return 0;
1837 }
1838
/**
 * gfx_v8_0_sw_init - one-time software initialization of the GFX8 block
 * @handle: amdgpu device handle
 *
 * Registers the KIQ/EOP/privileged-register/privileged-instruction
 * interrupt sources, loads gfx microcode, allocates RLC and MEC
 * buffers, creates the gfx rings and the compute rings (queues are
 * allocated horizontally across pipes), sets up the KIQ ring and
 * per-queue MQDs, reserves the GDS/GWS/OA partitions and finally runs
 * the per-asic config probe.  Earlier-allocated resources are not
 * rolled back here on failure; teardown happens via gfx_v8_0_sw_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on asic generation */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			/* pipe (k) is the innermost loop so consecutive
			 * rings land on different pipes */
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
1989
/**
 * gfx_v8_0_sw_fini - software teardown of the GFX8 block
 * @handle: amdgpu device handle
 *
 * Frees the GDS/GWS/OA buffers, finalizes all gfx and compute rings,
 * then the MQDs, KIQ, MEC and RLC state, and finally releases the
 * firmware images — roughly the reverse order of gfx_v8_0_sw_init().
 * The free helpers are NULL-safe, so this is safe after a partial init.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2014
2015 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2016 {
2017         uint32_t *modearray, *mod2array;
2018         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2019         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2020         u32 reg_offset;
2021
2022         modearray = adev->gfx.config.tile_mode_array;
2023         mod2array = adev->gfx.config.macrotile_mode_array;
2024
2025         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026                 modearray[reg_offset] = 0;
2027
2028         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2029                 mod2array[reg_offset] = 0;
2030
2031         switch (adev->asic_type) {
2032         case CHIP_TOPAZ:
2033                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034                                 PIPE_CONFIG(ADDR_SURF_P2) |
2035                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2036                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038                                 PIPE_CONFIG(ADDR_SURF_P2) |
2039                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2040                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042                                 PIPE_CONFIG(ADDR_SURF_P2) |
2043                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2044                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046                                 PIPE_CONFIG(ADDR_SURF_P2) |
2047                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2048                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050                                 PIPE_CONFIG(ADDR_SURF_P2) |
2051                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2054                                 PIPE_CONFIG(ADDR_SURF_P2) |
2055                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2058                                 PIPE_CONFIG(ADDR_SURF_P2) |
2059                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2062                                 PIPE_CONFIG(ADDR_SURF_P2));
2063                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064                                 PIPE_CONFIG(ADDR_SURF_P2) |
2065                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068                                  PIPE_CONFIG(ADDR_SURF_P2) |
2069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2072                                  PIPE_CONFIG(ADDR_SURF_P2) |
2073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2075                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076                                  PIPE_CONFIG(ADDR_SURF_P2) |
2077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2080                                  PIPE_CONFIG(ADDR_SURF_P2) |
2081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2084                                  PIPE_CONFIG(ADDR_SURF_P2) |
2085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2088                                  PIPE_CONFIG(ADDR_SURF_P2) |
2089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2091                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092                                  PIPE_CONFIG(ADDR_SURF_P2) |
2093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2096                                  PIPE_CONFIG(ADDR_SURF_P2) |
2097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2100                                  PIPE_CONFIG(ADDR_SURF_P2) |
2101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2104                                  PIPE_CONFIG(ADDR_SURF_P2) |
2105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2108                                  PIPE_CONFIG(ADDR_SURF_P2) |
2109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2112                                  PIPE_CONFIG(ADDR_SURF_P2) |
2113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2116                                  PIPE_CONFIG(ADDR_SURF_P2) |
2117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2120                                  PIPE_CONFIG(ADDR_SURF_P2) |
2121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2124                                  PIPE_CONFIG(ADDR_SURF_P2) |
2125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128                                  PIPE_CONFIG(ADDR_SURF_P2) |
2129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2131                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132                                  PIPE_CONFIG(ADDR_SURF_P2) |
2133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2135
2136                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139                                 NUM_BANKS(ADDR_SURF_8_BANK));
2140                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143                                 NUM_BANKS(ADDR_SURF_8_BANK));
2144                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2147                                 NUM_BANKS(ADDR_SURF_8_BANK));
2148                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151                                 NUM_BANKS(ADDR_SURF_8_BANK));
2152                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155                                 NUM_BANKS(ADDR_SURF_8_BANK));
2156                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159                                 NUM_BANKS(ADDR_SURF_8_BANK));
2160                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2163                                 NUM_BANKS(ADDR_SURF_8_BANK));
2164                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2166                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167                                 NUM_BANKS(ADDR_SURF_16_BANK));
2168                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171                                 NUM_BANKS(ADDR_SURF_16_BANK));
2172                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175                                  NUM_BANKS(ADDR_SURF_16_BANK));
2176                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2177                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179                                  NUM_BANKS(ADDR_SURF_16_BANK));
2180                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2182                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183                                  NUM_BANKS(ADDR_SURF_16_BANK));
2184                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187                                  NUM_BANKS(ADDR_SURF_16_BANK));
2188                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191                                  NUM_BANKS(ADDR_SURF_8_BANK));
2192
2193                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2194                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2195                             reg_offset != 23)
2196                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2197
2198                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2199                         if (reg_offset != 7)
2200                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2201
2202                 break;
2203         case CHIP_FIJI:
2204                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2207                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2211                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2219                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2237                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2238                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2283                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326
2327                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362                                 NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366                                  NUM_BANKS(ADDR_SURF_8_BANK));
2367                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                                  NUM_BANKS(ADDR_SURF_8_BANK));
2371                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374                                  NUM_BANKS(ADDR_SURF_8_BANK));
2375                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378                                  NUM_BANKS(ADDR_SURF_8_BANK));
2379                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                                  NUM_BANKS(ADDR_SURF_4_BANK));
2383
2384                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2386
2387                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2388                         if (reg_offset != 7)
2389                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2390
2391                 break;
2392         case CHIP_TONGA:
2393                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2423                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2427                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2429                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2463                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2496                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2513                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515
2516                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                 NUM_BANKS(ADDR_SURF_16_BANK));
2524                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2527                                 NUM_BANKS(ADDR_SURF_16_BANK));
2528                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531                                 NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2535                                 NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_16_BANK));
2540                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2543                                 NUM_BANKS(ADDR_SURF_16_BANK));
2544                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2547                                 NUM_BANKS(ADDR_SURF_16_BANK));
2548                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2551                                 NUM_BANKS(ADDR_SURF_16_BANK));
2552                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555                                  NUM_BANKS(ADDR_SURF_16_BANK));
2556                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2559                                  NUM_BANKS(ADDR_SURF_16_BANK));
2560                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2563                                  NUM_BANKS(ADDR_SURF_8_BANK));
2564                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567                                  NUM_BANKS(ADDR_SURF_4_BANK));
2568                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2571                                  NUM_BANKS(ADDR_SURF_4_BANK));
2572
2573                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2574                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2575
2576                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2577                         if (reg_offset != 7)
2578                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2579
2580                 break;
2581         case CHIP_POLARIS11:
2582         case CHIP_POLARIS12:
                     /*
                      * Polaris11/Polaris12 tiling tables.  modearray[] holds the
                      * GB_TILE_MODE register values (array mode, pipe config,
                      * tile split, new micro tile mode, sample split); every
                      * entry in this case uses the 4-pipe ADDR_SURF_P4_16x16
                      * pipe config.  mod2array[] holds the GB_MACROTILE_MODE
                      * values (bank width/height, macro tile aspect, bank
                      * count).  Entries 0-7 are the depth-micro-tiling tile
                      * split variants, entry 8 is linear, and the remainder
                      * cover display/thin/thick/rotated micro tiling.  Both
                      * tables are flushed to the registers by the write loops
                      * at the end of this case.
                      */
2583                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2586                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2589                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2590                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2594                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2602                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2603                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2604                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2609                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2612                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2616                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2617                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2624                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2638                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2639                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2642                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2643                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2644                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2653                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2654                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2658                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2664                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2676                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2692                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2696                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2700                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2701                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2704                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2705
2706                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2709                                 NUM_BANKS(ADDR_SURF_16_BANK));
2710
2711                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2713                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2714                                 NUM_BANKS(ADDR_SURF_16_BANK));
2715
2716                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719                                 NUM_BANKS(ADDR_SURF_16_BANK));
2720
2721                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725
2726                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730
2731                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735
2736                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740
2741                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2742                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2743                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2744                                 NUM_BANKS(ADDR_SURF_16_BANK));
2745
2746                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749                                 NUM_BANKS(ADDR_SURF_16_BANK));
2750
2751                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754                                 NUM_BANKS(ADDR_SURF_16_BANK));
2755
2756                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2758                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759                                 NUM_BANKS(ADDR_SURF_16_BANK));
2760
2761                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764                                 NUM_BANKS(ADDR_SURF_16_BANK));
2765
2766                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769                                 NUM_BANKS(ADDR_SURF_8_BANK));
2770
2771                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2774                                 NUM_BANKS(ADDR_SURF_4_BANK));
2775
                     /* Flush the tile-mode table into the GB_TILE_MODE
                      * registers (loop bound is set earlier in the function).
                      */
2776                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2777                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2778
                     /*
                      * mod2array[7] is never assigned in this case, and the
                      * matching macrotile register (offset 7) is deliberately
                      * skipped here so it keeps its existing value.
                      */
2779                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2780                         if (reg_offset != 7)
2781                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2782
2783                 break;
2784         case CHIP_POLARIS10:
2785                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2787                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2788                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2791                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2792                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2793                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2795                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2799                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2803                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2807                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2811                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2818                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2819                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2829                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2831                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2835                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2837                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2838                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2841                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2843                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2844                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2845                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2851                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2855                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2856                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2860                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2864                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2866                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2876                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2894                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2898                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2899                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2903                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2904                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2907
2908                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2909                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911                                 NUM_BANKS(ADDR_SURF_16_BANK));
2912
2913                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2914                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2915                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2916                                 NUM_BANKS(ADDR_SURF_16_BANK));
2917
2918                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2930                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2931                                 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2936                                 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2941                                 NUM_BANKS(ADDR_SURF_16_BANK));
2942
2943                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2945                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2946                                 NUM_BANKS(ADDR_SURF_16_BANK));
2947
2948                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2950                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951                                 NUM_BANKS(ADDR_SURF_16_BANK));
2952
2953                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2954                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2955                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2956                                 NUM_BANKS(ADDR_SURF_16_BANK));
2957
2958                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2960                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2961                                 NUM_BANKS(ADDR_SURF_16_BANK));
2962
2963                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2966                                 NUM_BANKS(ADDR_SURF_8_BANK));
2967
2968                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2971                                 NUM_BANKS(ADDR_SURF_4_BANK));
2972
2973                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2976                                 NUM_BANKS(ADDR_SURF_4_BANK));
2977
2978                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2979                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2980
2981                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2982                         if (reg_offset != 7)
2983                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2984
2985                 break;
2986         case CHIP_STONEY:
2987                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2988                                 PIPE_CONFIG(ADDR_SURF_P2) |
2989                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2990                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2992                                 PIPE_CONFIG(ADDR_SURF_P2) |
2993                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2995                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996                                 PIPE_CONFIG(ADDR_SURF_P2) |
2997                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2998                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000                                 PIPE_CONFIG(ADDR_SURF_P2) |
3001                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3002                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004                                 PIPE_CONFIG(ADDR_SURF_P2) |
3005                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3006                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P2) |
3009                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3010                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P2) |
3013                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3016                                 PIPE_CONFIG(ADDR_SURF_P2));
3017                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P2) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3022                                  PIPE_CONFIG(ADDR_SURF_P2) |
3023                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3024                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3025                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                  PIPE_CONFIG(ADDR_SURF_P2) |
3027                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3028                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030                                  PIPE_CONFIG(ADDR_SURF_P2) |
3031                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034                                  PIPE_CONFIG(ADDR_SURF_P2) |
3035                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3036                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3038                                  PIPE_CONFIG(ADDR_SURF_P2) |
3039                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3040                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3041                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042                                  PIPE_CONFIG(ADDR_SURF_P2) |
3043                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3046                                  PIPE_CONFIG(ADDR_SURF_P2) |
3047                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3050                                  PIPE_CONFIG(ADDR_SURF_P2) |
3051                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3054                                  PIPE_CONFIG(ADDR_SURF_P2) |
3055                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3056                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3058                                  PIPE_CONFIG(ADDR_SURF_P2) |
3059                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3062                                  PIPE_CONFIG(ADDR_SURF_P2) |
3063                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3066                                  PIPE_CONFIG(ADDR_SURF_P2) |
3067                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3070                                  PIPE_CONFIG(ADDR_SURF_P2) |
3071                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3074                                  PIPE_CONFIG(ADDR_SURF_P2) |
3075                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078                                  PIPE_CONFIG(ADDR_SURF_P2) |
3079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                  PIPE_CONFIG(ADDR_SURF_P2) |
3083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3086                                  PIPE_CONFIG(ADDR_SURF_P2) |
3087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3089
3090                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3092                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3093                                 NUM_BANKS(ADDR_SURF_8_BANK));
3094                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3096                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3097                                 NUM_BANKS(ADDR_SURF_8_BANK));
3098                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3100                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3101                                 NUM_BANKS(ADDR_SURF_8_BANK));
3102                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105                                 NUM_BANKS(ADDR_SURF_8_BANK));
3106                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3108                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3109                                 NUM_BANKS(ADDR_SURF_8_BANK));
3110                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113                                 NUM_BANKS(ADDR_SURF_8_BANK));
3114                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3115                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3116                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3117                                 NUM_BANKS(ADDR_SURF_8_BANK));
3118                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3119                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3120                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121                                 NUM_BANKS(ADDR_SURF_16_BANK));
3122                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3123                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125                                 NUM_BANKS(ADDR_SURF_16_BANK));
3126                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3127                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3128                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3129                                  NUM_BANKS(ADDR_SURF_16_BANK));
3130                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3131                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3132                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133                                  NUM_BANKS(ADDR_SURF_16_BANK));
3134                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3136                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137                                  NUM_BANKS(ADDR_SURF_16_BANK));
3138                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3140                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141                                  NUM_BANKS(ADDR_SURF_16_BANK));
3142                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3144                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3145                                  NUM_BANKS(ADDR_SURF_8_BANK));
3146
3147                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3148                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3149                             reg_offset != 23)
3150                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3151
3152                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3153                         if (reg_offset != 7)
3154                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3155
3156                 break;
3157         default:
3158                 dev_warn(adev->dev,
3159                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3160                          adev->asic_type);
3161
3162         case CHIP_CARRIZO:
3163                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3164                                 PIPE_CONFIG(ADDR_SURF_P2) |
3165                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3166                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3167                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168                                 PIPE_CONFIG(ADDR_SURF_P2) |
3169                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3171                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172                                 PIPE_CONFIG(ADDR_SURF_P2) |
3173                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3174                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3175                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                 PIPE_CONFIG(ADDR_SURF_P2) |
3177                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3178                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3179                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3180                                 PIPE_CONFIG(ADDR_SURF_P2) |
3181                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3184                                 PIPE_CONFIG(ADDR_SURF_P2) |
3185                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3186                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3187                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188                                 PIPE_CONFIG(ADDR_SURF_P2) |
3189                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3190                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3191                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3192                                 PIPE_CONFIG(ADDR_SURF_P2));
3193                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194                                 PIPE_CONFIG(ADDR_SURF_P2) |
3195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3196                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210                                  PIPE_CONFIG(ADDR_SURF_P2) |
3211                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3213                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3214                                  PIPE_CONFIG(ADDR_SURF_P2) |
3215                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3216                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3217                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3218                                  PIPE_CONFIG(ADDR_SURF_P2) |
3219                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3220                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3221                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3222                                  PIPE_CONFIG(ADDR_SURF_P2) |
3223                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3224                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3225                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3226                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3228                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3229                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3230                                  PIPE_CONFIG(ADDR_SURF_P2) |
3231                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3232                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3233                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3234                                  PIPE_CONFIG(ADDR_SURF_P2) |
3235                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3236                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3237                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3238                                  PIPE_CONFIG(ADDR_SURF_P2) |
3239                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3240                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3241                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3242                                  PIPE_CONFIG(ADDR_SURF_P2) |
3243                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3245                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3246                                  PIPE_CONFIG(ADDR_SURF_P2) |
3247                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3248                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3249                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3250                                  PIPE_CONFIG(ADDR_SURF_P2) |
3251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3253                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3254                                  PIPE_CONFIG(ADDR_SURF_P2) |
3255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3257                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258                                  PIPE_CONFIG(ADDR_SURF_P2) |
3259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3261                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3262                                  PIPE_CONFIG(ADDR_SURF_P2) |
3263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3265
3266                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3267                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3268                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269                                 NUM_BANKS(ADDR_SURF_8_BANK));
3270                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3272                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273                                 NUM_BANKS(ADDR_SURF_8_BANK));
3274                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3277                                 NUM_BANKS(ADDR_SURF_8_BANK));
3278                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3280                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3281                                 NUM_BANKS(ADDR_SURF_8_BANK));
3282                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285                                 NUM_BANKS(ADDR_SURF_8_BANK));
3286                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289                                 NUM_BANKS(ADDR_SURF_8_BANK));
3290                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3291                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3292                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3293                                 NUM_BANKS(ADDR_SURF_8_BANK));
3294                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3296                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297                                 NUM_BANKS(ADDR_SURF_16_BANK));
3298                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3299                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301                                 NUM_BANKS(ADDR_SURF_16_BANK));
3302                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3303                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305                                  NUM_BANKS(ADDR_SURF_16_BANK));
3306                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309                                  NUM_BANKS(ADDR_SURF_16_BANK));
3310                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313                                  NUM_BANKS(ADDR_SURF_16_BANK));
3314                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3317                                  NUM_BANKS(ADDR_SURF_16_BANK));
3318                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321                                  NUM_BANKS(ADDR_SURF_8_BANK));
3322
3323                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3324                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3325                             reg_offset != 23)
3326                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3327
3328                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3329                         if (reg_offset != 7)
3330                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3331
3332                 break;
3333         }
3334 }
3335
3336 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3337                                   u32 se_num, u32 sh_num, u32 instance)
3338 {
3339         u32 data;
3340
3341         if (instance == 0xffffffff)
3342                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3343         else
3344                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3345
3346         if (se_num == 0xffffffff)
3347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3348         else
3349                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3350
3351         if (sh_num == 0xffffffff)
3352                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3353         else
3354                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3355
3356         WREG32(mmGRBM_GFX_INDEX, data);
3357 }
3358
3359 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3360 {
3361         u32 data, mask;
3362
3363         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3364                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3365
3366         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3367
3368         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3369                                          adev->gfx.config.max_sh_per_se);
3370
3371         return (~data) & mask;
3372 }
3373
/*
 * gfx_v8_0_raster_config - per-ASIC default raster configuration
 *
 * @adev: amdgpu_device pointer
 * @rconf: ORed with the PA_SC_RASTER_CONFIG field values for this ASIC
 * @rconf1: ORed with the PA_SC_RASTER_CONFIG_1 field values for this ASIC
 *
 * Provides the raster configuration assuming all render backends are
 * present; gfx_v8_0_setup_rb() falls back to
 * gfx_v8_0_write_harvested_raster_configs() when RBs are harvested.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		/* single RB, nothing to distribute */
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}
3413
/*
 * gfx_v8_0_write_harvested_raster_configs - adjust raster config for
 * harvested render backends
 *
 * @adev: amdgpu_device pointer
 * @raster_config: default PA_SC_RASTER_CONFIG value (all RBs present)
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: total number of RB pipes the config was sized for
 *
 * When some RBs are fused off, the default mapping would route work to
 * missing backends.  This walks each shader engine, remaps the SE/PKR/RB
 * map fields toward the surviving RBs and writes a per-SE raster config.
 * Caller holds grbm_idx_mutex.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into per-SE sub-masks, rb_per_se bits each */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if a whole SE pair is empty, point SE_PAIR_MAP at the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* one SE of this pair is empty: steer SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* one packer of this SE is empty: steer PKR_MAP likewise */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* remap RBs within packer 0 if one of the pair is gone */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* same for packer 1 when the SE has more than 2 RBs */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3522
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Scans every SE/SH for active RBs, caches the aggregate bitmap and count
 * in adev->gfx.config, then either broadcasts the default raster config
 * (no harvesting, or too few RBs to matter) or writes per-SE harvested
 * configs.  Finally snapshots the per-SE/SH RB registers for userspace
 * queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* serialize GRBM_GFX_INDEX manipulation */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into its slot of active_rbs */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* no harvesting: broadcast the default config to all SEs */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3579
3580 /**
3581  * gfx_v8_0_init_compute_vmid - gart enable
3582  *
3583  * @adev: amdgpu_device pointer
3584  *
3585  * Initialize compute vmid sh_mem registers
3586  *
3587  */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* compute VMIDs occupy the upper half of the 16 VMIDs: [8, 16) */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for the shared and private apertures (low/high halfword) */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program each compute VMID via SRBM banking */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit disables the APE1 aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3624
3625 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3626 {
3627         switch (adev->asic_type) {
3628         default:
3629                 adev->gfx.config.double_offchip_lds_buf = 1;
3630                 break;
3631         case CHIP_CARRIZO:
3632         case CHIP_STONEY:
3633                 adev->gfx.config.double_offchip_lds_buf = 0;
3634                 break;
3635         }
3636 }
3637
/*
 * gfx_v8_0_gpu_init - one-time GFX hardware initialization
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the address config, tiling tables and render backends, sets up
 * the per-VMID SH_MEM registers (graphics VMIDs here, compute VMIDs via
 * gfx_v8_0_init_compute_vmid()), and configures the PA_SC FIFO sizes and
 * SPI arbitration priorities with broadcast writes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* program SH_MEM_* per VMID via SRBM banking */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: kernel context, uncached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			/* base is the top 16 bits of the shared aperture start */
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit disables the APE1 aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3719
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY per SE/SH and then the non-CU master
 * busy bits, giving each up to adev->usec_timeout microseconds.  Timeouts
 * are not reported; the function simply returns after the deadline.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH poll needs exclusive use of GRBM_GFX_INDEX */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* then wait for the global (non-CU) serdes masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3749
3750 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3751                                                bool enable)
3752 {
3753         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3754
3755         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3756         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3757         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3758         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3759
3760         WREG32(mmCP_INT_CNTL_RING0, tmp);
3761 }
3762
/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state indirect buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the GPU address (hi/lo, low bits dword-aligned) and size of the
 * previously allocated clear-state buffer into the RLC CSIB registers.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3773
/*
 * gfx_v8_0_parse_ind_reg_list - scan the RLC indirect register list
 *
 * @register_list_format: the firmware-provided format list; the index
 *	dword of every record is rewritten in place with its position in
 *	@unique_indices
 * @ind_offset: dword offset at which the indirect entries begin
 * @list_size: total size of the list, in dwords
 * @unique_indices: out array of deduplicated register indices
 * @indices_count: in/out number of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices (BUG if exceeded)
 * @ind_start_offsets: out array of the starting offset of each entry
 * @offset_count: in/out number of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets (BUG if exceeded)
 *
 * Entries in the list are delimited by a 0xFFFFFFFF dword.  Each record
 * inside an entry is three dwords; the third holds a register index,
 * which is collected into @unique_indices and replaced in the list by
 * its compact position there.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* remember where each new entry starts */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third dword of the record, the index */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append to the unique set */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* rewrite the list to reference the compact index */
		register_list_format[ind_offset] = indices;
	}
}
3823
3824 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3825 {
3826         int i, temp, data;
3827         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3828         int indices_count = 0;
3829         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3830         int offset_count = 0;
3831
3832         int list_size;
3833         unsigned int *register_list_format =
3834                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3835         if (!register_list_format)
3836                 return -ENOMEM;
3837         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3838                         adev->gfx.rlc.reg_list_format_size_bytes);
3839
3840         gfx_v8_0_parse_ind_reg_list(register_list_format,
3841                                 RLC_FormatDirectRegListLength,
3842                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3843                                 unique_indices,
3844                                 &indices_count,
3845                                 sizeof(unique_indices) / sizeof(int),
3846                                 indirect_start_offsets,
3847                                 &offset_count,
3848                                 sizeof(indirect_start_offsets)/sizeof(int));
3849
3850         /* save and restore list */
3851         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3852
3853         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3854         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3855                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3856
3857         /* indirect list */
3858         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3859         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3860                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3861
3862         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3863         list_size = list_size >> 1;
3864         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3865         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3866
3867         /* starting offsets starts */
3868         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3869                 adev->gfx.rlc.starting_offsets_start);
3870         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3871                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3872                                 indirect_start_offsets[i]);
3873
3874         /* unique indices */
3875         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3876         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3877         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3878                 if (unique_indices[i] != 0) {
3879                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3880                         WREG32(data + i, unique_indices[i] >> 20);
3881                 }
3882         }
3883         kfree(register_list_format);
3884
3885         return 0;
3886 }
3887
/* Turn on the RLC save/restore machine (lists must be programmed first). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3892
/*
 * gfx_v8_0_init_power_gating - program the RLC power-gating timings
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the WPTR poll idle count, the RLC power up/down, command-propagate
 * and memory-sleep delays, the serdes command delay and the GFX-idle
 * threshold for automatic power gating.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
3909
/* Enable/disable SMU clock slow-down while the RLC powers blocks up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3915
/* Enable/disable SMU clock slow-down while the RLC powers blocks down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3921
/* Enable/disable CP power gating; note the register bit is a *disable*,
 * so the value written is the inverse of @enable. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
3926
3927 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3928 {
3929         if ((adev->asic_type == CHIP_CARRIZO) ||
3930             (adev->asic_type == CHIP_STONEY)) {
3931                 gfx_v8_0_init_csb(adev);
3932                 gfx_v8_0_init_save_restore_list(adev);
3933                 gfx_v8_0_enable_save_restore_machine(adev);
3934                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3935                 gfx_v8_0_init_power_gating(adev);
3936                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3937         } else if ((adev->asic_type == CHIP_POLARIS11) ||
3938                    (adev->asic_type == CHIP_POLARIS12)) {
3939                 gfx_v8_0_init_csb(adev);
3940                 gfx_v8_0_init_save_restore_list(adev);
3941                 gfx_v8_0_enable_save_restore_machine(adev);
3942                 gfx_v8_0_init_power_gating(adev);
3943         }
3944
3945 }
3946
/*
 * gfx_v8_0_rlc_stop - halt the RLC microcontroller
 *
 * @adev: amdgpu_device pointer
 *
 * Clears the RLC F32 enable, masks the GUI idle interrupts and waits for
 * the serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
3954
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * Asserts then deasserts SOFT_RESET_RLC with 50us settle delays.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
3963
/*
 * gfx_v8_0_rlc_start - start the RLC microcontroller
 *
 * @adev: amdgpu_device pointer
 *
 * Sets the RLC F32 enable and, on discrete parts, re-enables the GUI
 * idle interrupts immediately; APUs defer that until after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
3974
/*
 * gfx_v8_0_rlc_load_microcode - upload the RLC firmware by direct writes
 *
 * @adev: amdgpu_device pointer
 *
 * Legacy (non-SMU) load path: streams the RLC_G ucode dwords into
 * RLC_GPM_UCODE_DATA starting at address 0, then writes the firmware
 * version to the address register.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload follows the header at the offset it declares */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
3998
/*
 * gfx_v8_0_rlc_resume - full stop/reset/reload/start cycle for the RLC
 *
 * @adev: amdgpu_device pointer
 *
 * Halts the RLC, disables clock and power gating, resets the block,
 * reinitializes power gating, (re)loads the RLC firmware — either by
 * direct register writes or by checking that the SMU already loaded it —
 * and finally restarts the RLC.
 *
 * Returns 0 on success or a negative error code on firmware load failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has 3D CGCG/CGLS in the low 2 bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU owns the load; just confirm it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4043
4044 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4045 {
4046         int i;
4047         u32 tmp = RREG32(mmCP_ME_CNTL);
4048
4049         if (enable) {
4050                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4051                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4052                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4053         } else {
4054                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4055                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4056                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4057                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4058                         adev->gfx.gfx_ring[i].ready = false;
4059         }
4060         WREG32(mmCP_ME_CNTL, tmp);
4061         udelay(50);
4062 }
4063
4064 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4065 {
4066         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4067         const struct gfx_firmware_header_v1_0 *ce_hdr;
4068         const struct gfx_firmware_header_v1_0 *me_hdr;
4069         const __le32 *fw_data;
4070         unsigned i, fw_size;
4071
4072         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4073                 return -EINVAL;
4074
4075         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4076                 adev->gfx.pfp_fw->data;
4077         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4078                 adev->gfx.ce_fw->data;
4079         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4080                 adev->gfx.me_fw->data;
4081
4082         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4083         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4084         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4085
4086         gfx_v8_0_cp_gfx_enable(adev, false);
4087
4088         /* PFP */
4089         fw_data = (const __le32 *)
4090                 (adev->gfx.pfp_fw->data +
4091                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4092         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4093         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4094         for (i = 0; i < fw_size; i++)
4095                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4096         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4097
4098         /* CE */
4099         fw_data = (const __le32 *)
4100                 (adev->gfx.ce_fw->data +
4101                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4102         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4103         WREG32(mmCP_CE_UCODE_ADDR, 0);
4104         for (i = 0; i < fw_size; i++)
4105                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4106         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4107
4108         /* ME */
4109         fw_data = (const __le32 *)
4110                 (adev->gfx.me_fw->data +
4111                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4112         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4113         WREG32(mmCP_ME_RAM_WADDR, 0);
4114         for (i = 0; i < fw_size; i++)
4115                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4116         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4117
4118         return 0;
4119 }
4120
4121 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4122 {
4123         u32 count = 0;
4124         const struct cs_section_def *sect = NULL;
4125         const struct cs_extent_def *ext = NULL;
4126
4127         /* begin clear state */
4128         count += 2;
4129         /* context control state */
4130         count += 3;
4131
4132         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4133                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4134                         if (sect->id == SECT_CONTEXT)
4135                                 count += 2 + ext->reg_count;
4136                         else
4137                                 return 0;
4138                 }
4139         }
4140         /* pa_sc_raster_config/pa_sc_raster_config1 */
4141         count += 4;
4142         /* end clear state */
4143         count += 2;
4144         /* clear state */
4145         count += 2;
4146
4147         return count;
4148 }
4149
/* Initialize the gfx CP and emit the clear-state preamble on gfx ring 0:
 * PREAMBLE begin, CONTEXT_CONTROL, the SECT_CONTEXT register extents
 * from vi_cs_data, per-ASIC PA_SC_RASTER_CONFIG values, PREAMBLE end,
 * CLEAR_STATE, then the CE partition bases.  The dword count emitted
 * here must match gfx_v8_0_get_csb_size() (+4 for SET_BASE).
 * Returns 0 on success or the ring-alloc error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the trailing SET_BASE packet */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC pa_sc_raster_config / pa_sc_raster_config_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* every supported VI ASIC must be listed above */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4242 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4243 {
4244         u32 tmp;
4245         /* no gfx doorbells on iceland */
4246         if (adev->asic_type == CHIP_TOPAZ)
4247                 return;
4248
4249         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4250
4251         if (ring->use_doorbell) {
4252                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4253                                 DOORBELL_OFFSET, ring->doorbell_index);
4254                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4255                                                 DOORBELL_HIT, 0);
4256                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4257                                             DOORBELL_EN, 1);
4258         } else {
4259                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4260         }
4261
4262         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4263
4264         if (adev->flags & AMD_IS_APU)
4265                 return;
4266
4267         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4268                                         DOORBELL_RANGE_LOWER,
4269                                         AMDGPU_DOORBELL_GFX_RING0);
4270         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4271
4272         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4273                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4274 }
4275
/* Program the gfx ring buffer (RB0) registers — size, rptr/wptr
 * write-back addresses, base address, doorbell — then emit the
 * clear-state preamble and run a ring test.  Returns the ring-test
 * result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA lets us force the read pointer while we reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer reset settle before dropping RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4333
4334 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4335 {
4336         int i;
4337
4338         if (enable) {
4339                 WREG32(mmCP_MEC_CNTL, 0);
4340         } else {
4341                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4342                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4343                         adev->gfx.compute_ring[i].ready = false;
4344                 adev->gfx.kiq.ring.ready = false;
4345         }
4346         udelay(50);
4347 }
4348
4349 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4350 {
4351         const struct gfx_firmware_header_v1_0 *mec_hdr;
4352         const __le32 *fw_data;
4353         unsigned i, fw_size;
4354
4355         if (!adev->gfx.mec_fw)
4356                 return -EINVAL;
4357
4358         gfx_v8_0_cp_compute_enable(adev, false);
4359
4360         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4361         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4362
4363         fw_data = (const __le32 *)
4364                 (adev->gfx.mec_fw->data +
4365                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4366         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4367
4368         /* MEC1 */
4369         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4370         for (i = 0; i < fw_size; i++)
4371                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4372         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4373
4374         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4375         if (adev->gfx.mec2_fw) {
4376                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4377
4378                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4379                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4380
4381                 fw_data = (const __le32 *)
4382                         (adev->gfx.mec2_fw->data +
4383                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4384                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4385
4386                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4387                 for (i = 0; i < fw_size; i++)
4388                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4389                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4390         }
4391
4392         return 0;
4393 }
4394
4395 /* KIQ functions */
4396 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4397 {
4398         uint32_t tmp;
4399         struct amdgpu_device *adev = ring->adev;
4400
4401         /* tell RLC which is KIQ queue */
4402         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4403         tmp &= 0xffffff00;
4404         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4405         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4406         tmp |= 0x80;
4407         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4408 }
4409
/* Use the KIQ to map all user compute queues (KCQs): emits one
 * SET_RESOURCES packet describing the usable queue mask followed by a
 * MAP_QUEUES packet per compute ring, then waits for a scratch-register
 * write-back to confirm the KIQ executed the stream.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of MEC queues available for KFD/compute */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel value; the KIQ overwrites it with 0xDEADBEEF when done */
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 8 for SET_RESOURCES + 3 for the
	 * scratch write-back */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll the scratch register until the KIQ signals completion */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4495
4496 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4497 {
4498         int i, r = 0;
4499
4500         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4501                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4502                 for (i = 0; i < adev->usec_timeout; i++) {
4503                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4504                                 break;
4505                         udelay(1);
4506                 }
4507                 if (i == adev->usec_timeout)
4508                         r = -ETIMEDOUT;
4509         }
4510         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4511         WREG32(mmCP_HQD_PQ_RPTR, 0);
4512         WREG32(mmCP_HQD_PQ_WPTR, 0);
4513
4514         return r;
4515 }
4516
/* Fill in the ring's Memory Queue Descriptor (MQD) image in host
 * memory.  The MQD mirrors the CP_HQD_* register file; it is later
 * written to the hardware by gfx_v8_0_mqd_commit().  Caller must hold
 * srbm_mutex with the target queue selected (the RREG32 defaults below
 * read the selected HQD).  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all SEs/CUs for static thread management */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	if (!(adev->flags & AMD_IS_APU)) {
		/* point the CP at the dynamic CU mask stored in the
		 * same allocation, just past the MQD proper */
		mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
		mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
					     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	}
	/* EOP base is programmed in units of 256 bytes */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4661
/* Write a prepared MQD image into the currently-selected HQD registers
 * (caller must hold srbm_mutex with the target queue selected via
 * vi_srbm_select).  The register range mmCP_MQD_BASE_ADDR..mmCP_HQD_ERROR
 * maps 1:1 onto the MQD fields starting at cp_mqd_base_addr_lo, which is
 * what makes the mqd_data[] indexing below valid.  The write order is
 * deliberate: mmCP_HQD_ACTIVE (written last) activates the queue.
 * Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4698
4699 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4700 {
4701         struct amdgpu_device *adev = ring->adev;
4702         struct vi_mqd *mqd = ring->mqd_ptr;
4703         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4704
4705         gfx_v8_0_kiq_setting(ring);
4706
4707         if (adev->gfx.in_reset) { /* for GPU_RESET case */
4708                 /* reset MQD to a clean status */
4709                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4710                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4711
4712                 /* reset ring buffer */
4713                 ring->wptr = 0;
4714                 amdgpu_ring_clear_ring(ring);
4715                 mutex_lock(&adev->srbm_mutex);
4716                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4717                 gfx_v8_0_mqd_commit(adev, mqd);
4718                 vi_srbm_select(adev, 0, 0, 0, 0);
4719                 mutex_unlock(&adev->srbm_mutex);
4720         } else {
4721                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4722                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4723                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4724                 mutex_lock(&adev->srbm_mutex);
4725                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4726                 gfx_v8_0_mqd_init(ring);
4727                 gfx_v8_0_mqd_commit(adev, mqd);
4728                 vi_srbm_select(adev, 0, 0, 0, 0);
4729                 mutex_unlock(&adev->srbm_mutex);
4730
4731                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4732                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4733         }
4734
4735         return 0;
4736 }
4737
/*
 * gfx_v8_0_kcq_init_queue - initialize the MQD for one kernel compute queue
 *
 * Three cases are handled:
 *  - first-time init (neither reset nor resume): build a fresh MQD under
 *    SRBM selection and stash a backup copy for later GPU-reset recovery;
 *  - GPU reset: restore the MQD from the backup and scrub the ring buffer;
 *  - resume from suspend: only scrub the ring buffer, the MQD contents
 *    are reused as-is.
 *
 * NOTE(review): ring->mqd_ptr is treated as a full struct vi_mqd_allocation
 * (memset/memcpy use that size) even though it is typed vi_mqd * -- assumes
 * the allocator reserved the larger size; confirm against mqd_obj creation.
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* position of this ring in compute_ring[] doubles as the index
	 * into the mqd_backup[] array */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* enable all CUs/RBs for this queue by default */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		/* MQD init programs per-queue registers, so the target
		 * me/pipe/queue must be selected via SRBM first */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy so a GPU reset can restore it */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume-from-suspend: MQD kept, just scrub the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4768
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 *
 * Sets the lower/upper doorbell range covering the KIQ through MEC ring 7
 * doorbells, then enables doorbell processing in CP_PQ_STATUS.  The range
 * registers are only written on ASICs newer than Tonga.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4778
/*
 * gfx_v8_0_kiq_resume - bring up the KIQ and all kernel compute queues
 *
 * For each ring (KIQ first, then every KCQ) the MQD buffer object is
 * reserved and CPU-mapped just long enough to initialize the MQD, then
 * unmapped again.  Afterwards the doorbell range is programmed, the KCQs
 * are mapped through the KIQ, and every ring is ring-tested.
 *
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	/* the KIQ is initialized first; it is later used to enable the KCQs */
	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	/* CPU-map the MQD only for the duration of the init */
	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* same reserve/map/init/unmap sequence for every compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* map the KCQs onto hardware queues via KIQ packets */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs; a single KCQ failure is not fatal, the ring is just
	 * marked not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4846
/*
 * gfx_v8_0_cp_resume - load CP microcode and restart the CP engines
 *
 * When powerplay is disabled, the CP firmware is either loaded directly by
 * the driver (legacy path) or its SMU-driven load is verified per ucode ID.
 * Topaz is special-cased: even on the SMU path its MEC firmware is loaded
 * by the driver.  Afterwards the gfx ring and the KIQ/KCQ rings are resumed.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just confirm each ucode
			 * finished loading */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				/* Topaz: MEC is still loaded by the driver */
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								                 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4905
/* Enable or disable both CP engines (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4911
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook
 *
 * Applies the golden register settings, performs the one-time GPU setup,
 * then resumes the RLC followed by the CP.  Returns 0 on success or the
 * first error code encountered.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is brought up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4928
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook
 *
 * Drops the privileged register/instruction fault interrupts, then stops
 * the CP and RLC and ungates GFX power gating.  Under SR-IOV the host owns
 * the hardware state, so teardown is skipped after the irq puts.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	/* make sure GFX is powered up before the block goes down */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4947
/* IP-block suspend hook: flag in_suspend so the queue-init paths know the
 * MQDs survive, then run the regular hw_fini sequence. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}
4954
/* IP-block resume hook: re-run hw_init, then clear the in_suspend flag
 * (the flag must stay set during hw_init so the KCQ init takes the
 * resume path). */
static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
4964
4965 static bool gfx_v8_0_is_idle(void *handle)
4966 {
4967         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4968
4969         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4970                 return false;
4971         else
4972                 return true;
4973 }
4974
4975 static int gfx_v8_0_wait_for_idle(void *handle)
4976 {
4977         unsigned i;
4978         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4979
4980         for (i = 0; i < adev->usec_timeout; i++) {
4981                 if (gfx_v8_0_is_idle(handle))
4982                         return 0;
4983
4984                 udelay(1);
4985         }
4986         return -ETIMEDOUT;
4987 }
4988
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and accumulates the
 * corresponding GRBM/SRBM soft-reset bits for every busy unit found.  The
 * computed masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset stages to consume.
 *
 * Returns true when any reset bit was set (reset required), false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx pipeline unit means CP+GFX+GRBM reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy gets its own reset bit */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP frontend/compute/gfx micro-engine resets all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5050
/*
 * gfx_v8_0_pre_soft_reset - quiesce the GFX block before a soft reset
 *
 * Consumes the reset masks cached by gfx_v8_0_check_soft_reset().  Stops
 * the RLC, disables CP gfx fetch when the CP/GFX bits are set, and for any
 * CP reset deactivates every compute HQD (under SRBM selection) before
 * disabling CP compute fetch.  No-op when no reset is pending.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* deactivate this ring's HQD with its queue selected */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

       return 0;
}
5092
/*
 * gfx_v8_0_soft_reset - pulse the pending GRBM/SRBM soft-reset bits
 *
 * Uses the masks cached by gfx_v8_0_check_soft_reset().  The GFX memory
 * controller interface is stalled/cleared via GMCON_DEBUG around the reset
 * pulse.  Each reset register is written with the bits set, read back to
 * post the write, held for 50us, then written with the bits cleared.
 * No-op when no reset is pending.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall the GFX memory controller path while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to make sure the write landed */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to make sure the write landed */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX memory controller stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5154
/*
 * gfx_v8_0_post_soft_reset - restart the GFX block after a soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): for the units that were reset it
 * resumes the CP gfx ring, deactivates any stale compute HQD state and
 * re-runs the KIQ/KCQ resume, then restarts the RLC.  No-op when no reset
 * was pending.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			/* clear any leftover HQD activation before resume */
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5192
5193 /**
5194  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5195  *
5196  * @adev: amdgpu_device pointer
5197  *
5198  * Fetches a GPU clock counter snapshot.
5199  * Returns the 64 bit clock counter snapshot.
5200  */
5201 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5202 {
5203         uint64_t clock;
5204
5205         mutex_lock(&adev->gfx.gpu_clock_mutex);
5206         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5207         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5208                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5209         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5210         return clock;
5211 }
5212
5213 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5214                                           uint32_t vmid,
5215                                           uint32_t gds_base, uint32_t gds_size,
5216                                           uint32_t gws_base, uint32_t gws_size,
5217                                           uint32_t oa_base, uint32_t oa_size)
5218 {
5219         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5220         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5221
5222         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5223         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5224
5225         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5226         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5227
5228         /* GDS Base */
5229         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5230         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5231                                 WRITE_DATA_DST_SEL(0)));
5232         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5233         amdgpu_ring_write(ring, 0);
5234         amdgpu_ring_write(ring, gds_base);
5235
5236         /* GDS Size */
5237         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5238         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5239                                 WRITE_DATA_DST_SEL(0)));
5240         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5241         amdgpu_ring_write(ring, 0);
5242         amdgpu_ring_write(ring, gds_size);
5243
5244         /* GWS */
5245         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5246         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5247                                 WRITE_DATA_DST_SEL(0)));
5248         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5249         amdgpu_ring_write(ring, 0);
5250         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5251
5252         /* OA */
5253         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5254         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5255                                 WRITE_DATA_DST_SEL(0)));
5256         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5257         amdgpu_ring_write(ring, 0);
5258         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5259 }
5260
/* Read a single dword from a wave's indirect (SQ_IND_*) register space:
 * select the wave/simd/register via SQ_IND_INDEX, then read SQ_IND_DATA.
 * FORCE_READ is set -- presumably to allow reading regardless of wave
 * state; confirm against the SQ register spec. */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5270
5271 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5272                            uint32_t wave, uint32_t thread,
5273                            uint32_t regno, uint32_t num, uint32_t *out)
5274 {
5275         WREG32(mmSQ_IND_INDEX,
5276                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5277                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5278                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5279                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5280                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5281                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5282         while (num--)
5283                 *(out++) = RREG32(mmSQ_IND_DATA);
5284 }
5285
/* Dump the standard wave state registers of one wave into @dst, advancing
 * *no_fields by one per entry written.  The leading 0 appears to be a
 * layout/type tag for the consumer ("type 0 wave data") -- confirm with
 * the debugfs reader. */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5309
5310 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5311                                      uint32_t wave, uint32_t start,
5312                                      uint32_t size, uint32_t *dst)
5313 {
5314         wave_read_regs(
5315                 adev, simd, wave, 0,
5316                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5317 }

/* Common GFX callbacks installed on adev->gfx.funcs in early_init. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5326
/* IP-block early_init hook: set ring counts and install the gfx, ring,
 * irq, GDS and RLC function tables before any hardware is touched. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5341
/* IP-block late_init hook: enable the privileged register/instruction
 * fault interrupts, run the EDC GPR workarounds (which need the IB pool,
 * hence late init), then gate GFX power gating. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5365
/* Enable/disable static per-CU (medium grain) power gating.  On
 * Polaris11/12 the SMU must also be told through the powerplay SMC
 * block; on all ASICs the RLC_PG_CNTL bit is toggled. */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5379
/* Enable/disable dynamic per-CU (medium grain) power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5385
/* Enable/disable quick medium-grain power gating (Polaris11 family). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5391
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5397
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney).  On disable,
 * a dummy register read is issued to wake the GFX block back up. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5407
5408 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5409                                           bool enable)
5410 {
5411         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5412                 cz_enable_gfx_cg_power_gating(adev, true);
5413                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5414                         cz_enable_gfx_pipeline_power_gating(adev, true);
5415         } else {
5416                 cz_enable_gfx_cg_power_gating(adev, false);
5417                 cz_enable_gfx_pipeline_power_gating(adev, false);
5418         }
5419 }
5420
/*
 * gfx_v8_0_set_powergating_state - program GFX power gating per ASIC
 *
 * Applies the power-gating features supported by adev->pg_flags, gating
 * when @state is AMD_PG_STATE_GATE and ungating otherwise.  RLC safe mode
 * is entered around the register updates whenever any of the affected PG
 * features is supported.  No-op under SR-IOV.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	/* the host controls PG for virtual functions */
	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down in both directions tracks RLC_SMU_HS support */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static then dynamic medium-grain gating, each only when
		 * both supported and gating was requested */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5490
/**
 * gfx_v8_0_get_clockgating_state - report currently-active GFX clockgating
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 * @flags: output bitmask; AMD_CG_SUPPORT_GFX_* bits are OR-ed in for each
 *         feature the hardware registers show as enabled
 *
 * Derives the live clock-gating state by reading the RLC/CGTS/CP control
 * registers rather than trusting cached driver state.  Under SR-IOV the
 * flags are cleared first.
 * NOTE(review): under SR-IOV the function still goes on to read registers
 * after zeroing *flags rather than returning early — presumably intentional,
 * but worth confirming the VF may access these registers.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override bit is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the SM override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS: RLC memory light-sleep implies MGLS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS: CP memory light-sleep implies MGLS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5532
/**
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes bus
 * @adev: amdgpu device pointer
 * @reg_addr: BPM register to target (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: serdes command value (e.g. SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs/CUs and sets the CU and non-CU master masks to
 * all-ones so the write reaches every BPM, then programs
 * RLC_SERDES_WR_CTRL with the command/address.  STONEY's variant of the
 * register lacks the BPM_DATA/REG_ADDR fields, hence the two mask sets.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: select every shader engine / array / CU */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear all command/select fields before setting the new command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* command + target register, BPM address 0xff (broadcast) */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5573
/* RLC safe-mode handshake message values and the RLC_GPR_REG2 REQ/MESSAGE
 * field layout (1-bit request flag, 4-bit message code).
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5580
/*
 * Request that the RLC firmware enter "safe mode" so clock/power gating
 * state can be reprogrammed without racing the RLC.  No-op when the RLC
 * F32 core is disabled or neither CGCG nor MGCG is supported.  Both poll
 * loops are bounded by adev->usec_timeout (1us steps).
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1 + MESSAGE=1: request safe-mode entry.
		 * NOTE(review): 'data' still carries the RLC_CNTL value read
		 * above when it is OR-ed into RLC_SAFE_MODE — presumably the
		 * extra bits are ignored by that register, but confirm.
		 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until GFX clock and power both report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* the RLC clears CMD once it has consumed the request */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5614
/*
 * Release RLC "safe mode" previously requested by
 * iceland_enter_rlc_safe_mode().  Writing CMD with MESSAGE=0 asks the RLC
 * to exit; the trailing poll waits (up to adev->usec_timeout us) for the
 * RLC to acknowledge by clearing CMD.  No-op when the RLC F32 core is off.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0 -> request safe-mode exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to clear CMD (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5639
/* RLC safe-mode entry/exit hooks shared by the VI-family ASICs here */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5644
/**
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clockgating (subject to cg_flags)
 *
 * Programs RLC/CP memory light-sleep, the RLC MGCG override bits and the
 * CGTS (tree-shade) SM controls, then pushes the MGCG override to the
 * BPMs over the serdes bus.  The whole sequence runs inside RLC safe
 * mode; the numbered comments mark the required hardware ordering.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear overrides to let MGCG run.
		 * APUs keep the GRBM override set; dGPUs clear it as well.
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* also drop the LS override when CGTS_LS is supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5748
/**
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clockgating (subject to cg_flags)
 *
 * Enables/disables coarse-grain clockgating (CGCG) and the associated
 * light-sleep (CGLS) by programming RLC_CGCG_CGLS_CTRL, the RLC override
 * bits and the BPMs (via serdes commands), and toggling the GUI-idle
 * interrupts the RLC relies on.  Runs inside RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and clear the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5841 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5842                                             bool enable)
5843 {
5844         if (enable) {
5845                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5846                  * ===  MGCG + MGLS + TS(CG/LS) ===
5847                  */
5848                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5850         } else {
5851                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5852                  * ===  CGCG + CGLS ===
5853                  */
5854                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5855                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5856         }
5857         return 0;
5858 }
5859
5860 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5861                                           enum amd_clockgating_state state)
5862 {
5863         uint32_t msg_id, pp_state = 0;
5864         uint32_t pp_support_state = 0;
5865         void *pp_handle = adev->powerplay.pp_handle;
5866
5867         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5868                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5869                         pp_support_state = PP_STATE_SUPPORT_LS;
5870                         pp_state = PP_STATE_LS;
5871                 }
5872                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5873                         pp_support_state |= PP_STATE_SUPPORT_CG;
5874                         pp_state |= PP_STATE_CG;
5875                 }
5876                 if (state == AMD_CG_STATE_UNGATE)
5877                         pp_state = 0;
5878
5879                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880                                 PP_BLOCK_GFX_CG,
5881                                 pp_support_state,
5882                                 pp_state);
5883                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5884         }
5885
5886         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888                         pp_support_state = PP_STATE_SUPPORT_LS;
5889                         pp_state = PP_STATE_LS;
5890                 }
5891
5892                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893                         pp_support_state |= PP_STATE_SUPPORT_CG;
5894                         pp_state |= PP_STATE_CG;
5895                 }
5896
5897                 if (state == AMD_CG_STATE_UNGATE)
5898                         pp_state = 0;
5899
5900                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5901                                 PP_BLOCK_GFX_MG,
5902                                 pp_support_state,
5903                                 pp_state);
5904                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5905         }
5906
5907         return 0;
5908 }
5909
5910 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5911                                           enum amd_clockgating_state state)
5912 {
5913
5914         uint32_t msg_id, pp_state = 0;
5915         uint32_t pp_support_state = 0;
5916         void *pp_handle = adev->powerplay.pp_handle;
5917
5918         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920                         pp_support_state = PP_STATE_SUPPORT_LS;
5921                         pp_state = PP_STATE_LS;
5922                 }
5923                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924                         pp_support_state |= PP_STATE_SUPPORT_CG;
5925                         pp_state |= PP_STATE_CG;
5926                 }
5927                 if (state == AMD_CG_STATE_UNGATE)
5928                         pp_state = 0;
5929
5930                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5931                                 PP_BLOCK_GFX_CG,
5932                                 pp_support_state,
5933                                 pp_state);
5934                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5935         }
5936
5937         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5938                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5939                         pp_support_state = PP_STATE_SUPPORT_LS;
5940                         pp_state = PP_STATE_LS;
5941                 }
5942                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5943                         pp_support_state |= PP_STATE_SUPPORT_CG;
5944                         pp_state |= PP_STATE_CG;
5945                 }
5946                 if (state == AMD_CG_STATE_UNGATE)
5947                         pp_state = 0;
5948
5949                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5950                                 PP_BLOCK_GFX_3D,
5951                                 pp_support_state,
5952                                 pp_state);
5953                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5954         }
5955
5956         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5957                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5958                         pp_support_state = PP_STATE_SUPPORT_LS;
5959                         pp_state = PP_STATE_LS;
5960                 }
5961
5962                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5963                         pp_support_state |= PP_STATE_SUPPORT_CG;
5964                         pp_state |= PP_STATE_CG;
5965                 }
5966
5967                 if (state == AMD_CG_STATE_UNGATE)
5968                         pp_state = 0;
5969
5970                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971                                 PP_BLOCK_GFX_MG,
5972                                 pp_support_state,
5973                                 pp_state);
5974                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5975         }
5976
5977         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978                 pp_support_state = PP_STATE_SUPPORT_LS;
5979
5980                 if (state == AMD_CG_STATE_UNGATE)
5981                         pp_state = 0;
5982                 else
5983                         pp_state = PP_STATE_LS;
5984
5985                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986                                 PP_BLOCK_GFX_RLC,
5987                                 pp_support_state,
5988                                 pp_state);
5989                 amd_set_clockgating_by_smu(pp_handle, msg_id);
5990         }
5991
5992         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5993                 pp_support_state = PP_STATE_SUPPORT_LS;
5994
5995                 if (state == AMD_CG_STATE_UNGATE)
5996                         pp_state = 0;
5997                 else
5998                         pp_state = PP_STATE_LS;
5999                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6000                         PP_BLOCK_GFX_CP,
6001                         pp_support_state,
6002                         pp_state);
6003                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6004         }
6005
6006         return 0;
6007 }
6008
6009 static int gfx_v8_0_set_clockgating_state(void *handle,
6010                                           enum amd_clockgating_state state)
6011 {
6012         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6013
6014         if (amdgpu_sriov_vf(adev))
6015                 return 0;
6016
6017         switch (adev->asic_type) {
6018         case CHIP_FIJI:
6019         case CHIP_CARRIZO:
6020         case CHIP_STONEY:
6021                 gfx_v8_0_update_gfx_clock_gating(adev,
6022                                                  state == AMD_CG_STATE_GATE);
6023                 break;
6024         case CHIP_TONGA:
6025                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6026                 break;
6027         case CHIP_POLARIS10:
6028         case CHIP_POLARIS11:
6029         case CHIP_POLARIS12:
6030                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6031                 break;
6032         default:
6033                 break;
6034         }
6035         return 0;
6036 }
6037
6038 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6039 {
6040         return ring->adev->wb.wb[ring->rptr_offs];
6041 }
6042
6043 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6044 {
6045         struct amdgpu_device *adev = ring->adev;
6046
6047         if (ring->use_doorbell)
6048                 /* XXX check if swapping is necessary on BE */
6049                 return ring->adev->wb.wb[ring->wptr_offs];
6050         else
6051                 return RREG32(mmCP_RB0_WPTR);
6052 }
6053
6054 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6055 {
6056         struct amdgpu_device *adev = ring->adev;
6057
6058         if (ring->use_doorbell) {
6059                 /* XXX check if swapping is necessary on BE */
6060                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6061                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6062         } else {
6063                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6064                 (void)RREG32(mmCP_RB0_WPTR);
6065         }
6066 }
6067
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * the matching done bit via the GPU_HDP_FLUSH_REQ/DONE register pair.
 * Compute/KIQ rings use the per-pipe CP2/CP6 done bits depending on
 * which ME they run on; other MEs bail out.  The gfx ring uses CP0 and
 * waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6100
/*
 * Emit a VS partial flush followed by a VGT flush so the vertex/geometry
 * front end is drained before state that affects it is changed.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6111
6112
/*
 * Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 from the ring
 * (WRITE_DATA to a register with write-confirm).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6124
/*
 * Emit an indirect buffer on the gfx ring.  Constant-engine IBs use the
 * INDIRECT_BUFFER_CONST opcode, DE IBs the plain one.  The control dword
 * carries the IB length and the VMID in bits 24+.  Under SR-IOV,
 * preemptible IBs are marked PRE_ENB and DE IBs additionally emit the
 * de-meta data needed for mid-command-buffer preemption.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));	/* IB base, dword-aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6154
/*
 * Emit an indirect buffer on a compute ring.  Control dword = VALID bit,
 * IB length in dwords and the VMID in bits 24+; the 64-bit IB address is
 * split across two dwords (low part dword-aligned).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6170
/*
 * Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush/invalidate the
 * TC/TCL1 caches, then write the 32- or 64-bit sequence number to @addr
 * and optionally raise an interrupt, depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = 64-bit seq, 1 = 32-bit; INT_SEL: 2 = int + write confirm */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6191
/*
 * Emit a WAIT_REG_MEM that blocks the ring until this ring's fence
 * memory reaches the last emitted sequence number, i.e. until all
 * previously submitted work on the ring has signalled.  Gfx rings wait
 * on the PFP engine, compute rings on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6208
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	/* Program the page-directory base for @vm_id, invalidate the TLB
	 * for that VMID and wait until the invalidate has landed.
	 */
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	/* write the new page table base for this VMID */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* register takes a 4K page frame number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6255
6256 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6257 {
6258         return ring->adev->wb.wb[ring->wptr_offs];
6259 }
6260
6261 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6262 {
6263         struct amdgpu_device *adev = ring->adev;
6264
6265         /* XXX check if swapping is necessary on BE */
6266         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6267         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6268 }
6269
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	/* Emit a fence on a compute ring via RELEASE_MEM: flush caches,
	 * write @seq to @addr and optionally raise an interrupt once the
	 * preceding work has drained.
	 */
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL 1 = 32-bit value, 2 = 64-bit; INT_SEL 2 = interrupt */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6290
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* KIQ fence: write the 32-bit @seq with WRITE_DATA and, when
	 * requested, poke CPC_INT_STATUS to raise the interrupt.
	 */
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); /* dst 5: memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6315
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	/* Emit a SWITCH_BUFFER packet (no payload beyond the zero dword). */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6321
6322 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6323 {
6324         uint32_t dw2 = 0;
6325
6326         if (amdgpu_sriov_vf(ring->adev))
6327                 gfx_v8_0_ring_emit_ce_meta(ring);
6328
6329         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6330         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6331                 gfx_v8_0_ring_emit_vgt_flush(ring);
6332                 /* set load_global_config & load_global_uconfig */
6333                 dw2 |= 0x8001;
6334                 /* set load_cs_sh_regs */
6335                 dw2 |= 0x01000000;
6336                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6337                 dw2 |= 0x10002;
6338
6339                 /* set load_ce_ram if preamble presented */
6340                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6341                         dw2 |= 0x10000000;
6342         } else {
6343                 /* still load_ce_ram if this is the first time preamble presented
6344                  * although there is no context switch happens.
6345                  */
6346                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6347                         dw2 |= 0x10000000;
6348         }
6349
6350         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351         amdgpu_ring_write(ring, dw2);
6352         amdgpu_ring_write(ring, 0);
6353 }
6354
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	/* Emit a COND_EXEC packet whose dword count is a placeholder to be
	 * back-patched later (see gfx_v8_0_ring_emit_patch_cond_exec).
	 * Returns the ring offset of the placeholder dword.
	 */
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	/* capture the offset BEFORE writing the dummy count dword */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6367
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	/* Back-patch the COND_EXEC placeholder written by
	 * gfx_v8_0_ring_emit_init_cond_exec with the number of dwords
	 * emitted since, accounting for ring-buffer wrap-around.
	 */
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1; /* last dword written */
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped around the end of the ring since the patch point */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6381
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	/* Ask the CP to copy register @reg into the writeback slot reserved
	 * for virtualized register reads (adev->virt.reg_val_offs).
	 */
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	/* destination: writeback buffer + reg_val_offs (in dwords) */
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6397
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	/* Write @val to register @reg through a WRITE_DATA packet. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6407
6408 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6409                                                  enum amdgpu_interrupt_state state)
6410 {
6411         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6412                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6413 }
6414
6415 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6416                                                      int me, int pipe,
6417                                                      enum amdgpu_interrupt_state state)
6418 {
6419         u32 mec_int_cntl, mec_int_cntl_reg;
6420
6421         /*
6422          * amdgpu controls only the first MEC. That's why this function only
6423          * handles the setting of interrupts for this specific MEC. All other
6424          * pipes' interrupts are set by amdkfd.
6425          */
6426
6427         if (me == 1) {
6428                 switch (pipe) {
6429                 case 0:
6430                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6431                         break;
6432                 case 1:
6433                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6434                         break;
6435                 case 2:
6436                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6437                         break;
6438                 case 3:
6439                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6440                         break;
6441                 default:
6442                         DRM_DEBUG("invalid pipe %d\n", pipe);
6443                         return;
6444                 }
6445         } else {
6446                 DRM_DEBUG("invalid me %d\n", me);
6447                 return;
6448         }
6449
6450         switch (state) {
6451         case AMDGPU_IRQ_STATE_DISABLE:
6452                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6453                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6454                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6455                 break;
6456         case AMDGPU_IRQ_STATE_ENABLE:
6457                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6458                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6459                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6460                 break;
6461         default:
6462                 break;
6463         }
6464 }
6465
6466 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6467                                              struct amdgpu_irq_src *source,
6468                                              unsigned type,
6469                                              enum amdgpu_interrupt_state state)
6470 {
6471         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6472                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6473
6474         return 0;
6475 }
6476
6477 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6478                                               struct amdgpu_irq_src *source,
6479                                               unsigned type,
6480                                               enum amdgpu_interrupt_state state)
6481 {
6482         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6483                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6484
6485         return 0;
6486 }
6487
6488 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6489                                             struct amdgpu_irq_src *src,
6490                                             unsigned type,
6491                                             enum amdgpu_interrupt_state state)
6492 {
6493         switch (type) {
6494         case AMDGPU_CP_IRQ_GFX_EOP:
6495                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6496                 break;
6497         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6498                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6499                 break;
6500         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6501                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6502                 break;
6503         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6504                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6505                 break;
6506         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6507                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6508                 break;
6509         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6510                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6511                 break;
6512         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6513                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6514                 break;
6515         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6516                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6517                 break;
6518         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6519                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6520                 break;
6521         default:
6522                 break;
6523         }
6524         return 0;
6525 }
6526
6527 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6528                             struct amdgpu_irq_src *source,
6529                             struct amdgpu_iv_entry *entry)
6530 {
6531         int i;
6532         u8 me_id, pipe_id, queue_id;
6533         struct amdgpu_ring *ring;
6534
6535         DRM_DEBUG("IH: CP EOP\n");
6536         me_id = (entry->ring_id & 0x0c) >> 2;
6537         pipe_id = (entry->ring_id & 0x03) >> 0;
6538         queue_id = (entry->ring_id & 0x70) >> 4;
6539
6540         switch (me_id) {
6541         case 0:
6542                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6543                 break;
6544         case 1:
6545         case 2:
6546                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6547                         ring = &adev->gfx.compute_ring[i];
6548                         /* Per-queue interrupt is supported for MEC starting from VI.
6549                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6550                           */
6551                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6552                                 amdgpu_fence_process(ring);
6553                 }
6554                 break;
6555         }
6556         return 0;
6557 }
6558
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	/* A command stream touched a privileged register: log it and
	 * schedule a GPU reset from process context.
	 */
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6567
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	/* A command stream issued an illegal instruction: log it and
	 * schedule a GPU reset from process context.
	 */
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6576
6577 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6578                                             struct amdgpu_irq_src *src,
6579                                             unsigned int type,
6580                                             enum amdgpu_interrupt_state state)
6581 {
6582         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6583
6584         switch (type) {
6585         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6586                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6587                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6588                 if (ring->me == 1)
6589                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6590                                      ring->pipe,
6591                                      GENERIC2_INT_ENABLE,
6592                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6593                 else
6594                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6595                                      ring->pipe,
6596                                      GENERIC2_INT_ENABLE,
6597                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6598                 break;
6599         default:
6600                 BUG(); /* kiq only support GENERIC2_INT now */
6601                 break;
6602         }
6603         return 0;
6604 }
6605
6606 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6607                             struct amdgpu_irq_src *source,
6608                             struct amdgpu_iv_entry *entry)
6609 {
6610         u8 me_id, pipe_id, queue_id;
6611         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6612
6613         me_id = (entry->ring_id & 0x0c) >> 2;
6614         pipe_id = (entry->ring_id & 0x03) >> 0;
6615         queue_id = (entry->ring_id & 0x70) >> 4;
6616         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6617                    me_id, pipe_id, queue_id);
6618
6619         amdgpu_fence_process(ring);
6620         return 0;
6621 }
6622
/* IP-block level callbacks: init/teardown, suspend/resume, soft reset
 * and clock/power gating control for the gfx v8 block.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6643
/* Ring callbacks for the GFX ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6687
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6716
/* Ring callbacks for the kernel interface queue (KIQ); also carries the
 * register read/write emitters used for virtualized register access.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6742
6743 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6744 {
6745         int i;
6746
6747         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6748
6749         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6750                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6751
6752         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6753                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6754 }
6755
/* End-of-pipe interrupt source (gfx and compute fence completion). */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6760
/* Privileged-register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6765
/* Privileged-instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6770
/* KIQ GENERIC2 interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
6775
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* Register the interrupt sources this block services and the number
	 * of sub-types each one supports.
	 */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
}
6790
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	/* All gfx v8 paths in this file use the iceland RLC callbacks. */
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
6795
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info (fixed typo: was "asci") */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* Partition sizes depend on how much GDS memory the ASIC exposes:
	 * 64KB parts get larger mem partitions but smaller GWS partitions.
	 */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6823
6824 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6825                                                  u32 bitmap)
6826 {
6827         u32 data;
6828
6829         if (!bitmap)
6830                 return;
6831
6832         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6833         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6834
6835         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6836 }
6837
6838 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6839 {
6840         u32 data, mask;
6841
6842         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6843                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6844
6845         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6846
6847         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6848 }
6849
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	/* Walk every shader engine/array, record the active-CU bitmap per
	 * SE/SH and build the always-on (AO) CU mask and total CU count.
	 */
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap the always-on CU count at 2; dGPUs allow a full SH */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* grbm_idx_mutex protects the SE/SH selection below */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* select the SE/SH pair the CU registers refer to */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num are "always on" */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast to all SE/SH before releasing the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6900
/* Exported IP block descriptor for gfx v8.0 ASIC variants. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6909
/*
 * IP block version descriptor for GFX 8.1; shares gfx_v8_0_ip_funcs
 * with the 8.0 block and differs only in the advertised minor version.
 */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6918
6919 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6920 {
6921         uint64_t ce_payload_addr;
6922         int cnt_ce;
6923         static union {
6924                 struct vi_ce_ib_state regular;
6925                 struct vi_ce_ib_state_chained_ib chained;
6926         } ce_payload = {};
6927
6928         if (ring->adev->virt.chained_ib_support) {
6929                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6930                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6931                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6932         } else {
6933                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6934                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
6935                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6936         }
6937
6938         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6939         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6940                                 WRITE_DATA_DST_SEL(8) |
6941                                 WR_CONFIRM) |
6942                                 WRITE_DATA_CACHE_POLICY(0));
6943         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6944         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6945         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6946 }
6947
6948 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6949 {
6950         uint64_t de_payload_addr, gds_addr, csa_addr;
6951         int cnt_de;
6952         static union {
6953                 struct vi_de_ib_state regular;
6954                 struct vi_de_ib_state_chained_ib chained;
6955         } de_payload = {};
6956
6957         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6958         gds_addr = csa_addr + 4096;
6959         if (ring->adev->virt.chained_ib_support) {
6960                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
6961                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
6962                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
6963                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
6964         } else {
6965                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
6966                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
6967                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
6968                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
6969         }
6970
6971         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
6972         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6973                                 WRITE_DATA_DST_SEL(8) |
6974                                 WR_CONFIRM) |
6975                                 WRITE_DATA_CACHE_POLICY(0));
6976         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
6977         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
6978         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
6979 }