2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
51 #include "smu/smu_7_1_3_d.h"
/* Number of GFX and compute rings exposed by GFX v8. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden GB_ADDR_CONFIG values per ASIC family. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers to build GB_TILE_MODEn / GB_MACROTILE_MODEn register values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-block clockgating override bits in RLC_CGTT_MGCG_OVERRIDE. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set or clear a BPM register. */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0
82 /* BPM Register Address*/
84 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
85 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
86 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
87 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
88 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
/* Number of entries in the RLC "format direct" register list. */
#define RLC_FormatDirectRegListLength        14
96 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
98 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
99 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
100 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
101 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
102 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
103 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
104 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
105 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
106 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
107 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
108 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
109 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
110 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
111 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
112 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
113 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
116 static const u32 golden_settings_tonga_a11[] =
118 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
119 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
120 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
121 mmGB_GPU_ID, 0x0000000f, 0x00000000,
122 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
123 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
124 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
125 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
126 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
127 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
128 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
129 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
130 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
131 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
132 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
135 static const u32 tonga_golden_common_all[] =
137 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
138 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
139 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
140 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
141 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
142 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
143 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
144 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
147 static const u32 tonga_mgcg_cgcg_init[] =
149 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
150 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
151 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
152 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
153 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
154 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
155 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
156 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
157 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
158 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
160 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
163 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
165 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
167 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
168 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
169 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
170 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
171 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
174 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
175 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
176 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
177 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
178 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
179 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
181 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
182 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
183 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
184 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
185 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
186 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
187 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
188 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
189 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
190 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
191 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
192 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
193 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
194 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
195 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
196 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
197 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
198 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
199 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
200 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
201 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
202 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
203 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
204 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
205 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
206 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
207 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
208 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
209 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
210 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
211 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
212 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
213 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
214 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
215 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
216 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
217 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
218 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
219 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
220 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
221 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
222 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
223 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
226 static const u32 golden_settings_polaris11_a11[] =
228 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
229 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
230 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
231 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
232 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
233 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
234 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
235 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
236 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
237 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
238 mmSQ_CONFIG, 0x07f80000, 0x01180000,
239 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
240 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
241 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
242 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
243 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
244 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
247 static const u32 polaris11_golden_common_all[] =
249 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
250 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
251 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
252 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
253 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
254 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
257 static const u32 golden_settings_polaris10_a11[] =
259 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
260 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
261 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
262 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
263 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
264 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
265 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
266 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
267 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
268 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
269 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
270 mmSQ_CONFIG, 0x07f80000, 0x07180000,
271 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
272 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
273 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
274 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
275 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
278 static const u32 polaris10_golden_common_all[] =
280 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
281 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
282 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
283 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
284 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
285 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
286 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
287 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
290 static const u32 fiji_golden_common_all[] =
292 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
293 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
294 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
295 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
296 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
301 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
304 static const u32 golden_settings_fiji_a10[] =
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
311 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
312 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
313 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
314 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
315 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
316 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
319 static const u32 fiji_mgcg_cgcg_init[] =
321 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
322 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
323 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
324 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
325 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
326 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
327 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
328 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
329 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
330 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
331 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
332 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
333 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
334 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
335 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
336 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
339 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
340 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
341 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
342 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
343 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
345 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
346 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
347 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
348 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
349 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
350 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
351 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
352 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
353 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
354 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
355 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
358 static const u32 golden_settings_iceland_a11[] =
360 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
361 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
363 mmGB_GPU_ID, 0x0000000f, 0x00000000,
364 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
365 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
366 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
367 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
368 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
369 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
370 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
371 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
372 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
373 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
374 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
375 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
378 static const u32 iceland_golden_common_all[] =
380 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
381 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
382 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
383 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
384 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
385 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
386 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
387 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
390 static const u32 iceland_mgcg_cgcg_init[] =
392 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
393 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
394 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
396 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
397 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
398 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
399 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
400 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
401 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
402 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
403 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
404 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
405 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
406 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
407 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
408 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
409 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
410 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
411 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
412 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
413 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
414 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
415 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
416 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
417 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
418 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
419 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
420 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
421 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
422 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
423 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
424 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
425 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
426 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
427 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
428 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
429 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
430 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
431 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
432 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
433 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
434 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
435 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
436 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
437 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
438 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
439 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
440 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
441 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
442 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
443 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
444 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
445 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
446 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
447 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
448 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
449 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
450 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
451 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
452 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
453 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
454 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
455 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
458 static const u32 cz_golden_settings_a11[] =
460 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
461 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
462 mmGB_GPU_ID, 0x0000000f, 0x00000000,
463 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
464 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
465 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
466 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
467 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
468 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
469 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
470 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
471 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
474 static const u32 cz_golden_common_all[] =
476 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
477 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
478 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
479 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
480 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
481 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
482 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
483 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
486 static const u32 cz_mgcg_cgcg_init[] =
488 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
491 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
492 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
493 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
494 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
495 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
496 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
497 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
498 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
499 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
500 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
501 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
502 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
503 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
504 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
505 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
506 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
507 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
508 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
509 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
510 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
511 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
512 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
513 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
514 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
515 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
516 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
517 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
518 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
519 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
520 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
521 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
522 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
523 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
524 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
525 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
526 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
527 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
528 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
529 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
530 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
531 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
532 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
533 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
534 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
535 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
536 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
537 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
538 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
539 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
540 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
541 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
542 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
543 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
544 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
545 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
546 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
547 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
548 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
549 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
550 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
551 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
552 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
553 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
554 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
555 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
556 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
557 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
558 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
559 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
560 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
561 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
562 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
565 static const u32 stoney_golden_settings_a11[] =
567 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
568 mmGB_GPU_ID, 0x0000000f, 0x00000000,
569 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
570 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
571 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
572 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
573 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
574 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
575 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
576 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
579 static const u32 stoney_golden_common_all[] =
581 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
582 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
583 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
584 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
585 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
586 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
587 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
588 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
591 static const u32 stoney_mgcg_cgcg_init[] =
593 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
594 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
595 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
596 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
597 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
600 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
601 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
602 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
603 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
604 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
605 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
607 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
609 switch (adev->asic_type) {
611 amdgpu_program_register_sequence(adev,
612 iceland_mgcg_cgcg_init,
613 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
614 amdgpu_program_register_sequence(adev,
615 golden_settings_iceland_a11,
616 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
617 amdgpu_program_register_sequence(adev,
618 iceland_golden_common_all,
619 (const u32)ARRAY_SIZE(iceland_golden_common_all));
622 amdgpu_program_register_sequence(adev,
624 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
625 amdgpu_program_register_sequence(adev,
626 golden_settings_fiji_a10,
627 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
628 amdgpu_program_register_sequence(adev,
629 fiji_golden_common_all,
630 (const u32)ARRAY_SIZE(fiji_golden_common_all));
634 amdgpu_program_register_sequence(adev,
635 tonga_mgcg_cgcg_init,
636 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
637 amdgpu_program_register_sequence(adev,
638 golden_settings_tonga_a11,
639 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
640 amdgpu_program_register_sequence(adev,
641 tonga_golden_common_all,
642 (const u32)ARRAY_SIZE(tonga_golden_common_all));
645 amdgpu_program_register_sequence(adev,
646 golden_settings_polaris11_a11,
647 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
648 amdgpu_program_register_sequence(adev,
649 polaris11_golden_common_all,
650 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
653 amdgpu_program_register_sequence(adev,
654 golden_settings_polaris10_a11,
655 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
656 amdgpu_program_register_sequence(adev,
657 polaris10_golden_common_all,
658 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
659 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
660 if (adev->pdev->revision == 0xc7 &&
661 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
662 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
663 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
664 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
665 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
669 amdgpu_program_register_sequence(adev,
671 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
672 amdgpu_program_register_sequence(adev,
673 cz_golden_settings_a11,
674 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
675 amdgpu_program_register_sequence(adev,
676 cz_golden_common_all,
677 (const u32)ARRAY_SIZE(cz_golden_common_all));
680 amdgpu_program_register_sequence(adev,
681 stoney_mgcg_cgcg_init,
682 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
683 amdgpu_program_register_sequence(adev,
684 stoney_golden_settings_a11,
685 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
686 amdgpu_program_register_sequence(adev,
687 stoney_golden_common_all,
688 (const u32)ARRAY_SIZE(stoney_golden_common_all));
695 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
699 adev->gfx.scratch.num_reg = 7;
700 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
701 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
702 adev->gfx.scratch.free[i] = true;
703 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
707 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
709 struct amdgpu_device *adev = ring->adev;
715 r = amdgpu_gfx_scratch_get(adev, &scratch);
717 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
720 WREG32(scratch, 0xCAFEDEAD);
721 r = amdgpu_ring_alloc(ring, 3);
723 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
725 amdgpu_gfx_scratch_free(adev, scratch);
728 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
729 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
730 amdgpu_ring_write(ring, 0xDEADBEEF);
731 amdgpu_ring_commit(ring);
733 for (i = 0; i < adev->usec_timeout; i++) {
734 tmp = RREG32(scratch);
735 if (tmp == 0xDEADBEEF)
739 if (i < adev->usec_timeout) {
740 DRM_INFO("ring test on %d succeeded in %d usecs\n",
743 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
744 ring->idx, scratch, tmp);
747 amdgpu_gfx_scratch_free(adev, scratch);
751 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
753 struct amdgpu_device *adev = ring->adev;
755 struct fence *f = NULL;
760 r = amdgpu_gfx_scratch_get(adev, &scratch);
762 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
765 WREG32(scratch, 0xCAFEDEAD);
766 memset(&ib, 0, sizeof(ib));
767 r = amdgpu_ib_get(adev, NULL, 256, &ib);
769 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
772 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
773 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
774 ib.ptr[2] = 0xDEADBEEF;
777 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
781 r = fence_wait_timeout(f, false, timeout);
783 DRM_ERROR("amdgpu: IB test timed out.\n");
787 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
790 tmp = RREG32(scratch);
791 if (tmp == 0xDEADBEEF) {
792 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
795 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
800 amdgpu_ib_free(adev, &ib, NULL);
803 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_free_microcode - drop every GFX firmware image held by the device.
 *
 * Releases PFP, ME, CE, RLC and MEC firmware unconditionally; MEC2 firmware
 * is released only on ASICs that actually loaded it (not Stoney/Topaz).
 * Also frees the RLC register-list-format buffer allocated at init time.
 * release_firmware(NULL) is a no-op, so already-NULL pointers are safe.
 */
808 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
809 release_firmware(adev->gfx.pfp_fw);
810 adev->gfx.pfp_fw = NULL;
811 release_firmware(adev->gfx.me_fw);
812 adev->gfx.me_fw = NULL;
813 release_firmware(adev->gfx.ce_fw);
814 adev->gfx.ce_fw = NULL;
815 release_firmware(adev->gfx.rlc_fw);
816 adev->gfx.rlc_fw = NULL;
817 release_firmware(adev->gfx.mec_fw);
818 adev->gfx.mec_fw = NULL;
/* Stoney/Topaz never load MEC2 firmware, so skip the release there. */
819 if ((adev->asic_type != CHIP_STONEY) &&
820 (adev->asic_type != CHIP_TOPAZ))
821 release_firmware(adev->gfx.mec2_fw);
822 adev->gfx.mec2_fw = NULL;
/* Buffer kmalloc'd in gfx_v8_0_init_microcode for the RLC reg list. */
824 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX v8 firmware images.
 *
 * For the detected ASIC this loads PFP, ME, CE, RLC and MEC (and, where
 * supported, MEC2) firmware, caches version/feature numbers from each image
 * header, copies the RLC register-list tables out of the RLC image, and —
 * when the SMU performs firmware loading — registers each image in
 * adev->firmware.ucode[] and accumulates the page-aligned total size.
 *
 * On any failure, all previously acquired firmware images are released
 * (error path at the bottom of the function).
 *
 * NOTE(review): this excerpt omits interleaved lines (error checks, `goto
 * out` jumps, case labels, braces); comments describe the visible flow only.
 */
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
829 const char *chip_name;
832 struct amdgpu_firmware_info *info = NULL;
833 const struct common_firmware_header *header = NULL;
834 const struct gfx_firmware_header_v1_0 *cp_hdr;
835 const struct rlc_firmware_header_v2_0 *rlc_hdr;
836 unsigned int *tmp = NULL, i;
/* Pick the firmware basename from the ASIC type. */
840 switch (adev->asic_type) {
848 chip_name = "carrizo";
854 chip_name = "polaris11";
857 chip_name = "polaris10";
860 chip_name = "stoney";
/* PFP (prefetch parser) firmware. */
866 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
867 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
870 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
873 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
874 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
875 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) firmware. */
877 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
878 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
881 err = amdgpu_ucode_validate(adev->gfx.me_fw);
884 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
885 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
886 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) firmware. */
888 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
889 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
892 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
895 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
896 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
897 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* RLC firmware — uses the v2.0 header with register-list metadata. */
899 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
900 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
903 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
904 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
905 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
906 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* Cache RLC save/restore layout offsets from the header (all LE on disk). */
908 adev->gfx.rlc.save_and_restore_offset =
909 le32_to_cpu(rlc_hdr->save_and_restore_offset);
910 adev->gfx.rlc.clear_state_descriptor_offset =
911 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
912 adev->gfx.rlc.avail_scratch_ram_locations =
913 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
914 adev->gfx.rlc.reg_restore_list_size =
915 le32_to_cpu(rlc_hdr->reg_restore_list_size);
916 adev->gfx.rlc.reg_list_format_start =
917 le32_to_cpu(rlc_hdr->reg_list_format_start);
918 adev->gfx.rlc.reg_list_format_separate_start =
919 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
920 adev->gfx.rlc.starting_offsets_start =
921 le32_to_cpu(rlc_hdr->starting_offsets_start);
922 adev->gfx.rlc.reg_list_format_size_bytes =
923 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
924 adev->gfx.rlc.reg_list_size_bytes =
925 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* One allocation holds both the format list and the restore list;
 * register_restore points into the tail of the same buffer below. */
927 adev->gfx.rlc.register_list_format =
928 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
929 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
931 if (!adev->gfx.rlc.register_list_format) {
/* Copy the LE dword tables out of the firmware image into host order. */
936 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
937 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
938 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
939 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
/* restore list starts right after the format list (same allocation). */
941 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
943 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
944 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
945 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
946 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (compute micro engine) firmware. */
948 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
949 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
952 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
955 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
956 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
957 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 only exists on ASICs other than Stoney/Topaz. */
959 if ((adev->asic_type != CHIP_STONEY) &&
960 (adev->asic_type != CHIP_TOPAZ)) {
961 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
962 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
964 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
967 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
968 adev->gfx.mec2_fw->data;
969 adev->gfx.mec2_fw_version =
970 le32_to_cpu(cp_hdr->header.ucode_version);
971 adev->gfx.mec2_feature_version =
972 le32_to_cpu(cp_hdr->ucode_feature_version);
975 adev->gfx.mec2_fw = NULL;
/* When the SMU loads firmware, register each image and tally total size. */
979 if (adev->firmware.smu_load) {
980 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
981 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
982 info->fw = adev->gfx.pfp_fw;
983 header = (const struct common_firmware_header *)info->fw->data;
984 adev->firmware.fw_size +=
985 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
987 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
988 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
989 info->fw = adev->gfx.me_fw;
990 header = (const struct common_firmware_header *)info->fw->data;
991 adev->firmware.fw_size +=
992 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
994 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
995 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
996 info->fw = adev->gfx.ce_fw;
997 header = (const struct common_firmware_header *)info->fw->data;
998 adev->firmware.fw_size +=
999 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1001 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1002 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1003 info->fw = adev->gfx.rlc_fw;
1004 header = (const struct common_firmware_header *)info->fw->data;
1005 adev->firmware.fw_size +=
1006 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1008 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1009 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1010 info->fw = adev->gfx.mec_fw;
1011 header = (const struct common_firmware_header *)info->fw->data;
1012 adev->firmware.fw_size +=
1013 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1015 if (adev->gfx.mec2_fw) {
1016 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1017 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1018 info->fw = adev->gfx.mec2_fw;
1019 header = (const struct common_firmware_header *)info->fw->data;
1020 adev->firmware.fw_size +=
1021 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: report which image failed and release everything. */
1029 "gfx8: Failed to load firmware \"%s\"\n",
1031 release_firmware(adev->gfx.pfp_fw);
1032 adev->gfx.pfp_fw = NULL;
1033 release_firmware(adev->gfx.me_fw);
1034 adev->gfx.me_fw = NULL;
1035 release_firmware(adev->gfx.ce_fw);
1036 adev->gfx.ce_fw = NULL;
1037 release_firmware(adev->gfx.rlc_fw);
1038 adev->gfx.rlc_fw = NULL;
1039 release_firmware(adev->gfx.mec_fw);
1040 adev->gfx.mec_fw = NULL;
1041 release_firmware(adev->gfx.mec2_fw);
1042 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state buffer (CSB) into @buffer.
 *
 * Builds the PM4 packet stream the RLC replays to reset GPU context state:
 * PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT register extent from
 * rlc.cs_data, an ASIC-specific PA_SC_RASTER_CONFIG pair, PREAMBLE end, and
 * a final CLEAR_STATE packet.  All dwords are stored little-endian.
 *
 * NOTE(review): excerpt omits some lines (count init, braces, break/default
 * labels); comments describe the visible statements only.
 */
1047 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1048 volatile u32 *buffer)
1051 const struct cs_section_def *sect = NULL;
1052 const struct cs_extent_def *ext = NULL;
/* Nothing to emit without a clear-state table. */
1054 if (adev->gfx.rlc.cs_data == NULL)
1059 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1060 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1062 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1063 buffer[count++] = cpu_to_le32(0x80000000);
1064 buffer[count++] = cpu_to_le32(0x80000000);
/* Emit one SET_CONTEXT_REG packet per context-register extent. */
1066 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1067 for (ext = sect->section; ext->extent != NULL; ++ext) {
1068 if (sect->id == SECT_CONTEXT) {
1070 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1071 buffer[count++] = cpu_to_le32(ext->reg_index -
1072 PACKET3_SET_CONTEXT_REG_START);
1073 for (i = 0; i < ext->reg_count; i++)
1074 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG / RASTER_CONFIG_1 values differ per ASIC. */
1081 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1082 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1083 PACKET3_SET_CONTEXT_REG_START);
1084 switch (adev->asic_type) {
1086 case CHIP_POLARIS10:
1087 buffer[count++] = cpu_to_le32(0x16000012);
1088 buffer[count++] = cpu_to_le32(0x0000002A);
1090 case CHIP_POLARIS11:
1091 buffer[count++] = cpu_to_le32(0x16000012);
1092 buffer[count++] = cpu_to_le32(0x00000000);
1095 buffer[count++] = cpu_to_le32(0x3a00161a);
1096 buffer[count++] = cpu_to_le32(0x0000002e);
1100 buffer[count++] = cpu_to_le32(0x00000002);
1101 buffer[count++] = cpu_to_le32(0x00000000);
1104 buffer[count++] = cpu_to_le32(0x00000000);
1105 buffer[count++] = cpu_to_le32(0x00000000);
1108 buffer[count++] = cpu_to_le32(0x00000000);
1109 buffer[count++] = cpu_to_le32(0x00000000);
/* Close the preamble and trigger the actual clear-state. */
1113 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1114 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1116 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1117 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy each CP engine's jump table into the RLC
 * cp_table BO (Carrizo/Stoney power-gating support).
 *
 * Iterates over up to @max_me engines (CE, PFP, ME, MEC, MEC2 — MEC2 only
 * when me == 4, i.e. max_me was raised past 4), reads the jt_offset/jt_size
 * pair from each firmware image's header, and appends the table dwords to
 * rlc.cp_table_ptr at a running bo_offset.
 *
 * NOTE(review): excerpt omits some lines (the `if (me == 0)` guard, the
 * max_me adjustment body at line 1128, braces); comments describe only
 * what is visible.
 */
1120 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1122 const __le32 *fw_data;
1123 volatile u32 *dst_ptr;
1124 int me, i, max_me = 4;
1126 u32 table_offset, table_size;
/* Carrizo-specific adjustment (body of this branch not visible here). */
1128 if (adev->asic_type == CHIP_CARRIZO)
1131 /* write the cp table buffer */
1132 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1133 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine (CE) jump table. */
1135 const struct gfx_firmware_header_v1_0 *hdr =
1136 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1137 fw_data = (const __le32 *)
1138 (adev->gfx.ce_fw->data +
1139 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1140 table_offset = le32_to_cpu(hdr->jt_offset);
1141 table_size = le32_to_cpu(hdr->jt_size);
1142 } else if (me == 1) {
/* me == 1: prefetch parser (PFP). */
1143 const struct gfx_firmware_header_v1_0 *hdr =
1144 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1145 fw_data = (const __le32 *)
1146 (adev->gfx.pfp_fw->data +
1147 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1148 table_offset = le32_to_cpu(hdr->jt_offset);
1149 table_size = le32_to_cpu(hdr->jt_size);
1150 } else if (me == 2) {
/* me == 2: micro engine (ME). */
1151 const struct gfx_firmware_header_v1_0 *hdr =
1152 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1153 fw_data = (const __le32 *)
1154 (adev->gfx.me_fw->data +
1155 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1156 table_offset = le32_to_cpu(hdr->jt_offset);
1157 table_size = le32_to_cpu(hdr->jt_size);
1158 } else if (me == 3) {
/* me == 3: compute micro engine (MEC). */
1159 const struct gfx_firmware_header_v1_0 *hdr =
1160 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1161 fw_data = (const __le32 *)
1162 (adev->gfx.mec_fw->data +
1163 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1164 table_offset = le32_to_cpu(hdr->jt_offset);
1165 table_size = le32_to_cpu(hdr->jt_size);
1166 } else if (me == 4) {
/* me == 4: MEC2 — only reached when max_me > 4. */
1167 const struct gfx_firmware_header_v1_0 *hdr =
1168 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1169 fw_data = (const __le32 *)
1170 (adev->gfx.mec2_fw->data +
1171 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1172 table_offset = le32_to_cpu(hdr->jt_offset);
1173 table_size = le32_to_cpu(hdr->jt_size);
/* Append this engine's table dwords after the previous one. */
1176 for (i = 0; i < table_size; i ++) {
1177 dst_ptr[bo_offset + i] =
1178 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1181 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - tear down RLC buffer objects.
 *
 * For each BO (clear-state buffer, CP jump table): reserve, unpin,
 * unreserve, drop the reference, and NULL the pointer.  A failed reserve
 * only warns — teardown proceeds regardless.
 */
1185 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1189 /* clear state block */
1190 if (adev->gfx.rlc.clear_state_obj) {
1191 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1192 if (unlikely(r != 0))
1193 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1194 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1195 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1196 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1197 adev->gfx.rlc.clear_state_obj = NULL;
1200 /* jump table block */
1201 if (adev->gfx.rlc.cp_table_obj) {
1202 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1203 if (unlikely(r != 0))
1204 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1205 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1206 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1207 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1208 adev->gfx.rlc.cp_table_obj = NULL;
/*
 * gfx_v8_0_rlc_init - allocate and populate RLC buffer objects.
 *
 * Creates/pins/maps a VRAM BO for the clear-state buffer, fills it via
 * gfx_v8_0_get_csb_buffer(), and on Carrizo/Stoney additionally creates the
 * CP jump-table BO (JT + GDS region) and populates it via
 * cz_init_cp_jump_table().  Failures warn and unwind through
 * gfx_v8_0_rlc_fini().
 *
 * NOTE(review): excerpt omits interleaved lines (error returns, braces);
 * comments describe the visible statements only.
 */
1212 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1214 volatile u32 *dst_ptr;
1216 const struct cs_section_def *cs_data;
/* VI-family clear-state definition from clearstate_vi.h. */
1219 adev->gfx.rlc.cs_data = vi_cs_data;
1221 cs_data = adev->gfx.rlc.cs_data;
1224 /* clear state block */
1225 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1227 if (adev->gfx.rlc.clear_state_obj == NULL) {
1228 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1229 AMDGPU_GEM_DOMAIN_VRAM,
1230 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1232 &adev->gfx.rlc.clear_state_obj);
1234 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1235 gfx_v8_0_rlc_fini(adev);
/* Reserve + pin in VRAM so the RLC can fetch it by GPU address. */
1239 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1240 if (unlikely(r != 0)) {
1241 gfx_v8_0_rlc_fini(adev);
1244 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1245 &adev->gfx.rlc.clear_state_gpu_addr);
1247 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1248 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1249 gfx_v8_0_rlc_fini(adev);
/* CPU-map the BO so we can write the CSB contents directly. */
1253 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1255 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1256 gfx_v8_0_rlc_fini(adev);
1259 /* set up the cs buffer */
1260 dst_ptr = adev->gfx.rlc.cs_ptr;
1261 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1262 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1263 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* CP jump table BO — only needed for Carrizo/Stoney power gating. */
1266 if ((adev->asic_type == CHIP_CARRIZO) ||
1267 (adev->asic_type == CHIP_STONEY)) {
1268 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1269 if (adev->gfx.rlc.cp_table_obj == NULL) {
1270 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1271 AMDGPU_GEM_DOMAIN_VRAM,
1272 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1274 &adev->gfx.rlc.cp_table_obj);
1276 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1281 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1282 if (unlikely(r != 0)) {
1283 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1286 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1287 &adev->gfx.rlc.cp_table_gpu_addr);
1289 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1290 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1293 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1295 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
/* Fill the table from the CP firmware images, then unmap. */
1299 cz_init_cp_jump_table(adev);
1301 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1302 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/*
 * gfx_v8_0_mec_fini - release the MEC HPD EOP buffer object.
 *
 * Same reserve/unpin/unreserve/unref pattern as gfx_v8_0_rlc_fini();
 * a failed reserve only warns.
 */
1308 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1312 if (adev->gfx.mec.hpd_eop_obj) {
1313 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1314 if (unlikely(r != 0))
1315 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1316 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1317 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1318 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1319 adev->gfx.mec.hpd_eop_obj = NULL;
1323 #define MEC_HPD_SIZE 2048
/*
 * gfx_v8_0_mec_init - allocate and zero the MEC HPD EOP buffer.
 *
 * Configures 1 MEC with 1 pipe (8 queues) and creates a GTT BO sized
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 to hold the hardware queue
 * descriptors' EOP areas; the BO is pinned, mapped, zeroed, then unmapped.
 *
 * NOTE(review): excerpt omits some lines (error returns, braces); comments
 * describe the visible statements only.
 */
1325 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1331 * we assign only 1 pipe because all other pipes will
1334 adev->gfx.mec.num_mec = 1;
1335 adev->gfx.mec.num_pipe = 1;
1336 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1338 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1339 r = amdgpu_bo_create(adev,
1340 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1342 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1343 &adev->gfx.mec.hpd_eop_obj);
1345 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
/* Pin in GTT and map so the buffer can be cleared from the CPU. */
1350 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1351 if (unlikely(r != 0)) {
1352 gfx_v8_0_mec_fini(adev);
1355 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1356 &adev->gfx.mec.hpd_eop_gpu_addr);
1358 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1359 gfx_v8_0_mec_fini(adev);
1362 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1364 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1365 gfx_v8_0_mec_fini(adev);
/* Zero the whole EOP region before the CP uses it. */
1369 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1371 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1372 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * GCN3 machine code for a compute shader that initializes VGPRs; dispatched
 * by gfx_v8_0_do_edc_gpr_workarounds() to prime the EDC error counters.
 */
1377 static const u32 vgpr_init_compute_shader[] =
1379 0x7e000209, 0x7e020208,
1380 0x7e040207, 0x7e060206,
1381 0x7e080205, 0x7e0a0204,
1382 0x7e0c0203, 0x7e0e0202,
1383 0x7e100201, 0x7e120200,
1384 0x7e140209, 0x7e160208,
1385 0x7e180207, 0x7e1a0206,
1386 0x7e1c0205, 0x7e1e0204,
1387 0x7e200203, 0x7e220202,
1388 0x7e240201, 0x7e260200,
1389 0x7e280209, 0x7e2a0208,
1390 0x7e2c0207, 0x7e2e0206,
1391 0x7e300205, 0x7e320204,
1392 0x7e340203, 0x7e360202,
1393 0x7e380201, 0x7e3a0200,
1394 0x7e3c0209, 0x7e3e0208,
1395 0x7e400207, 0x7e420206,
1396 0x7e440205, 0x7e460204,
1397 0x7e480203, 0x7e4a0202,
1398 0x7e4c0201, 0x7e4e0200,
1399 0x7e500209, 0x7e520208,
1400 0x7e540207, 0x7e560206,
1401 0x7e580205, 0x7e5a0204,
1402 0x7e5c0203, 0x7e5e0202,
1403 0x7e600201, 0x7e620200,
1404 0x7e640209, 0x7e660208,
1405 0x7e680207, 0x7e6a0206,
1406 0x7e6c0205, 0x7e6e0204,
1407 0x7e700203, 0x7e720202,
1408 0x7e740201, 0x7e760200,
1409 0x7e780209, 0x7e7a0208,
1410 0x7e7c0207, 0x7e7e0206,
1411 0xbf8a0000, 0xbf810000,
/*
 * GCN3 machine code for a compute shader that initializes SGPRs; used for
 * both SGPR dispatch passes of gfx_v8_0_do_edc_gpr_workarounds().
 */
1414 static const u32 sgpr_init_compute_shader[] =
1416 0xbe8a0100, 0xbe8c0102,
1417 0xbe8e0104, 0xbe900106,
1418 0xbe920108, 0xbe940100,
1419 0xbe960102, 0xbe980104,
1420 0xbe9a0106, 0xbe9c0108,
1421 0xbe9e0100, 0xbea00102,
1422 0xbea20104, 0xbea40106,
1423 0xbea60108, 0xbea80100,
1424 0xbeaa0102, 0xbeac0104,
1425 0xbeae0106, 0xbeb00108,
1426 0xbeb20100, 0xbeb40102,
1427 0xbeb60104, 0xbeb80106,
1428 0xbeba0108, 0xbebc0100,
1429 0xbebe0102, 0xbec00104,
1430 0xbec20106, 0xbec40108,
1431 0xbec60100, 0xbec80102,
1432 0xbee60004, 0xbee70005,
1433 0xbeea0006, 0xbeeb0007,
1434 0xbee80008, 0xbee90009,
1435 0xbefc0000, 0xbf8a0000,
1436 0xbf810000, 0x00000000,
/*
 * Register/value pairs programmed before dispatching the VGPR init shader:
 * thread-group shape, resource limits, PGM_RSRC1/2 and USER_DATA markers.
 */
1439 static const u32 vgpr_init_regs[] =
1441 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1442 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1443 mmCOMPUTE_NUM_THREAD_X, 256*4,
1444 mmCOMPUTE_NUM_THREAD_Y, 1,
1445 mmCOMPUTE_NUM_THREAD_Z, 1,
1446 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1447 mmCOMPUTE_PGM_RSRC2, 20,
1448 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1449 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1450 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1451 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1452 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1453 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1454 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1455 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1456 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1457 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the first SGPR init dispatch — targets the low
 * CU mask (SE0 static thread mgmt = 0x0f).
 */
1460 static const u32 sgpr1_init_regs[] =
1462 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1463 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1464 mmCOMPUTE_NUM_THREAD_X, 256*5,
1465 mmCOMPUTE_NUM_THREAD_Y, 1,
1466 mmCOMPUTE_NUM_THREAD_Z, 1,
1467 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1468 mmCOMPUTE_PGM_RSRC2, 20,
1469 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1470 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1471 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1472 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1473 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1474 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1475 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1476 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1477 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1478 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the second SGPR init dispatch — targets the high
 * CU mask (SE0 static thread mgmt = 0xf0); otherwise mirrors sgpr1_init_regs.
 */
1481 static const u32 sgpr2_init_regs[] =
1483 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1484 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1485 mmCOMPUTE_NUM_THREAD_X, 256*5,
1486 mmCOMPUTE_NUM_THREAD_Y, 1,
1487 mmCOMPUTE_NUM_THREAD_Z, 1,
1488 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1489 mmCOMPUTE_PGM_RSRC2, 20,
1490 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1491 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1492 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1493 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1494 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1495 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1496 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1497 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1498 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1499 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers; read back at the end of the GPR workaround to clear them.
 * NOTE(review): this excerpt shows only a subset of the table's entries.
 */
1502 static const u32 sec_ded_counter_registers[] =
1505 mmCPC_EDC_SCRATCH_CNT,
1506 mmCPC_EDC_UCODE_CNT,
1513 mmDC_EDC_CSINVOC_CNT,
1514 mmDC_EDC_RESTORE_CNT,
1520 mmSQC_ATC_EDC_GATCL1_CNT,
1526 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only EDC GPR initialization.
 *
 * Builds one IB that performs three compute dispatches (one VGPR-init pass
 * and two SGPR-init passes over complementary CU masks), each followed by a
 * CS partial flush, so every GPR is written and the EDC parity state is
 * clean.  Afterwards it enables DED/PROP_FED in GB_EDC_MODE, tweaks
 * CC_GC_EDC_CONFIG, and reads each SEC/DED counter register to clear it.
 *
 * NOTE(review): excerpt omits interleaved lines (error branches, `goto fail`
 * labels, braces, total_size accumulation start); comments describe the
 * visible statements only.
 */
1531 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1533 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1534 struct amdgpu_ib ib;
1535 struct fence *f = NULL;
1538 unsigned total_size, vgpr_offset, sgpr_offset;
1541 /* only supported on CZ */
1542 if (adev->asic_type != CHIP_CARRIZO)
1545 /* bail if the compute ring is not ready */
/* Save GB_EDC_MODE and disable EDC while the init shaders run. */
1549 tmp = RREG32(mmGB_EDC_MODE);
1550 WREG32(mmGB_EDC_MODE, 0);
/* IB size: 3 dwords per reg pair, +4 for PGM addr, +5 dispatch, +2 flush. */
1553 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1555 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1557 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1558 total_size = ALIGN(total_size, 256);
1559 vgpr_offset = total_size;
1560 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1561 sgpr_offset = total_size;
1562 total_size += sizeof(sgpr_init_compute_shader);
1564 /* allocate an indirect buffer to put the commands in */
1565 memset(&ib, 0, sizeof(ib));
1566 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1568 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1572 /* load the compute shaders */
1573 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1574 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1576 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1577 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1579 /* init the ib length to 0 */
/* ---- Pass 1: VGPR init dispatch ---- */
1583 /* write the register state for the compute dispatch */
1584 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1585 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1586 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1587 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1589 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1590 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1591 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1592 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1593 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1594 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1596 /* write dispatch packet */
1597 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1598 ib.ptr[ib.length_dw++] = 8; /* x */
1599 ib.ptr[ib.length_dw++] = 1; /* y */
1600 ib.ptr[ib.length_dw++] = 1; /* z */
1601 ib.ptr[ib.length_dw++] =
1602 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1604 /* write CS partial flush packet */
1605 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1606 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- Pass 2: SGPR init dispatch, low CU mask ---- */
1609 /* write the register state for the compute dispatch */
1610 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1611 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1612 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1613 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1615 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1616 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1617 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1618 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1619 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1620 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1622 /* write dispatch packet */
1623 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1624 ib.ptr[ib.length_dw++] = 8; /* x */
1625 ib.ptr[ib.length_dw++] = 1; /* y */
1626 ib.ptr[ib.length_dw++] = 1; /* z */
1627 ib.ptr[ib.length_dw++] =
1628 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1630 /* write CS partial flush packet */
1631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1632 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* ---- Pass 3: SGPR init dispatch, high CU mask ---- */
1635 /* write the register state for the compute dispatch */
1636 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1637 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1638 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1639 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1641 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1642 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1643 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1644 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1645 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1646 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1648 /* write dispatch packet */
1649 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1650 ib.ptr[ib.length_dw++] = 8; /* x */
1651 ib.ptr[ib.length_dw++] = 1; /* y */
1652 ib.ptr[ib.length_dw++] = 1; /* z */
1653 ib.ptr[ib.length_dw++] =
1654 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1656 /* write CS partial flush packet */
1657 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1658 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1660 /* shedule the ib on the ring */
1661 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1663 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1667 /* wait for the GPU to finish processing the IB */
1668 r = fence_wait(f, false);
1670 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC: DED detection mode 2, propagate fatal errors. */
1674 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1675 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1676 WREG32(mmGB_EDC_MODE, tmp);
1678 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1679 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1680 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1683 /* read back registers to clear the counters */
1684 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1685 RREG32(sec_ded_counter_registers[i]);
1688 amdgpu_ib_free(adev, &ib, NULL);
1694 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1697 u32 mc_shared_chmap, mc_arb_ramcfg;
1698 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1702 switch (adev->asic_type) {
1704 adev->gfx.config.max_shader_engines = 1;
1705 adev->gfx.config.max_tile_pipes = 2;
1706 adev->gfx.config.max_cu_per_sh = 6;
1707 adev->gfx.config.max_sh_per_se = 1;
1708 adev->gfx.config.max_backends_per_se = 2;
1709 adev->gfx.config.max_texture_channel_caches = 2;
1710 adev->gfx.config.max_gprs = 256;
1711 adev->gfx.config.max_gs_threads = 32;
1712 adev->gfx.config.max_hw_contexts = 8;
1714 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1715 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1716 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1717 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1718 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1721 adev->gfx.config.max_shader_engines = 4;
1722 adev->gfx.config.max_tile_pipes = 16;
1723 adev->gfx.config.max_cu_per_sh = 16;
1724 adev->gfx.config.max_sh_per_se = 1;
1725 adev->gfx.config.max_backends_per_se = 4;
1726 adev->gfx.config.max_texture_channel_caches = 16;
1727 adev->gfx.config.max_gprs = 256;
1728 adev->gfx.config.max_gs_threads = 32;
1729 adev->gfx.config.max_hw_contexts = 8;
1731 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1737 case CHIP_POLARIS11:
1738 ret = amdgpu_atombios_get_gfx_info(adev);
1741 adev->gfx.config.max_gprs = 256;
1742 adev->gfx.config.max_gs_threads = 32;
1743 adev->gfx.config.max_hw_contexts = 8;
1745 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1746 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1747 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1748 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1749 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1751 case CHIP_POLARIS10:
1752 ret = amdgpu_atombios_get_gfx_info(adev);
1755 adev->gfx.config.max_gprs = 256;
1756 adev->gfx.config.max_gs_threads = 32;
1757 adev->gfx.config.max_hw_contexts = 8;
1759 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1763 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1766 adev->gfx.config.max_shader_engines = 4;
1767 adev->gfx.config.max_tile_pipes = 8;
1768 adev->gfx.config.max_cu_per_sh = 8;
1769 adev->gfx.config.max_sh_per_se = 1;
1770 adev->gfx.config.max_backends_per_se = 2;
1771 adev->gfx.config.max_texture_channel_caches = 8;
1772 adev->gfx.config.max_gprs = 256;
1773 adev->gfx.config.max_gs_threads = 32;
1774 adev->gfx.config.max_hw_contexts = 8;
1776 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1777 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1778 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1779 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1780 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1783 adev->gfx.config.max_shader_engines = 1;
1784 adev->gfx.config.max_tile_pipes = 2;
1785 adev->gfx.config.max_sh_per_se = 1;
1786 adev->gfx.config.max_backends_per_se = 2;
1788 switch (adev->pdev->revision) {
1796 adev->gfx.config.max_cu_per_sh = 8;
1806 adev->gfx.config.max_cu_per_sh = 6;
1813 adev->gfx.config.max_cu_per_sh = 6;
1822 adev->gfx.config.max_cu_per_sh = 4;
1826 adev->gfx.config.max_texture_channel_caches = 2;
1827 adev->gfx.config.max_gprs = 256;
1828 adev->gfx.config.max_gs_threads = 32;
1829 adev->gfx.config.max_hw_contexts = 8;
1831 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1832 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1833 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1834 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1835 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1838 adev->gfx.config.max_shader_engines = 1;
1839 adev->gfx.config.max_tile_pipes = 2;
1840 adev->gfx.config.max_sh_per_se = 1;
1841 adev->gfx.config.max_backends_per_se = 1;
1843 switch (adev->pdev->revision) {
1850 adev->gfx.config.max_cu_per_sh = 3;
1856 adev->gfx.config.max_cu_per_sh = 2;
1860 adev->gfx.config.max_texture_channel_caches = 2;
1861 adev->gfx.config.max_gprs = 256;
1862 adev->gfx.config.max_gs_threads = 16;
1863 adev->gfx.config.max_hw_contexts = 8;
1865 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1866 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1867 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1868 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1869 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1872 adev->gfx.config.max_shader_engines = 2;
1873 adev->gfx.config.max_tile_pipes = 4;
1874 adev->gfx.config.max_cu_per_sh = 2;
1875 adev->gfx.config.max_sh_per_se = 1;
1876 adev->gfx.config.max_backends_per_se = 2;
1877 adev->gfx.config.max_texture_channel_caches = 4;
1878 adev->gfx.config.max_gprs = 256;
1879 adev->gfx.config.max_gs_threads = 32;
1880 adev->gfx.config.max_hw_contexts = 8;
1882 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1890 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1891 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1892 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1894 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1895 adev->gfx.config.mem_max_burst_length_bytes = 256;
1896 if (adev->flags & AMD_IS_APU) {
1897 /* Get memory bank mapping mode. */
1898 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1899 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1900 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1902 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1903 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1904 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1906 /* Validate settings in case only one DIMM installed. */
1907 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1908 dimm00_addr_map = 0;
1909 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1910 dimm01_addr_map = 0;
1911 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1912 dimm10_addr_map = 0;
1913 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1914 dimm11_addr_map = 0;
1916 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1917 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1918 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1919 adev->gfx.config.mem_row_size_in_kb = 2;
1921 adev->gfx.config.mem_row_size_in_kb = 1;
1923 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1924 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 if (adev->gfx.config.mem_row_size_in_kb > 4)
1926 adev->gfx.config.mem_row_size_in_kb = 4;
1929 adev->gfx.config.shader_engine_tile_size = 32;
1930 adev->gfx.config.num_gpus = 1;
1931 adev->gfx.config.multi_gpu_tile_size = 64;
1933 /* fix up row size */
1934 switch (adev->gfx.config.mem_row_size_in_kb) {
1937 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1940 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1943 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1946 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * Registers the EOP / privileged-register / privileged-instruction
 * interrupt sources, loads the GFX microcode, initializes the RLC and
 * MEC buffer objects, creates the GFX and compute rings, and reserves
 * the GDS/GWS/OA partitions for graphics use.
 *
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *.
 *
 * Returns 0 on success, negative error code on failure.
 * NOTE(review): the error-check lines after each call (`if (r) return r;`
 * style) are elided in this extraction — the intermediate `r` checks are
 * assumed to follow each failing call; confirm against the full file.
 */
1951 static int gfx_v8_0_sw_init(void *handle)
1954 struct amdgpu_ring *ring;
1955 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* EOP (end-of-pipe) interrupt, client irq id 181. */
1958 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1962 /* Privileged reg */
1963 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1967 /* Privileged inst */
1968 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1972 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1974 gfx_v8_0_scratch_init(adev);
/* Fetch the GFX firmware images; failure is fatal for this IP block. */
1976 r = gfx_v8_0_init_microcode(adev);
1978 DRM_ERROR("Failed to load gfx firmware!\n");
/* RLC (run-list controller) buffer objects. */
1982 r = gfx_v8_0_rlc_init(adev);
1984 DRM_ERROR("Failed to init rlc BOs!\n");
/* MEC (micro-engine compute) buffer objects. */
1988 r = gfx_v8_0_mec_init(adev);
1990 DRM_ERROR("Failed to init MEC BOs!\n");
1994 /* set up the gfx ring */
1995 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1996 ring = &adev->gfx.gfx_ring[i];
1997 ring->ring_obj = NULL;
1998 sprintf(ring->name, "gfx");
1999 /* no gfx doorbells on iceland */
2000 if (adev->asic_type != CHIP_TOPAZ) {
2001 ring->use_doorbell = true;
2002 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
/* 1024-dword ring, NOP-padded, signalled via the shared EOP irq. */
2005 r = amdgpu_ring_init(adev, ring, 1024,
2006 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2007 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2008 AMDGPU_RING_TYPE_GFX);
2013 /* set up the compute queues */
2014 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2017 /* max 32 queues per MEC */
2018 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2019 DRM_ERROR("Too many (%d) compute rings!\n", i);
2022 ring = &adev->gfx.compute_ring[i];
2023 ring->ring_obj = NULL;
2024 ring->use_doorbell = true;
/* Each compute ring gets its own doorbell slot after the MEC base. */
2025 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2026 ring->me = 1; /* first MEC */
/* 8 queues per pipe; ring index wraps into a queue number. */
2028 ring->queue = i % 8;
2029 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2030 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2031 /* type-2 packets are deprecated on MEC, use type-3 instead */
2032 r = amdgpu_ring_init(adev, ring, 1024,
2033 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2034 &adev->gfx.eop_irq, irq_type,
2035 AMDGPU_RING_TYPE_COMPUTE);
2040 /* reserve GDS, GWS and OA resource for gfx */
2041 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2042 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2043 &adev->gds.gds_gfx_bo, NULL, NULL);
2047 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2048 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2049 &adev->gds.gws_gfx_bo, NULL, NULL);
2053 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2054 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2055 &adev->gds.oa_gfx_bo, NULL, NULL);
/* Constant-engine RAM size for this generation (32 KiB). */
2059 adev->gfx.ce_ram_size = 0x8000;
/* Derive per-ASIC gfx config (tile pipes, CU counts, gb_addr_config). */
2061 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini - tear down everything gfx_v8_0_sw_init created.
 *
 * Releases the GDS/GWS/OA reservations, finalizes all GFX and compute
 * rings, and frees the MEC/RLC buffer objects and the loaded microcode.
 * Teardown runs in reverse order of the sw_init allocations.
 *
 * @handle: opaque IP-block handle; actually a struct amdgpu_device *.
 */
2068 static int gfx_v8_0_sw_fini(void *handle)
2071 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Drop the GDS-domain reservations made in sw_init (reverse order). */
2073 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2074 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2075 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
/* Finalize every ring that sw_init created. */
2077 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2078 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2079 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2080 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
/* Free MEC and RLC BOs, then release the firmware images. */
2082 gfx_v8_0_mec_fini(adev);
2083 gfx_v8_0_rlc_fini(adev);
2084 gfx_v8_0_free_microcode(adev);
2089 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2091 uint32_t *modearray, *mod2array;
2092 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2093 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2096 modearray = adev->gfx.config.tile_mode_array;
2097 mod2array = adev->gfx.config.macrotile_mode_array;
2099 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2100 modearray[reg_offset] = 0;
2102 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2103 mod2array[reg_offset] = 0;
2105 switch (adev->asic_type) {
2107 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2108 PIPE_CONFIG(ADDR_SURF_P2) |
2109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112 PIPE_CONFIG(ADDR_SURF_P2) |
2113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2115 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116 PIPE_CONFIG(ADDR_SURF_P2) |
2117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2119 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120 PIPE_CONFIG(ADDR_SURF_P2) |
2121 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2123 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 PIPE_CONFIG(ADDR_SURF_P2) |
2125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2126 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2127 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128 PIPE_CONFIG(ADDR_SURF_P2) |
2129 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132 PIPE_CONFIG(ADDR_SURF_P2) |
2133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2136 PIPE_CONFIG(ADDR_SURF_P2));
2137 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2138 PIPE_CONFIG(ADDR_SURF_P2) |
2139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2141 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2142 PIPE_CONFIG(ADDR_SURF_P2) |
2143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2146 PIPE_CONFIG(ADDR_SURF_P2) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2149 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2150 PIPE_CONFIG(ADDR_SURF_P2) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P2) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2157 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P2) |
2159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2162 PIPE_CONFIG(ADDR_SURF_P2) |
2163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2165 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2166 PIPE_CONFIG(ADDR_SURF_P2) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2169 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2170 PIPE_CONFIG(ADDR_SURF_P2) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2173 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2174 PIPE_CONFIG(ADDR_SURF_P2) |
2175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2177 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2182 PIPE_CONFIG(ADDR_SURF_P2) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2185 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2206 PIPE_CONFIG(ADDR_SURF_P2) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2213 NUM_BANKS(ADDR_SURF_8_BANK));
2214 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2217 NUM_BANKS(ADDR_SURF_8_BANK));
2218 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2221 NUM_BANKS(ADDR_SURF_8_BANK));
2222 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2225 NUM_BANKS(ADDR_SURF_8_BANK));
2226 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2229 NUM_BANKS(ADDR_SURF_8_BANK));
2230 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233 NUM_BANKS(ADDR_SURF_8_BANK));
2234 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2237 NUM_BANKS(ADDR_SURF_8_BANK));
2238 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2241 NUM_BANKS(ADDR_SURF_16_BANK));
2242 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245 NUM_BANKS(ADDR_SURF_16_BANK));
2246 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249 NUM_BANKS(ADDR_SURF_16_BANK));
2250 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253 NUM_BANKS(ADDR_SURF_16_BANK));
2254 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2257 NUM_BANKS(ADDR_SURF_16_BANK));
2258 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261 NUM_BANKS(ADDR_SURF_16_BANK));
2262 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265 NUM_BANKS(ADDR_SURF_8_BANK));
2267 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2268 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2270 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2272 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2273 if (reg_offset != 7)
2274 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2278 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2282 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2312 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2324 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2337 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2352 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2369 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2373 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2397 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2401 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2404 NUM_BANKS(ADDR_SURF_8_BANK));
2405 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408 NUM_BANKS(ADDR_SURF_8_BANK));
2409 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412 NUM_BANKS(ADDR_SURF_8_BANK));
2413 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416 NUM_BANKS(ADDR_SURF_8_BANK));
2417 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2420 NUM_BANKS(ADDR_SURF_8_BANK));
2421 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 NUM_BANKS(ADDR_SURF_8_BANK));
2425 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428 NUM_BANKS(ADDR_SURF_8_BANK));
2429 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432 NUM_BANKS(ADDR_SURF_8_BANK));
2433 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436 NUM_BANKS(ADDR_SURF_8_BANK));
2437 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440 NUM_BANKS(ADDR_SURF_8_BANK));
2441 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444 NUM_BANKS(ADDR_SURF_8_BANK));
2445 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 NUM_BANKS(ADDR_SURF_8_BANK));
2449 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 NUM_BANKS(ADDR_SURF_8_BANK));
2453 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 NUM_BANKS(ADDR_SURF_4_BANK));
2458 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2459 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2461 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2462 if (reg_offset != 7)
2463 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2467 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2471 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2501 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2502 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2505 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2513 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2526 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2533 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2541 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2554 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2558 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2574 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2590 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2593 NUM_BANKS(ADDR_SURF_16_BANK));
2594 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597 NUM_BANKS(ADDR_SURF_16_BANK));
2598 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601 NUM_BANKS(ADDR_SURF_16_BANK));
2602 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605 NUM_BANKS(ADDR_SURF_16_BANK));
2606 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2608 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2609 NUM_BANKS(ADDR_SURF_16_BANK));
2610 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2613 NUM_BANKS(ADDR_SURF_16_BANK));
2614 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2616 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2617 NUM_BANKS(ADDR_SURF_16_BANK));
2618 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2620 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621 NUM_BANKS(ADDR_SURF_16_BANK));
2622 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625 NUM_BANKS(ADDR_SURF_16_BANK));
2626 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2629 NUM_BANKS(ADDR_SURF_16_BANK));
2630 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2633 NUM_BANKS(ADDR_SURF_16_BANK));
2634 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2637 NUM_BANKS(ADDR_SURF_8_BANK));
2638 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641 NUM_BANKS(ADDR_SURF_4_BANK));
2642 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645 NUM_BANKS(ADDR_SURF_4_BANK));
2647 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2648 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2650 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2651 if (reg_offset != 7)
2652 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2655 case CHIP_POLARIS11:
2656 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2660 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2664 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2668 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2672 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2681 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2684 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2688 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2694 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2702 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2710 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2722 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2726 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2730 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2734 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2738 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2742 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2746 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2750 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2754 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2755 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2757 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2758 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2759 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2762 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2763 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2770 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2771 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2774 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2775 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2779 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2782 NUM_BANKS(ADDR_SURF_16_BANK));
2784 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787 NUM_BANKS(ADDR_SURF_16_BANK));
2789 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792 NUM_BANKS(ADDR_SURF_16_BANK));
2794 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2797 NUM_BANKS(ADDR_SURF_16_BANK));
2799 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2802 NUM_BANKS(ADDR_SURF_16_BANK));
2804 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2807 NUM_BANKS(ADDR_SURF_16_BANK));
2809 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812 NUM_BANKS(ADDR_SURF_16_BANK));
2814 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2817 NUM_BANKS(ADDR_SURF_16_BANK));
2819 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822 NUM_BANKS(ADDR_SURF_16_BANK));
2824 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827 NUM_BANKS(ADDR_SURF_16_BANK));
2829 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2831 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832 NUM_BANKS(ADDR_SURF_16_BANK));
2834 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2837 NUM_BANKS(ADDR_SURF_16_BANK));
2839 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842 NUM_BANKS(ADDR_SURF_8_BANK));
2844 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2847 NUM_BANKS(ADDR_SURF_4_BANK));
2849 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2850 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2852 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2853 if (reg_offset != 7)
2854 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2857 case CHIP_POLARIS10:
2858 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2859 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2862 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2866 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2870 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2874 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2878 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2882 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2883 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2886 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2890 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2891 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2892 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2893 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2900 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2905 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2908 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2920 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2924 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2928 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2932 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2936 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2940 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2944 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2948 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2949 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2952 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2956 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2960 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2961 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2964 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2965 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2973 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2976 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2979 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2981 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984 NUM_BANKS(ADDR_SURF_16_BANK));
2986 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2988 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2989 NUM_BANKS(ADDR_SURF_16_BANK));
2991 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 NUM_BANKS(ADDR_SURF_16_BANK));
2996 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 NUM_BANKS(ADDR_SURF_16_BANK));
3001 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3003 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3004 NUM_BANKS(ADDR_SURF_16_BANK));
3006 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3008 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3009 NUM_BANKS(ADDR_SURF_16_BANK));
3011 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3014 NUM_BANKS(ADDR_SURF_16_BANK));
3016 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3018 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019 NUM_BANKS(ADDR_SURF_16_BANK));
3021 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 NUM_BANKS(ADDR_SURF_16_BANK));
3026 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3029 NUM_BANKS(ADDR_SURF_16_BANK));
3031 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3034 NUM_BANKS(ADDR_SURF_16_BANK));
3036 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3039 NUM_BANKS(ADDR_SURF_8_BANK));
3041 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3044 NUM_BANKS(ADDR_SURF_4_BANK));
3046 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049 NUM_BANKS(ADDR_SURF_4_BANK));
3051 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3052 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3054 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3055 if (reg_offset != 7)
3056 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3060 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3061 PIPE_CONFIG(ADDR_SURF_P2) |
3062 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3064 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3065 PIPE_CONFIG(ADDR_SURF_P2) |
3066 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3068 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3069 PIPE_CONFIG(ADDR_SURF_P2) |
3070 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3072 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3073 PIPE_CONFIG(ADDR_SURF_P2) |
3074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3076 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3077 PIPE_CONFIG(ADDR_SURF_P2) |
3078 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3080 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3081 PIPE_CONFIG(ADDR_SURF_P2) |
3082 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3084 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085 PIPE_CONFIG(ADDR_SURF_P2) |
3086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3088 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3089 PIPE_CONFIG(ADDR_SURF_P2));
3090 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3091 PIPE_CONFIG(ADDR_SURF_P2) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3094 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3095 PIPE_CONFIG(ADDR_SURF_P2) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3098 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099 PIPE_CONFIG(ADDR_SURF_P2) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3102 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3103 PIPE_CONFIG(ADDR_SURF_P2) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3106 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107 PIPE_CONFIG(ADDR_SURF_P2) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3109 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3110 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3111 PIPE_CONFIG(ADDR_SURF_P2) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3114 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3115 PIPE_CONFIG(ADDR_SURF_P2) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3117 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3118 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3119 PIPE_CONFIG(ADDR_SURF_P2) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3121 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3122 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3123 PIPE_CONFIG(ADDR_SURF_P2) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3125 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3126 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3129 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3130 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3134 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3138 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3142 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3146 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3150 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3163 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3164 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3165 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3166 NUM_BANKS(ADDR_SURF_8_BANK));
3167 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3170 NUM_BANKS(ADDR_SURF_8_BANK));
3171 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3172 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3173 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3174 NUM_BANKS(ADDR_SURF_8_BANK));
3175 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3178 NUM_BANKS(ADDR_SURF_8_BANK));
3179 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3182 NUM_BANKS(ADDR_SURF_8_BANK));
3183 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3184 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3185 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3186 NUM_BANKS(ADDR_SURF_8_BANK));
3187 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3188 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3189 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3190 NUM_BANKS(ADDR_SURF_8_BANK));
3191 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3194 NUM_BANKS(ADDR_SURF_16_BANK));
3195 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3198 NUM_BANKS(ADDR_SURF_16_BANK));
3199 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3202 NUM_BANKS(ADDR_SURF_16_BANK));
3203 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206 NUM_BANKS(ADDR_SURF_16_BANK));
3207 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210 NUM_BANKS(ADDR_SURF_16_BANK));
3211 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214 NUM_BANKS(ADDR_SURF_16_BANK));
3215 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218 NUM_BANKS(ADDR_SURF_8_BANK));
3220 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3221 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3223 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3225 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3226 if (reg_offset != 7)
3227 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3232 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3236 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3252 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3256 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3260 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3264 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3265 PIPE_CONFIG(ADDR_SURF_P2));
3266 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267 PIPE_CONFIG(ADDR_SURF_P2) |
3268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271 PIPE_CONFIG(ADDR_SURF_P2) |
3272 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275 PIPE_CONFIG(ADDR_SURF_P2) |
3276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3278 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3279 PIPE_CONFIG(ADDR_SURF_P2) |
3280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3282 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283 PIPE_CONFIG(ADDR_SURF_P2) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3286 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3287 PIPE_CONFIG(ADDR_SURF_P2) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291 PIPE_CONFIG(ADDR_SURF_P2) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3294 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3295 PIPE_CONFIG(ADDR_SURF_P2) |
3296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3299 PIPE_CONFIG(ADDR_SURF_P2) |
3300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3303 PIPE_CONFIG(ADDR_SURF_P2) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3314 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3322 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3331 PIPE_CONFIG(ADDR_SURF_P2) |
3332 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3334 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3335 PIPE_CONFIG(ADDR_SURF_P2) |
3336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3339 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3342 NUM_BANKS(ADDR_SURF_8_BANK));
3343 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346 NUM_BANKS(ADDR_SURF_8_BANK));
3347 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350 NUM_BANKS(ADDR_SURF_8_BANK));
3351 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3354 NUM_BANKS(ADDR_SURF_8_BANK));
3355 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3358 NUM_BANKS(ADDR_SURF_8_BANK));
3359 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3362 NUM_BANKS(ADDR_SURF_8_BANK));
3363 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3366 NUM_BANKS(ADDR_SURF_8_BANK));
3367 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370 NUM_BANKS(ADDR_SURF_16_BANK));
3371 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374 NUM_BANKS(ADDR_SURF_16_BANK));
3375 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3378 NUM_BANKS(ADDR_SURF_16_BANK));
3379 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382 NUM_BANKS(ADDR_SURF_16_BANK));
3383 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386 NUM_BANKS(ADDR_SURF_16_BANK));
3387 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390 NUM_BANKS(ADDR_SURF_16_BANK));
3391 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394 NUM_BANKS(ADDR_SURF_8_BANK));
3396 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3397 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3399 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3401 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3402 if (reg_offset != 7)
3403 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3409 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3410 u32 se_num, u32 sh_num, u32 instance)
3414 if (instance == 0xffffffff)
3415 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3417 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3419 if (se_num == 0xffffffff)
3420 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3422 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3424 if (sh_num == 0xffffffff)
3425 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3427 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3429 WREG32(mmGRBM_GFX_INDEX, data);
3432 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3434 return (u32)((1ULL << bit_width) - 1);
3437 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3441 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3442 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3444 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3446 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3447 adev->gfx.config.max_sh_per_se);
3449 return (~data) & mask;
3453 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3455 switch (adev->asic_type) {
3457 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3458 RB_XSEL2(1) | PKR_MAP(2) |
3459 PKR_XSEL(1) | PKR_YSEL(1) |
3460 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3461 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3465 case CHIP_POLARIS10:
3466 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3467 SE_XSEL(1) | SE_YSEL(1);
3468 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3473 *rconf |= RB_MAP_PKR0(2);
3476 case CHIP_POLARIS11:
3477 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3478 SE_XSEL(1) | SE_YSEL(1);
3486 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3492 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3493 u32 raster_config, u32 raster_config_1,
3494 unsigned rb_mask, unsigned num_rb)
3496 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3497 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3498 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3499 unsigned rb_per_se = num_rb / num_se;
3500 unsigned se_mask[4];
3503 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3504 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3505 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3506 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3508 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3509 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3510 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3512 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3513 (!se_mask[2] && !se_mask[3]))) {
3514 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3516 if (!se_mask[0] && !se_mask[1]) {
3518 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3521 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3525 for (se = 0; se < num_se; se++) {
3526 unsigned raster_config_se = raster_config;
3527 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3528 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3529 int idx = (se / 2) * 2;
3531 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3532 raster_config_se &= ~SE_MAP_MASK;
3534 if (!se_mask[idx]) {
3535 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3537 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3541 pkr0_mask &= rb_mask;
3542 pkr1_mask &= rb_mask;
3543 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3544 raster_config_se &= ~PKR_MAP_MASK;
3547 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3549 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3553 if (rb_per_se >= 2) {
3554 unsigned rb0_mask = 1 << (se * rb_per_se);
3555 unsigned rb1_mask = rb0_mask << 1;
3557 rb0_mask &= rb_mask;
3558 rb1_mask &= rb_mask;
3559 if (!rb0_mask || !rb1_mask) {
3560 raster_config_se &= ~RB_MAP_PKR0_MASK;
3564 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3567 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3571 if (rb_per_se > 2) {
3572 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3573 rb1_mask = rb0_mask << 1;
3574 rb0_mask &= rb_mask;
3575 rb1_mask &= rb_mask;
3576 if (!rb0_mask || !rb1_mask) {
3577 raster_config_se &= ~RB_MAP_PKR1_MASK;
3581 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3584 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3590 /* GRBM_GFX_INDEX has a different offset on VI */
3591 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3592 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3593 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3596 /* GRBM_GFX_INDEX has a different offset on VI */
3597 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3600 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3604 u32 raster_config = 0, raster_config_1 = 0;
3606 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3607 adev->gfx.config.max_sh_per_se;
3608 unsigned num_rb_pipes;
3610 mutex_lock(&adev->grbm_idx_mutex);
3611 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3612 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3613 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3614 data = gfx_v8_0_get_rb_active_bitmap(adev);
3615 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3616 rb_bitmap_width_per_sh);
3619 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3621 adev->gfx.config.backend_enable_mask = active_rbs;
3622 adev->gfx.config.num_rbs = hweight32(active_rbs);
3624 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3625 adev->gfx.config.max_shader_engines, 16);
3627 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3629 if (!adev->gfx.config.backend_enable_mask ||
3630 adev->gfx.config.num_rbs >= num_rb_pipes) {
3631 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3632 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3634 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3635 adev->gfx.config.backend_enable_mask,
3639 mutex_unlock(&adev->grbm_idx_mutex);
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the compute VMID sh_mem registers
3650 #define DEFAULT_SH_MEM_BASES (0x6000)
3651 #define FIRST_COMPUTE_VMID (8)
3652 #define LAST_COMPUTE_VMID (16)
3653 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3656 uint32_t sh_mem_config;
3657 uint32_t sh_mem_bases;
3660 * Configure apertures:
3661 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3662 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3663 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3665 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3667 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3668 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3669 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3670 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3671 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3672 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3674 mutex_lock(&adev->srbm_mutex);
3675 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3676 vi_srbm_select(adev, 0, 0, 0, i);
3677 /* CP and shaders */
3678 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3679 WREG32(mmSH_MEM_APE1_BASE, 1);
3680 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3681 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3683 vi_srbm_select(adev, 0, 0, 0, 0);
3684 mutex_unlock(&adev->srbm_mutex);
3687 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3692 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3693 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3694 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3695 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3697 gfx_v8_0_tiling_mode_table_init(adev);
3698 gfx_v8_0_setup_rb(adev);
3699 gfx_v8_0_get_cu_info(adev);
3701 /* XXX SH_MEM regs */
3702 /* where to put LDS, scratch, GPUVM in FSA64 space */
3703 mutex_lock(&adev->srbm_mutex);
3704 for (i = 0; i < 16; i++) {
3705 vi_srbm_select(adev, 0, 0, 0, i);
3706 /* CP and shaders */
3708 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3709 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3710 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3711 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3712 WREG32(mmSH_MEM_CONFIG, tmp);
3714 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3715 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3716 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3717 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3718 WREG32(mmSH_MEM_CONFIG, tmp);
3721 WREG32(mmSH_MEM_APE1_BASE, 1);
3722 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3723 WREG32(mmSH_MEM_BASES, 0);
3725 vi_srbm_select(adev, 0, 0, 0, 0);
3726 mutex_unlock(&adev->srbm_mutex);
3728 gfx_v8_0_init_compute_vmid(adev);
3730 mutex_lock(&adev->grbm_idx_mutex);
3732 * making sure that the following register writes will be broadcasted
3733 * to all the shaders
3735 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3737 WREG32(mmPA_SC_FIFO_SIZE,
3738 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3739 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3740 (adev->gfx.config.sc_prim_fifo_size_backend <<
3741 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3742 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3743 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3744 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3745 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3746 mutex_unlock(&adev->grbm_idx_mutex);
3750 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3755 mutex_lock(&adev->grbm_idx_mutex);
3756 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3757 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3758 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3759 for (k = 0; k < adev->usec_timeout; k++) {
3760 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3766 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3767 mutex_unlock(&adev->grbm_idx_mutex);
3769 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3770 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3771 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3772 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3773 for (k = 0; k < adev->usec_timeout; k++) {
3774 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3780 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3783 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3785 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3786 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3787 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3788 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3790 WREG32(mmCP_INT_CNTL_RING0, tmp);
3793 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3796 WREG32(mmRLC_CSIB_ADDR_HI,
3797 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3798 WREG32(mmRLC_CSIB_ADDR_LO,
3799 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3800 WREG32(mmRLC_CSIB_LENGTH,
3801 adev->gfx.rlc.clear_state_size);
3804 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3807 int *unique_indices,
3810 int *ind_start_offsets,
3815 bool new_entry = true;
3817 for (; ind_offset < list_size; ind_offset++) {
3821 ind_start_offsets[*offset_count] = ind_offset;
3822 *offset_count = *offset_count + 1;
3823 BUG_ON(*offset_count >= max_offset);
3826 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3833 /* look for the matching indice */
3835 indices < *indices_count;
3837 if (unique_indices[indices] ==
3838 register_list_format[ind_offset])
3842 if (indices >= *indices_count) {
3843 unique_indices[*indices_count] =
3844 register_list_format[ind_offset];
3845 indices = *indices_count;
3846 *indices_count = *indices_count + 1;
3847 BUG_ON(*indices_count >= max_indices);
3850 register_list_format[ind_offset] = indices;
3854 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3857 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3858 int indices_count = 0;
3859 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3860 int offset_count = 0;
3863 unsigned int *register_list_format =
3864 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3865 if (register_list_format == NULL)
3867 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3868 adev->gfx.rlc.reg_list_format_size_bytes);
3870 gfx_v8_0_parse_ind_reg_list(register_list_format,
3871 RLC_FormatDirectRegListLength,
3872 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3875 sizeof(unique_indices) / sizeof(int),
3876 indirect_start_offsets,
3878 sizeof(indirect_start_offsets)/sizeof(int));
3880 /* save and restore list */
3881 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3883 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3884 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3885 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3888 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3889 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3890 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3892 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3893 list_size = list_size >> 1;
3894 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3895 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3897 /* starting offsets starts */
3898 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3899 adev->gfx.rlc.starting_offsets_start);
3900 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3901 WREG32(mmRLC_GPM_SCRATCH_DATA,
3902 indirect_start_offsets[i]);
3904 /* unique indices */
3905 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3906 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3907 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3908 if (unique_indices[i] != 0) {
3909 amdgpu_mm_wreg(adev, temp + i,
3910 unique_indices[i] & 0x3FFFF, false);
3911 amdgpu_mm_wreg(adev, data + i,
3912 unique_indices[i] >> 20, false);
3915 kfree(register_list_format);
3920 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3922 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3925 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3929 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3930 AMD_PG_SUPPORT_GFX_SMG |
3931 AMD_PG_SUPPORT_GFX_DMG)) {
3932 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3934 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3935 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3936 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3937 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3938 WREG32(mmRLC_PG_DELAY, data);
3940 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3941 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3945 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3948 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3951 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3954 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3957 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3959 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
3962 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3964 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3965 AMD_PG_SUPPORT_GFX_SMG |
3966 AMD_PG_SUPPORT_GFX_DMG |
3968 AMD_PG_SUPPORT_GDS |
3969 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3970 gfx_v8_0_init_csb(adev);
3971 gfx_v8_0_init_save_restore_list(adev);
3972 gfx_v8_0_enable_save_restore_machine(adev);
3974 if ((adev->asic_type == CHIP_CARRIZO) ||
3975 (adev->asic_type == CHIP_STONEY)) {
3976 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3977 gfx_v8_0_init_power_gating(adev);
3978 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3979 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3980 cz_enable_sck_slow_down_on_power_up(adev, true);
3981 cz_enable_sck_slow_down_on_power_down(adev, true);
3983 cz_enable_sck_slow_down_on_power_up(adev, false);
3984 cz_enable_sck_slow_down_on_power_down(adev, false);
3986 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3987 cz_enable_cp_power_gating(adev, true);
3989 cz_enable_cp_power_gating(adev, false);
3990 } else if (adev->asic_type == CHIP_POLARIS11) {
3991 gfx_v8_0_init_power_gating(adev);
3996 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3998 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4000 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4001 gfx_v8_0_wait_for_rlc_serdes(adev);
4004 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4006 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4009 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4013 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4015 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4017 /* carrizo do enable cp interrupt after cp inited */
4018 if (!(adev->flags & AMD_IS_APU))
4019 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4024 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4026 const struct rlc_firmware_header_v2_0 *hdr;
4027 const __le32 *fw_data;
4028 unsigned i, fw_size;
4030 if (!adev->gfx.rlc_fw)
4033 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4034 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4036 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4037 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4038 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4040 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4041 for (i = 0; i < fw_size; i++)
4042 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4043 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4048 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4053 gfx_v8_0_rlc_stop(adev);
4056 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4057 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4058 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4059 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4060 if (adev->asic_type == CHIP_POLARIS11 ||
4061 adev->asic_type == CHIP_POLARIS10) {
4062 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4064 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4068 WREG32(mmRLC_PG_CNTL, 0);
4070 gfx_v8_0_rlc_reset(adev);
4071 gfx_v8_0_init_pg(adev);
4073 if (!adev->pp_enabled) {
4074 if (!adev->firmware.smu_load) {
4075 /* legacy rlc firmware loading */
4076 r = gfx_v8_0_rlc_load_microcode(adev);
4080 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4081 AMDGPU_UCODE_ID_RLC_G);
4087 gfx_v8_0_rlc_start(adev);
4092 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4095 u32 tmp = RREG32(mmCP_ME_CNTL);
4098 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4099 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4100 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4103 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4104 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4105 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4106 adev->gfx.gfx_ring[i].ready = false;
4108 WREG32(mmCP_ME_CNTL, tmp);
4112 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4114 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4115 const struct gfx_firmware_header_v1_0 *ce_hdr;
4116 const struct gfx_firmware_header_v1_0 *me_hdr;
4117 const __le32 *fw_data;
4118 unsigned i, fw_size;
4120 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4123 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4124 adev->gfx.pfp_fw->data;
4125 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4126 adev->gfx.ce_fw->data;
4127 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4128 adev->gfx.me_fw->data;
4130 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4131 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4132 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4134 gfx_v8_0_cp_gfx_enable(adev, false);
4137 fw_data = (const __le32 *)
4138 (adev->gfx.pfp_fw->data +
4139 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4140 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4141 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4142 for (i = 0; i < fw_size; i++)
4143 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4144 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4147 fw_data = (const __le32 *)
4148 (adev->gfx.ce_fw->data +
4149 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4150 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4151 WREG32(mmCP_CE_UCODE_ADDR, 0);
4152 for (i = 0; i < fw_size; i++)
4153 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4154 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4157 fw_data = (const __le32 *)
4158 (adev->gfx.me_fw->data +
4159 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4160 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4161 WREG32(mmCP_ME_RAM_WADDR, 0);
4162 for (i = 0; i < fw_size; i++)
4163 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4164 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4169 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4172 const struct cs_section_def *sect = NULL;
4173 const struct cs_extent_def *ext = NULL;
4175 /* begin clear state */
4177 /* context control state */
4180 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4181 for (ext = sect->section; ext->extent != NULL; ++ext) {
4182 if (sect->id == SECT_CONTEXT)
4183 count += 2 + ext->reg_count;
4188 /* pa_sc_raster_config/pa_sc_raster_config1 */
4190 /* end clear state */
4198 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4200 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4201 const struct cs_section_def *sect = NULL;
4202 const struct cs_extent_def *ext = NULL;
4206 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4207 WREG32(mmCP_ENDIAN_SWAP, 0);
4208 WREG32(mmCP_DEVICE_ID, 1);
4210 gfx_v8_0_cp_gfx_enable(adev, true);
4212 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4214 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4218 /* clear state buffer */
4219 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4220 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4222 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4223 amdgpu_ring_write(ring, 0x80000000);
4224 amdgpu_ring_write(ring, 0x80000000);
4226 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4227 for (ext = sect->section; ext->extent != NULL; ++ext) {
4228 if (sect->id == SECT_CONTEXT) {
4229 amdgpu_ring_write(ring,
4230 PACKET3(PACKET3_SET_CONTEXT_REG,
4232 amdgpu_ring_write(ring,
4233 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4234 for (i = 0; i < ext->reg_count; i++)
4235 amdgpu_ring_write(ring, ext->extent[i]);
4240 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4241 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4242 switch (adev->asic_type) {
4244 case CHIP_POLARIS10:
4245 amdgpu_ring_write(ring, 0x16000012);
4246 amdgpu_ring_write(ring, 0x0000002A);
4248 case CHIP_POLARIS11:
4249 amdgpu_ring_write(ring, 0x16000012);
4250 amdgpu_ring_write(ring, 0x00000000);
4253 amdgpu_ring_write(ring, 0x3a00161a);
4254 amdgpu_ring_write(ring, 0x0000002e);
4257 amdgpu_ring_write(ring, 0x00000002);
4258 amdgpu_ring_write(ring, 0x00000000);
4261 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4262 0x00000000 : 0x00000002);
4263 amdgpu_ring_write(ring, 0x00000000);
4266 amdgpu_ring_write(ring, 0x00000000);
4267 amdgpu_ring_write(ring, 0x00000000);
4273 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4274 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4276 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4277 amdgpu_ring_write(ring, 0);
4279 /* init the CE partitions */
4280 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4281 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4282 amdgpu_ring_write(ring, 0x8000);
4283 amdgpu_ring_write(ring, 0x8000);
4285 amdgpu_ring_commit(ring);
4290 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4292 struct amdgpu_ring *ring;
4295 u64 rb_addr, rptr_addr;
4298 /* Set the write pointer delay */
4299 WREG32(mmCP_RB_WPTR_DELAY, 0);
4301 /* set the RB to use vmid 0 */
4302 WREG32(mmCP_RB_VMID, 0);
4304 /* Set ring buffer size */
4305 ring = &adev->gfx.gfx_ring[0];
4306 rb_bufsz = order_base_2(ring->ring_size / 8);
4307 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4308 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4309 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4310 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4312 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4314 WREG32(mmCP_RB0_CNTL, tmp);
4316 /* Initialize the ring buffer's read and write pointers */
4317 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4319 WREG32(mmCP_RB0_WPTR, ring->wptr);
4321 /* set the wb address wether it's enabled or not */
4322 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4323 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4324 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4327 WREG32(mmCP_RB0_CNTL, tmp);
4329 rb_addr = ring->gpu_addr >> 8;
4330 WREG32(mmCP_RB0_BASE, rb_addr);
4331 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4333 /* no gfx doorbells on iceland */
4334 if (adev->asic_type != CHIP_TOPAZ) {
4335 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4336 if (ring->use_doorbell) {
4337 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4338 DOORBELL_OFFSET, ring->doorbell_index);
4339 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4341 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4344 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4347 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4349 if (adev->asic_type == CHIP_TONGA) {
4350 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4351 DOORBELL_RANGE_LOWER,
4352 AMDGPU_DOORBELL_GFX_RING0);
4353 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4355 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4356 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4361 /* start the ring */
4362 gfx_v8_0_cp_gfx_start(adev);
4364 r = amdgpu_ring_test_ring(ring);
4366 ring->ready = false;
4371 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4376 WREG32(mmCP_MEC_CNTL, 0);
4378 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4379 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4380 adev->gfx.compute_ring[i].ready = false;
4385 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4387 const struct gfx_firmware_header_v1_0 *mec_hdr;
4388 const __le32 *fw_data;
4389 unsigned i, fw_size;
4391 if (!adev->gfx.mec_fw)
4394 gfx_v8_0_cp_compute_enable(adev, false);
4396 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4397 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4399 fw_data = (const __le32 *)
4400 (adev->gfx.mec_fw->data +
4401 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4402 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4405 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4406 for (i = 0; i < fw_size; i++)
4407 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4408 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4410 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4411 if (adev->gfx.mec2_fw) {
4412 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4414 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4415 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4417 fw_data = (const __le32 *)
4418 (adev->gfx.mec2_fw->data +
4419 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4420 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4422 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4423 for (i = 0; i < fw_size; i++)
4424 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4425 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4432 uint32_t header; /* ordinal0 */
4433 uint32_t compute_dispatch_initiator; /* ordinal1 */
4434 uint32_t compute_dim_x; /* ordinal2 */
4435 uint32_t compute_dim_y; /* ordinal3 */
4436 uint32_t compute_dim_z; /* ordinal4 */
4437 uint32_t compute_start_x; /* ordinal5 */
4438 uint32_t compute_start_y; /* ordinal6 */
4439 uint32_t compute_start_z; /* ordinal7 */
4440 uint32_t compute_num_thread_x; /* ordinal8 */
4441 uint32_t compute_num_thread_y; /* ordinal9 */
4442 uint32_t compute_num_thread_z; /* ordinal10 */
4443 uint32_t compute_pipelinestat_enable; /* ordinal11 */
4444 uint32_t compute_perfcount_enable; /* ordinal12 */
4445 uint32_t compute_pgm_lo; /* ordinal13 */
4446 uint32_t compute_pgm_hi; /* ordinal14 */
4447 uint32_t compute_tba_lo; /* ordinal15 */
4448 uint32_t compute_tba_hi; /* ordinal16 */
4449 uint32_t compute_tma_lo; /* ordinal17 */
4450 uint32_t compute_tma_hi; /* ordinal18 */
4451 uint32_t compute_pgm_rsrc1; /* ordinal19 */
4452 uint32_t compute_pgm_rsrc2; /* ordinal20 */
4453 uint32_t compute_vmid; /* ordinal21 */
4454 uint32_t compute_resource_limits; /* ordinal22 */
4455 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
4456 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
4457 uint32_t compute_tmpring_size; /* ordinal25 */
4458 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
4459 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
4460 uint32_t compute_restart_x; /* ordinal28 */
4461 uint32_t compute_restart_y; /* ordinal29 */
4462 uint32_t compute_restart_z; /* ordinal30 */
4463 uint32_t compute_thread_trace_enable; /* ordinal31 */
4464 uint32_t compute_misc_reserved; /* ordinal32 */
4465 uint32_t compute_dispatch_id; /* ordinal33 */
4466 uint32_t compute_threadgroup_id; /* ordinal34 */
4467 uint32_t compute_relaunch; /* ordinal35 */
4468 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
4469 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
4470 uint32_t compute_wave_restore_control; /* ordinal38 */
4471 uint32_t reserved9; /* ordinal39 */
4472 uint32_t reserved10; /* ordinal40 */
4473 uint32_t reserved11; /* ordinal41 */
4474 uint32_t reserved12; /* ordinal42 */
4475 uint32_t reserved13; /* ordinal43 */
4476 uint32_t reserved14; /* ordinal44 */
4477 uint32_t reserved15; /* ordinal45 */
4478 uint32_t reserved16; /* ordinal46 */
4479 uint32_t reserved17; /* ordinal47 */
4480 uint32_t reserved18; /* ordinal48 */
4481 uint32_t reserved19; /* ordinal49 */
4482 uint32_t reserved20; /* ordinal50 */
4483 uint32_t reserved21; /* ordinal51 */
4484 uint32_t reserved22; /* ordinal52 */
4485 uint32_t reserved23; /* ordinal53 */
4486 uint32_t reserved24; /* ordinal54 */
4487 uint32_t reserved25; /* ordinal55 */
4488 uint32_t reserved26; /* ordinal56 */
4489 uint32_t reserved27; /* ordinal57 */
4490 uint32_t reserved28; /* ordinal58 */
4491 uint32_t reserved29; /* ordinal59 */
4492 uint32_t reserved30; /* ordinal60 */
4493 uint32_t reserved31; /* ordinal61 */
4494 uint32_t reserved32; /* ordinal62 */
4495 uint32_t reserved33; /* ordinal63 */
4496 uint32_t reserved34; /* ordinal64 */
4497 uint32_t compute_user_data_0; /* ordinal65 */
4498 uint32_t compute_user_data_1; /* ordinal66 */
4499 uint32_t compute_user_data_2; /* ordinal67 */
4500 uint32_t compute_user_data_3; /* ordinal68 */
4501 uint32_t compute_user_data_4; /* ordinal69 */
4502 uint32_t compute_user_data_5; /* ordinal70 */
4503 uint32_t compute_user_data_6; /* ordinal71 */
4504 uint32_t compute_user_data_7; /* ordinal72 */
4505 uint32_t compute_user_data_8; /* ordinal73 */
4506 uint32_t compute_user_data_9; /* ordinal74 */
4507 uint32_t compute_user_data_10; /* ordinal75 */
4508 uint32_t compute_user_data_11; /* ordinal76 */
4509 uint32_t compute_user_data_12; /* ordinal77 */
4510 uint32_t compute_user_data_13; /* ordinal78 */
4511 uint32_t compute_user_data_14; /* ordinal79 */
4512 uint32_t compute_user_data_15; /* ordinal80 */
4513 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
4514 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
4515 uint32_t reserved35; /* ordinal83 */
4516 uint32_t reserved36; /* ordinal84 */
4517 uint32_t reserved37; /* ordinal85 */
4518 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
4519 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
4520 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
4521 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
4522 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
4523 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
4524 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
4525 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
4526 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
4527 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
4528 uint32_t reserved38; /* ordinal96 */
4529 uint32_t reserved39; /* ordinal97 */
4530 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
4531 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
4532 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
4533 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
4534 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
4535 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
4536 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
4537 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
4538 uint32_t reserved40; /* ordinal106 */
4539 uint32_t reserved41; /* ordinal107 */
4540 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
4541 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
4542 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
4543 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
4544 uint32_t reserved42; /* ordinal112 */
4545 uint32_t reserved43; /* ordinal113 */
4546 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
4547 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
4548 uint32_t cp_packet_id_lo; /* ordinal116 */
4549 uint32_t cp_packet_id_hi; /* ordinal117 */
4550 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
4551 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
4552 uint32_t gds_save_base_addr_lo; /* ordinal120 */
4553 uint32_t gds_save_base_addr_hi; /* ordinal121 */
4554 uint32_t gds_save_mask_lo; /* ordinal122 */
4555 uint32_t gds_save_mask_hi; /* ordinal123 */
4556 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
4557 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
4558 uint32_t reserved44; /* ordinal126 */
4559 uint32_t reserved45; /* ordinal127 */
4560 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
4561 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
4562 uint32_t cp_hqd_active; /* ordinal130 */
4563 uint32_t cp_hqd_vmid; /* ordinal131 */
4564 uint32_t cp_hqd_persistent_state; /* ordinal132 */
4565 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
4566 uint32_t cp_hqd_queue_priority; /* ordinal134 */
4567 uint32_t cp_hqd_quantum; /* ordinal135 */
4568 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
4569 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
4570 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
4571 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
4572 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
4573 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
4574 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
4575 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
4576 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
4577 uint32_t cp_hqd_pq_control; /* ordinal145 */
4578 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
4579 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
4580 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
4581 uint32_t cp_hqd_ib_control; /* ordinal149 */
4582 uint32_t cp_hqd_iq_timer; /* ordinal150 */
4583 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
4584 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
4585 uint32_t cp_hqd_dma_offload; /* ordinal153 */
4586 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
4587 uint32_t cp_hqd_msg_type; /* ordinal155 */
4588 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
4589 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
4590 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
4591 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
4592 uint32_t cp_hqd_hq_status0; /* ordinal160 */
4593 uint32_t cp_hqd_hq_control0; /* ordinal161 */
4594 uint32_t cp_mqd_control; /* ordinal162 */
4595 uint32_t cp_hqd_hq_status1; /* ordinal163 */
4596 uint32_t cp_hqd_hq_control1; /* ordinal164 */
4597 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
4598 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
4599 uint32_t cp_hqd_eop_control; /* ordinal167 */
4600 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
4601 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
4602 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
4603 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
4604 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
4605 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
4606 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
4607 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
4608 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
4609 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
4610 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
4611 uint32_t cp_hqd_error; /* ordinal179 */
4612 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
4613 uint32_t cp_hqd_eop_dones; /* ordinal181 */
4614 uint32_t reserved46; /* ordinal182 */
4615 uint32_t reserved47; /* ordinal183 */
4616 uint32_t reserved48; /* ordinal184 */
4617 uint32_t reserved49; /* ordinal185 */
4618 uint32_t reserved50; /* ordinal186 */
4619 uint32_t reserved51; /* ordinal187 */
4620 uint32_t reserved52; /* ordinal188 */
4621 uint32_t reserved53; /* ordinal189 */
4622 uint32_t reserved54; /* ordinal190 */
4623 uint32_t reserved55; /* ordinal191 */
4624 uint32_t iqtimer_pkt_header; /* ordinal192 */
4625 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
4626 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
4627 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
4628 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
4629 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
4630 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
4631 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
4632 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
4633 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
4634 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
4635 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
4636 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
4637 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
4638 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
4639 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
4640 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
4641 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
4642 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
4643 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
4644 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
4645 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
4646 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
4647 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
4648 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
4649 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
4650 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
4651 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
4652 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
4653 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
4654 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
4655 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
4656 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
4657 uint32_t reserved56; /* ordinal225 */
4658 uint32_t reserved57; /* ordinal226 */
4659 uint32_t reserved58; /* ordinal227 */
4660 uint32_t set_resources_header; /* ordinal228 */
4661 uint32_t set_resources_dw1; /* ordinal229 */
4662 uint32_t set_resources_dw2; /* ordinal230 */
4663 uint32_t set_resources_dw3; /* ordinal231 */
4664 uint32_t set_resources_dw4; /* ordinal232 */
4665 uint32_t set_resources_dw5; /* ordinal233 */
4666 uint32_t set_resources_dw6; /* ordinal234 */
4667 uint32_t set_resources_dw7; /* ordinal235 */
4668 uint32_t reserved59; /* ordinal236 */
4669 uint32_t reserved60; /* ordinal237 */
4670 uint32_t reserved61; /* ordinal238 */
4671 uint32_t reserved62; /* ordinal239 */
4672 uint32_t reserved63; /* ordinal240 */
4673 uint32_t reserved64; /* ordinal241 */
4674 uint32_t reserved65; /* ordinal242 */
4675 uint32_t reserved66; /* ordinal243 */
4676 uint32_t reserved67; /* ordinal244 */
4677 uint32_t reserved68; /* ordinal245 */
4678 uint32_t reserved69; /* ordinal246 */
4679 uint32_t reserved70; /* ordinal247 */
4680 uint32_t reserved71; /* ordinal248 */
4681 uint32_t reserved72; /* ordinal249 */
4682 uint32_t reserved73; /* ordinal250 */
4683 uint32_t reserved74; /* ordinal251 */
4684 uint32_t reserved75; /* ordinal252 */
4685 uint32_t reserved76; /* ordinal253 */
4686 uint32_t reserved77; /* ordinal254 */
4687 uint32_t reserved78; /* ordinal255 */
4689 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4692 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
/*
 * Tear down the per-ring MQD (memory queue descriptor) buffer objects
 * created by the compute resume path: unpin, unreserve and drop the
 * reference for every compute ring that has one.
 */
4696 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4697 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4699 if (ring->mqd_obj) {
4700 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4701 if (unlikely(r != 0))
/* Reserve failure is only warned about; teardown still proceeds. */
4702 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4704 amdgpu_bo_unpin(ring->mqd_obj);
4705 amdgpu_bo_unreserve(ring->mqd_obj);
4707 amdgpu_bo_unref(&ring->mqd_obj);
/* Clear the pointer so a later resume re-creates the BO. */
4708 ring->mqd_obj = NULL;
4713 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
/*
 * Bring up the compute (MEC) queues:
 *  1) program the per-pipe EOP buffers under srbm_mutex,
 *  2) for each compute ring, allocate/pin/map an MQD buffer, fill it in,
 *     mirror its contents into the CP_HQD_* registers and activate the
 *     hardware queue,
 *  3) start the MEC and ring-test every compute ring.
 * NOTE(review): several lines (error returns, braces, some declarations)
 * are elided in this extract; comments describe only what is visible.
 */
4717 bool use_doorbell = true;
4725 /* init the pipes */
4726 mutex_lock(&adev->srbm_mutex);
/* Pipes 0-3 belong to MEC1 (me=1), pipes 4-7 to MEC2 (me=2). */
4727 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4728 int me = (i < 4) ? 1 : 2;
4729 int pipe = (i < 4) ? i : (i - 4);
/* Each pipe gets its own MEC_HPD_SIZE slice of the shared EOP buffer. */
4731 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4734 vi_srbm_select(adev, me, pipe, 0, 0);
4736 /* write the EOP addr */
4737 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4738 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4740 /* set the VMID assigned */
4741 WREG32(mmCP_HQD_VMID, 0);
4743 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4744 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4745 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4746 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4747 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
/* Restore default SRBM selection before dropping the lock. */
4749 vi_srbm_select(adev, 0, 0, 0, 0);
4750 mutex_unlock(&adev->srbm_mutex);
4752 /* init the queues. Just two for now. */
4753 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4754 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
/* Lazily create the MQD buffer object on first resume. */
4756 if (ring->mqd_obj == NULL) {
4757 r = amdgpu_bo_create(adev,
4758 sizeof(struct vi_mqd),
4760 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4761 NULL, &ring->mqd_obj);
4763 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4768 r = amdgpu_bo_reserve(ring->mqd_obj, false);
/* Any failure from here on tears down all compute state again. */
4769 if (unlikely(r != 0)) {
4770 gfx_v8_0_cp_compute_fini(adev);
4773 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4776 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4777 gfx_v8_0_cp_compute_fini(adev);
4780 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4782 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4783 gfx_v8_0_cp_compute_fini(adev);
4787 /* init the mqd struct */
4788 memset(buf, 0, sizeof(struct vi_mqd));
4790 mqd = (struct vi_mqd *)buf;
/* 0xC0310800: MQD header magic expected by the CP microcode —
 * TODO(review) confirm against the firmware interface definition. */
4791 mqd->header = 0xC0310800;
4792 mqd->compute_pipelinestat_enable = 0x00000001;
/* Allow waves on every CU of every shader engine. */
4793 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4794 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4795 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4796 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4797 mqd->compute_misc_reserved = 0x00000003;
4799 mutex_lock(&adev->srbm_mutex);
4800 vi_srbm_select(adev, ring->me,
4804 /* disable wptr polling */
4805 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4806 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4807 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* Capture the EOP base programmed in the per-pipe loop above. */
4809 mqd->cp_hqd_eop_base_addr_lo =
4810 RREG32(mmCP_HQD_EOP_BASE_ADDR);
4811 mqd->cp_hqd_eop_base_addr_hi =
4812 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4814 /* enable doorbell? */
4815 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4817 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4819 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4821 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4822 mqd->cp_hqd_pq_doorbell_control = tmp;
4824 /* disable the queue if it's active */
4825 mqd->cp_hqd_dequeue_request = 0;
4826 mqd->cp_hqd_pq_rptr = 0;
4827 mqd->cp_hqd_pq_wptr= 0;
4828 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
/* Request dequeue and poll (up to usec_timeout) until inactive. */
4829 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4830 for (j = 0; j < adev->usec_timeout; j++) {
4831 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4835 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4836 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4837 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4840 /* set the pointer to the MQD */
4841 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4842 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4843 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4844 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4846 /* set MQD vmid to 0 */
4847 tmp = RREG32(mmCP_MQD_CONTROL);
4848 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4849 WREG32(mmCP_MQD_CONTROL, tmp);
4850 mqd->cp_mqd_control = tmp;
4852 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4853 hqd_gpu_addr = ring->gpu_addr >> 8;
4854 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4855 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4856 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4857 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4859 /* set up the HQD, this is similar to CP_RB0_CNTL */
4860 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4861 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4862 (order_base_2(ring->ring_size / 4) - 1));
4863 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4864 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4866 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4868 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4869 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
/* Kernel-owned, privileged queue. */
4870 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4871 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4872 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4873 mqd->cp_hqd_pq_control = tmp;
4875 /* set the wb address wether it's enabled or not */
4876 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4877 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4878 mqd->cp_hqd_pq_rptr_report_addr_hi =
4879 upper_32_bits(wb_gpu_addr) & 0xffff;
4880 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4881 mqd->cp_hqd_pq_rptr_report_addr_lo);
4882 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4883 mqd->cp_hqd_pq_rptr_report_addr_hi);
4885 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4886 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4887 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4888 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4889 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4890 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4891 mqd->cp_hqd_pq_wptr_poll_addr_hi);
4893 /* enable the doorbell if requested */
/* Doorbell aperture is only set up on these ASICs; others fall through
 * with doorbell control cleared (see the = 0 assignment below). */
4895 if ((adev->asic_type == CHIP_CARRIZO) ||
4896 (adev->asic_type == CHIP_FIJI) ||
4897 (adev->asic_type == CHIP_STONEY) ||
4898 (adev->asic_type == CHIP_POLARIS11) ||
4899 (adev->asic_type == CHIP_POLARIS10)) {
4900 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4901 AMDGPU_DOORBELL_KIQ << 2);
4902 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4903 AMDGPU_DOORBELL_MEC_RING7 << 2);
4905 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4906 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4907 DOORBELL_OFFSET, ring->doorbell_index);
4908 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4909 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4911 mqd->cp_hqd_pq_doorbell_control = tmp;
4914 mqd->cp_hqd_pq_doorbell_control = 0;
4916 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4917 mqd->cp_hqd_pq_doorbell_control);
4919 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4921 mqd->cp_hqd_pq_wptr = ring->wptr;
4922 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4923 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4925 /* set the vmid for the queue */
4926 mqd->cp_hqd_vmid = 0;
4927 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4929 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4930 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4931 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4932 mqd->cp_hqd_persistent_state = tmp;
4933 if (adev->asic_type == CHIP_STONEY ||
4934 adev->asic_type == CHIP_POLARIS11 ||
4935 adev->asic_type == CHIP_POLARIS10) {
4936 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4937 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4938 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4941 /* activate the queue */
4942 mqd->cp_hqd_active = 1;
4943 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4945 vi_srbm_select(adev, 0, 0, 0, 0);
4946 mutex_unlock(&adev->srbm_mutex);
4948 amdgpu_bo_kunmap(ring->mqd_obj);
4949 amdgpu_bo_unreserve(ring->mqd_obj);
4953 tmp = RREG32(mmCP_PQ_STATUS);
4954 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4955 WREG32(mmCP_PQ_STATUS, tmp);
/* Start the MEC and verify every compute ring actually works. */
4958 gfx_v8_0_cp_compute_enable(adev, true);
4960 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4961 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4964 r = amdgpu_ring_test_ring(ring);
/* Ring test failed: mark the ring unusable. */
4966 ring->ready = false;
4972 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
/*
 * Resume the whole command processor: load the CP microcode (either via
 * the legacy direct-write path or by waiting on the SMU loader), then
 * bring up the GFX and compute rings.
 * NOTE(review): error returns and braces are elided in this extract.
 */
4976 if (!(adev->flags & AMD_IS_APU))
/* Quiesce GUI-idle interrupts while firmware is (re)loaded. */
4977 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4979 if (!adev->pp_enabled) {
4980 if (!adev->firmware.smu_load) {
4981 /* legacy firmware loading */
4982 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4986 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU-assisted path: wait for each CP firmware to finish loading. */
4990 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4991 AMDGPU_UCODE_ID_CP_CE);
4995 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4996 AMDGPU_UCODE_ID_CP_PFP);
5000 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5001 AMDGPU_UCODE_ID_CP_ME);
/* Topaz has no SMU-loadable MEC image; use the legacy upload. */
5005 if (adev->asic_type == CHIP_TOPAZ) {
5006 r = gfx_v8_0_cp_compute_load_microcode(adev);
5010 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5011 AMDGPU_UCODE_ID_CP_MEC1);
5018 r = gfx_v8_0_cp_gfx_resume(adev);
5022 r = gfx_v8_0_cp_compute_resume(adev);
5026 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5031 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
/* Enable/disable both halves of the command processor (GFX + compute). */
5033 gfx_v8_0_cp_gfx_enable(adev, enable);
5034 gfx_v8_0_cp_compute_enable(adev, enable);
5037 static int gfx_v8_0_hw_init(void *handle)
/*
 * IP-block hw_init hook: program golden registers, initialize the GPU,
 * then resume the RLC followed by the command processor.
 */
5040 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042 gfx_v8_0_init_golden_registers(adev);
5043 gfx_v8_0_gpu_init(adev);
/* RLC must be running before the CP is brought up. */
5045 r = gfx_v8_0_rlc_resume(adev);
5049 r = gfx_v8_0_cp_resume(adev);
5054 static int gfx_v8_0_hw_fini(void *handle)
/*
 * IP-block hw_fini hook: drop the privileged-op IRQ references, stop the
 * CP and RLC, free compute MQDs, and ungate GFX power gating.
 */
5056 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5058 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5059 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5060 gfx_v8_0_cp_enable(adev, false);
5061 gfx_v8_0_rlc_stop(adev);
5062 gfx_v8_0_cp_compute_fini(adev);
/* Leave the block ungated so a later hw_init starts from a known state. */
5064 amdgpu_set_powergating_state(adev,
5065 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5070 static int gfx_v8_0_suspend(void *handle)
/* Suspend is simply a full hw_fini of the GFX block. */
5072 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5074 return gfx_v8_0_hw_fini(adev);
5077 static int gfx_v8_0_resume(void *handle)
/* Resume is simply a full hw_init of the GFX block. */
5079 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5081 return gfx_v8_0_hw_init(adev);
5084 static bool gfx_v8_0_is_idle(void *handle)
/* Idle check: the block is busy while GRBM_STATUS.GUI_ACTIVE is set. */
5086 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5094 static int gfx_v8_0_wait_for_idle(void *handle)
/* Poll gfx_v8_0_is_idle() for up to adev->usec_timeout iterations. */
5097 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5099 for (i = 0; i < adev->usec_timeout; i++) {
5100 if (gfx_v8_0_is_idle(handle))
5108 static bool gfx_v8_0_check_soft_reset(void *handle)
/*
 * Inspect GRBM/SRBM status registers and accumulate the soft-reset bits
 * needed to recover any stuck engine.  The computed masks are cached in
 * adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset hooks.
 */
5110 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* Any busy graphics pipeline stage forces a CP + GFX (+ GRBM) reset. */
5115 tmp = RREG32(mmGRBM_STATUS);
5116 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5117 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5118 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5119 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5120 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5121 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5122 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5123 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5124 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5125 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5126 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5127 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5128 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
/* RLC busy gets its own reset bit. */
5132 tmp = RREG32(mmGRBM_STATUS2);
5133 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5134 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5135 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
/* Any busy CP micro-engine (fetcher/compute/gfx) resets the CP + GRBM. */
5137 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5138 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5139 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5140 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5142 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5144 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5146 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5147 SOFT_RESET_GRBM, 1);
5151 tmp = RREG32(mmSRBM_STATUS);
5152 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5153 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5154 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5155 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5156 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5157 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
/* Publish (or clear) the masks for the soft-reset hooks. */
5159 if (grbm_soft_reset || srbm_soft_reset) {
5160 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5161 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5164 adev->gfx.grbm_soft_reset = 0;
5165 adev->gfx.srbm_soft_reset = 0;
5170 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5171 struct amdgpu_ring *ring)
/*
 * Deactivate the hardware queue backing @ring: select its me/pipe/queue
 * via SRBM, request a dequeue if the HQD is active, and poll (up to
 * usec_timeout) for it to go idle.
 * NOTE(review): caller presumably holds srbm_mutex — confirm at call sites.
 */
5175 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5176 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5178 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5179 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5181 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5182 for (i = 0; i < adev->usec_timeout; i++) {
5183 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5190 static int gfx_v8_0_pre_soft_reset(void *handle)
/*
 * Quiesce the engines selected by check_soft_reset() before the actual
 * soft reset: stop the RLC, halt GFX parsing if the GFX/CP is being
 * reset, and drain + halt the compute queues if any CP engine is.
 * No-op when no reset bits were latched.
 */
5192 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5193 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5195 if ((!adev->gfx.grbm_soft_reset) &&
5196 (!adev->gfx.srbm_soft_reset))
5199 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5200 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5203 gfx_v8_0_rlc_stop(adev);
5205 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5206 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5207 /* Disable GFX parsing/prefetching */
5208 gfx_v8_0_cp_gfx_enable(adev, false);
5210 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5211 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5212 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5213 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
/* Deactivate every compute HQD before halting the MEC. */
5216 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5217 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5219 gfx_v8_0_inactive_hqd(adev, ring);
5221 /* Disable MEC parsing/prefetching */
5222 gfx_v8_0_cp_compute_enable(adev, false);
5228 static int gfx_v8_0_soft_reset(void *handle)
/*
 * Perform the actual soft reset using the masks latched by
 * check_soft_reset(): stall the memory controller around the reset,
 * pulse GRBM then SRBM soft-reset bits (write-set, read-back, clear,
 * read-back), then release the stall.  No-op when no bits are latched.
 */
5230 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5231 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5234 if ((!adev->gfx.grbm_soft_reset) &&
5235 (!adev->gfx.srbm_soft_reset))
5238 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5239 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* Stall GFX traffic through the memory controller during the reset. */
5241 if (grbm_soft_reset || srbm_soft_reset) {
5242 tmp = RREG32(mmGMCON_DEBUG);
5243 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5244 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5245 WREG32(mmGMCON_DEBUG, tmp);
5249 if (grbm_soft_reset) {
5250 tmp = RREG32(mmGRBM_SOFT_RESET);
5251 tmp |= grbm_soft_reset;
5252 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5253 WREG32(mmGRBM_SOFT_RESET, tmp);
/* Read back to post the write before clearing the bits. */
5254 tmp = RREG32(mmGRBM_SOFT_RESET);
5258 tmp &= ~grbm_soft_reset;
5259 WREG32(mmGRBM_SOFT_RESET, tmp);
5260 tmp = RREG32(mmGRBM_SOFT_RESET);
5263 if (srbm_soft_reset) {
5264 tmp = RREG32(mmSRBM_SOFT_RESET);
5265 tmp |= srbm_soft_reset;
5266 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5267 WREG32(mmSRBM_SOFT_RESET, tmp);
5268 tmp = RREG32(mmSRBM_SOFT_RESET);
5272 tmp &= ~srbm_soft_reset;
5273 WREG32(mmSRBM_SOFT_RESET, tmp);
5274 tmp = RREG32(mmSRBM_SOFT_RESET);
/* Release the memory-controller stall. */
5277 if (grbm_soft_reset || srbm_soft_reset) {
5278 tmp = RREG32(mmGMCON_DEBUG);
5279 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5280 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5281 WREG32(mmGMCON_DEBUG, tmp);
5284 /* Wait a little for things to settle down */
5290 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5291 struct amdgpu_ring *ring)
/*
 * Clear the hardware queue state for @ring after a soft reset: zero the
 * dequeue request and PQ read/write pointers, selecting the queue via
 * SRBM and restoring the default selection afterwards.
 */
5293 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5294 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5295 WREG32(mmCP_HQD_PQ_RPTR, 0);
5296 WREG32(mmCP_HQD_PQ_WPTR, 0);
5297 vi_srbm_select(adev, 0, 0, 0, 0);
5300 static int gfx_v8_0_post_soft_reset(void *handle)
/*
 * Bring the engines back after a soft reset, mirroring pre_soft_reset:
 * resume the GFX CP if it was reset, reinitialize and resume the compute
 * queues if any CP engine was reset, then restart the RLC.
 * No-op when no reset bits were latched.
 */
5302 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5303 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5305 if ((!adev->gfx.grbm_soft_reset) &&
5306 (!adev->gfx.srbm_soft_reset))
5309 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5310 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5312 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5313 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5314 gfx_v8_0_cp_gfx_resume(adev);
5316 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5317 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5318 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5319 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
/* Zero every HQD's pointers before resuming the compute queues. */
5322 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5323 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5325 gfx_v8_0_init_hqd(adev, ring);
5327 gfx_v8_0_cp_compute_resume(adev);
5329 gfx_v8_0_rlc_start(adev);
5335 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5337 * @adev: amdgpu_device pointer
5339 * Fetches a GPU clock counter snapshot.
5340 * Returns the 64 bit clock counter snapshot.
5342 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* Serialized on gpu_clock_mutex: the capture + two-register read must
 * not interleave with another caller's capture. */
5346 mutex_lock(&adev->gfx.gpu_clock_mutex);
/* Latch the counter, then read the latched LSB/MSB halves. */
5347 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5348 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5349 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5350 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5354 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5356 uint32_t gds_base, uint32_t gds_size,
5357 uint32_t gws_base, uint32_t gws_size,
5358 uint32_t oa_base, uint32_t oa_size)
/*
 * Emit WRITE_DATA packets that program the per-VMID GDS/GWS/OA
 * allocation registers on the ring.  Byte/size arguments are converted
 * to hardware units via the AMDGPU_*_SHIFT constants first.
 */
5360 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5361 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5363 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5364 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5366 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5367 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS base for this VMID. */
5370 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5371 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5372 WRITE_DATA_DST_SEL(0)));
5373 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5374 amdgpu_ring_write(ring, 0);
5375 amdgpu_ring_write(ring, gds_base);
/* GDS size for this VMID. */
5378 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5379 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5380 WRITE_DATA_DST_SEL(0)));
5381 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5382 amdgpu_ring_write(ring, 0);
5383 amdgpu_ring_write(ring, gds_size);
/* GWS: base and size packed into one register. */
5386 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5387 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5388 WRITE_DATA_DST_SEL(0)));
5389 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5390 amdgpu_ring_write(ring, 0);
5391 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA: contiguous bitmask of oa_size bits starting at oa_base. */
5394 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5395 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5396 WRITE_DATA_DST_SEL(0)));
5397 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5398 amdgpu_ring_write(ring, 0);
5399 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5402 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5403 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5404 .select_se_sh = &gfx_v8_0_select_se_sh,
5407 static int gfx_v8_0_early_init(void *handle)
5409 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5411 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5412 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5413 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5414 gfx_v8_0_set_ring_funcs(adev);
5415 gfx_v8_0_set_irq_funcs(adev);
5416 gfx_v8_0_set_gds_init(adev);
5417 gfx_v8_0_set_rlc_funcs(adev);
5422 static int gfx_v8_0_late_init(void *handle)
5424 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5427 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5431 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5435 /* requires IBs so do in late init after IB pool is initialized */
5436 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5440 amdgpu_set_powergating_state(adev,
5441 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5446 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5449 if (adev->asic_type == CHIP_POLARIS11)
5450 /* Send msg to SMU via Powerplay */
5451 amdgpu_set_powergating_state(adev,
5452 AMD_IP_BLOCK_TYPE_SMC,
5454 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5456 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5459 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5462 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5465 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5468 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5471 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5474 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5477 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5480 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5482 /* Read any GFX register to wake up GFX. */
5484 RREG32(mmDB_RENDER_CONTROL);
5487 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5490 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5491 cz_enable_gfx_cg_power_gating(adev, true);
5492 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5493 cz_enable_gfx_pipeline_power_gating(adev, true);
5495 cz_enable_gfx_cg_power_gating(adev, false);
5496 cz_enable_gfx_pipeline_power_gating(adev, false);
5500 static int gfx_v8_0_set_powergating_state(void *handle,
5501 enum amd_powergating_state state)
5503 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5504 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5506 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5509 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5510 AMD_PG_SUPPORT_RLC_SMU_HS |
5512 AMD_PG_SUPPORT_GFX_DMG))
5513 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5514 switch (adev->asic_type) {
5517 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5518 cz_update_gfx_cg_power_gating(adev, enable);
5520 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5521 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5523 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5525 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5526 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5528 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5530 case CHIP_POLARIS11:
5531 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5532 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5534 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5536 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5537 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5539 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5541 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5542 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5544 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5549 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5550 AMD_PG_SUPPORT_RLC_SMU_HS |
5552 AMD_PG_SUPPORT_GFX_DMG))
5553 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5557 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5558 uint32_t reg_addr, uint32_t cmd)
5562 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5564 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5565 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5567 data = RREG32(mmRLC_SERDES_WR_CTRL);
5568 if (adev->asic_type == CHIP_STONEY)
5569 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5570 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5571 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5572 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5573 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5574 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5575 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5576 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5577 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5579 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5580 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5581 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5582 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5583 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5584 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5585 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5586 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5587 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5588 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5589 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5590 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5591 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5592 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5593 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5595 WREG32(mmRLC_SERDES_WR_CTRL, data);
5598 #define MSG_ENTER_RLC_SAFE_MODE 1
5599 #define MSG_EXIT_RLC_SAFE_MODE 0
5600 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5601 #define RLC_GPR_REG2__REQ__SHIFT 0
5602 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5603 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5605 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5610 data = RREG32(mmRLC_CNTL);
5611 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5614 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5615 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5616 AMD_PG_SUPPORT_GFX_DMG))) {
5617 data |= RLC_GPR_REG2__REQ_MASK;
5618 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5619 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5620 WREG32(mmRLC_GPR_REG2, data);
5622 for (i = 0; i < adev->usec_timeout; i++) {
5623 if ((RREG32(mmRLC_GPM_STAT) &
5624 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5625 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5626 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5627 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5632 for (i = 0; i < adev->usec_timeout; i++) {
5633 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5637 adev->gfx.rlc.in_safe_mode = true;
5641 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5646 data = RREG32(mmRLC_CNTL);
5647 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5650 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5651 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5652 AMD_PG_SUPPORT_GFX_DMG))) {
5653 data |= RLC_GPR_REG2__REQ_MASK;
5654 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5655 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5656 WREG32(mmRLC_GPR_REG2, data);
5657 adev->gfx.rlc.in_safe_mode = false;
5660 for (i = 0; i < adev->usec_timeout; i++) {
5661 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5667 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5672 data = RREG32(mmRLC_CNTL);
5673 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5676 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5677 data |= RLC_SAFE_MODE__CMD_MASK;
5678 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5679 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5680 WREG32(mmRLC_SAFE_MODE, data);
5682 for (i = 0; i < adev->usec_timeout; i++) {
5683 if ((RREG32(mmRLC_GPM_STAT) &
5684 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5685 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5686 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5687 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5692 for (i = 0; i < adev->usec_timeout; i++) {
5693 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5697 adev->gfx.rlc.in_safe_mode = true;
5701 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5706 data = RREG32(mmRLC_CNTL);
5707 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5710 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5711 if (adev->gfx.rlc.in_safe_mode) {
5712 data |= RLC_SAFE_MODE__CMD_MASK;
5713 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5714 WREG32(mmRLC_SAFE_MODE, data);
5715 adev->gfx.rlc.in_safe_mode = false;
5719 for (i = 0; i < adev->usec_timeout; i++) {
5720 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5726 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5728 adev->gfx.rlc.in_safe_mode = true;
5731 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5733 adev->gfx.rlc.in_safe_mode = false;
5736 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5737 .enter_safe_mode = cz_enter_rlc_safe_mode,
5738 .exit_safe_mode = cz_exit_rlc_safe_mode
5741 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5742 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5743 .exit_safe_mode = iceland_exit_rlc_safe_mode
5746 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5747 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5748 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * light-sleep features via the RLC serdes handshake.  The whole
 * sequence runs inside RLC safe mode.
 * NOTE(review): this span is a lossy dump — several structural lines
 * (braces, else branches) are missing between the numbered lines below;
 * verify against the full file before editing the logic.
 */
5751 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5754 uint32_t temp, data;
5756 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5758 /* It is disabled by HW by default */
5759 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5760 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5761 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5762 /* 1 - RLC memory Light sleep */
5763 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5765 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5766 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5769 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5770 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
/* APUs keep the GRBM override bit set; dGPUs clear it as well */
5771 if (adev->flags & AMD_IS_APU)
5772 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5773 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5774 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5776 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5777 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5778 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5779 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
/* only write back when the override value actually changed —
 * presumably guarded by an "if (temp != data)" in the full file */
5782 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5784 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5785 gfx_v8_0_wait_for_rlc_serdes(adev);
5787 /* 5 - clear mgcg override */
5788 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5790 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5791 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5792 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5793 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5794 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5795 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5796 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5797 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5798 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5799 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5800 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5801 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5803 WREG32(mmCGTS_SM_CTRL_REG, data);
5807 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5808 gfx_v8_0_wait_for_rlc_serdes(adev);
/* ---- disable path (the "else" of the enable check above) ---- */
5810 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5811 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5812 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5813 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5814 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5815 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5817 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5819 /* 2 - disable MGLS in RLC */
5820 data = RREG32(mmRLC_MEM_SLP_CNTL);
5821 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5822 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5823 WREG32(mmRLC_MEM_SLP_CNTL, data);
5826 /* 3 - disable MGLS in CP */
5827 data = RREG32(mmCP_MEM_SLP_CNTL);
5828 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5829 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5830 WREG32(mmCP_MEM_SLP_CNTL, data);
5833 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5834 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5835 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5836 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5838 WREG32(mmCGTS_SM_CTRL_REG, data);
5840 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5841 gfx_v8_0_wait_for_rlc_serdes(adev);
5843 /* 6 - set mgcg override */
5844 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5848 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5849 gfx_v8_0_wait_for_rlc_serdes(adev);
5852 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5855 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5858 uint32_t temp, temp1, data, data1;
5860 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5862 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5864 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5865 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5866 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5868 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5870 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5871 gfx_v8_0_wait_for_rlc_serdes(adev);
5873 /* 2 - clear cgcg override */
5874 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5876 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5877 gfx_v8_0_wait_for_rlc_serdes(adev);
5879 /* 3 - write cmd to set CGLS */
5880 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5882 /* 4 - enable cgcg */
5883 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5885 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5887 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5889 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5890 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5893 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5895 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5899 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5901 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5902 * Cmp_busy/GFX_Idle interrupts
5904 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5906 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5907 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5910 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5911 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5912 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5914 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5916 /* read gfx register to wake up cgcg */
5917 RREG32(mmCB_CGTT_SCLK_CTRL);
5918 RREG32(mmCB_CGTT_SCLK_CTRL);
5919 RREG32(mmCB_CGTT_SCLK_CTRL);
5920 RREG32(mmCB_CGTT_SCLK_CTRL);
5922 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5923 gfx_v8_0_wait_for_rlc_serdes(adev);
5925 /* write cmd to Set CGCG Overrride */
5926 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5928 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5929 gfx_v8_0_wait_for_rlc_serdes(adev);
5931 /* write cmd to Clear CGLS */
5932 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5934 /* disable cgcg, cgls should be disabled too. */
5935 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5936 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5938 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5941 gfx_v8_0_wait_for_rlc_serdes(adev);
5943 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5945 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5949 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5950 * === MGCG + MGLS + TS(CG/LS) ===
5952 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5953 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5955 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5956 * === CGCG + CGLS ===
5958 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5959 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5964 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5965 enum amd_clockgating_state state)
5967 uint32_t msg_id, pp_state;
5968 void *pp_handle = adev->powerplay.pp_handle;
5970 if (state == AMD_CG_STATE_UNGATE)
5973 pp_state = PP_STATE_CG | PP_STATE_LS;
5975 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5977 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5979 amd_set_clockgating_by_smu(pp_handle, msg_id);
5981 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5983 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5985 amd_set_clockgating_by_smu(pp_handle, msg_id);
5990 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5991 enum amd_clockgating_state state)
5993 uint32_t msg_id, pp_state;
5994 void *pp_handle = adev->powerplay.pp_handle;
5996 if (state == AMD_CG_STATE_UNGATE)
5999 pp_state = PP_STATE_CG | PP_STATE_LS;
6001 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6003 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6005 amd_set_clockgating_by_smu(pp_handle, msg_id);
6007 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6009 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6011 amd_set_clockgating_by_smu(pp_handle, msg_id);
6013 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6015 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6017 amd_set_clockgating_by_smu(pp_handle, msg_id);
6019 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6021 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6023 amd_set_clockgating_by_smu(pp_handle, msg_id);
6025 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6027 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6029 amd_set_clockgating_by_smu(pp_handle, msg_id);
6034 static int gfx_v8_0_set_clockgating_state(void *handle,
6035 enum amd_clockgating_state state)
6037 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6039 switch (adev->asic_type) {
6043 gfx_v8_0_update_gfx_clock_gating(adev,
6044 state == AMD_CG_STATE_GATE ? true : false);
6047 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6049 case CHIP_POLARIS10:
6050 case CHIP_POLARIS11:
6051 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6059 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6061 return ring->adev->wb.wb[ring->rptr_offs];
6064 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6066 struct amdgpu_device *adev = ring->adev;
6068 if (ring->use_doorbell)
6069 /* XXX check if swapping is necessary on BE */
6070 return ring->adev->wb.wb[ring->wptr_offs];
6072 return RREG32(mmCP_RB0_WPTR);
6075 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6077 struct amdgpu_device *adev = ring->adev;
6079 if (ring->use_doorbell) {
6080 /* XXX check if swapping is necessary on BE */
6081 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6082 WDOORBELL32(ring->doorbell_index, ring->wptr);
6084 WREG32(mmCP_RB0_WPTR, ring->wptr);
6085 (void)RREG32(mmCP_RB0_WPTR);
6089 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6091 u32 ref_and_mask, reg_mem_engine;
6093 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6096 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6099 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6106 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6107 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6110 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6111 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6112 WAIT_REG_MEM_FUNCTION(3) | /* == */
6114 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6115 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6116 amdgpu_ring_write(ring, ref_and_mask);
6117 amdgpu_ring_write(ring, ref_and_mask);
6118 amdgpu_ring_write(ring, 0x20); /* poll interval */
6121 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6123 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6124 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6125 WRITE_DATA_DST_SEL(0) |
6127 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6128 amdgpu_ring_write(ring, 0);
6129 amdgpu_ring_write(ring, 1);
6133 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6134 struct amdgpu_ib *ib,
6135 unsigned vm_id, bool ctx_switch)
6137 u32 header, control = 0;
6139 if (ib->flags & AMDGPU_IB_FLAG_CE)
6140 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6142 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6144 control |= ib->length_dw | (vm_id << 24);
6146 amdgpu_ring_write(ring, header);
6147 amdgpu_ring_write(ring,
6151 (ib->gpu_addr & 0xFFFFFFFC));
6152 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6153 amdgpu_ring_write(ring, control);
6156 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6157 struct amdgpu_ib *ib,
6158 unsigned vm_id, bool ctx_switch)
6160 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6162 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6163 amdgpu_ring_write(ring,
6167 (ib->gpu_addr & 0xFFFFFFFC));
6168 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6169 amdgpu_ring_write(ring, control);
6172 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6173 u64 seq, unsigned flags)
6175 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6176 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6178 /* EVENT_WRITE_EOP - flush caches, send int */
6179 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6180 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6182 EOP_TC_WB_ACTION_EN |
6183 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6185 amdgpu_ring_write(ring, addr & 0xfffffffc);
6186 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6187 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6188 amdgpu_ring_write(ring, lower_32_bits(seq));
6189 amdgpu_ring_write(ring, upper_32_bits(seq));
6193 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6195 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6196 uint32_t seq = ring->fence_drv.sync_seq;
6197 uint64_t addr = ring->fence_drv.gpu_addr;
6199 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6200 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6201 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6202 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6203 amdgpu_ring_write(ring, addr & 0xfffffffc);
6204 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6205 amdgpu_ring_write(ring, seq);
6206 amdgpu_ring_write(ring, 0xffffffff);
6207 amdgpu_ring_write(ring, 4); /* poll interval */
6210 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6211 unsigned vm_id, uint64_t pd_addr)
6213 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6215 /* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
6217 amdgpu_ring_insert_nop(ring, 128);
6219 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6220 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6221 WRITE_DATA_DST_SEL(0)) |
6224 amdgpu_ring_write(ring,
6225 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6227 amdgpu_ring_write(ring,
6228 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6230 amdgpu_ring_write(ring, 0);
6231 amdgpu_ring_write(ring, pd_addr >> 12);
6233 /* bits 0-15 are the VM contexts0-15 */
6234 /* invalidate the cache */
6235 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6236 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6237 WRITE_DATA_DST_SEL(0)));
6238 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6239 amdgpu_ring_write(ring, 0);
6240 amdgpu_ring_write(ring, 1 << vm_id);
6242 /* wait for the invalidate to complete */
6243 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6244 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6245 WAIT_REG_MEM_FUNCTION(0) | /* always */
6246 WAIT_REG_MEM_ENGINE(0))); /* me */
6247 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6248 amdgpu_ring_write(ring, 0);
6249 amdgpu_ring_write(ring, 0); /* ref */
6250 amdgpu_ring_write(ring, 0); /* mask */
6251 amdgpu_ring_write(ring, 0x20); /* poll interval */
6253 /* compute doesn't have PFP */
6255 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6256 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6257 amdgpu_ring_write(ring, 0x0);
6258 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6259 amdgpu_ring_insert_nop(ring, 128);
6263 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6265 return ring->adev->wb.wb[ring->wptr_offs];
6268 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6270 struct amdgpu_device *adev = ring->adev;
6272 /* XXX check if swapping is necessary on BE */
6273 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6274 WDOORBELL32(ring->doorbell_index, ring->wptr);
6277 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6281 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6282 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6284 /* RELEASE_MEM - flush caches, send int */
6285 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6286 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6288 EOP_TC_WB_ACTION_EN |
6289 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6291 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6292 amdgpu_ring_write(ring, addr & 0xfffffffc);
6293 amdgpu_ring_write(ring, upper_32_bits(addr));
6294 amdgpu_ring_write(ring, lower_32_bits(seq));
6295 amdgpu_ring_write(ring, upper_32_bits(seq));
6298 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6300 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6301 amdgpu_ring_write(ring, 0);
6304 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6308 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6309 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6310 /* set load_global_config & load_global_uconfig */
6312 /* set load_cs_sh_regs */
6314 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6317 /* set load_ce_ram if preamble presented */
6318 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6321 /* still load_ce_ram if this is the first time preamble presented
6322 * although there is no context switch happens.
6324 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6328 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6329 amdgpu_ring_write(ring, dw2);
6330 amdgpu_ring_write(ring, 0);
/* Worst-case dword count of one emit_ib on the GFX ring. */
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	return
		4; /* gfx_v8_0_ring_emit_ib_gfx */
}
/* Worst-case dword count of one complete GFX submission frame. */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
/* Worst-case dword count of one emit_ib on a compute ring. */
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	return
		4; /* gfx_v8_0_ring_emit_ib_compute */
}
/* Worst-case dword count of one complete compute submission frame. */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6369 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6370 enum amdgpu_interrupt_state state)
6372 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6373 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6376 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6378 enum amdgpu_interrupt_state state)
6381 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6382 * handles the setting of interrupts for this specific pipe. All other
6383 * pipes' interrupts are set by amdkfd.
6391 DRM_DEBUG("invalid pipe %d\n", pipe);
6395 DRM_DEBUG("invalid me %d\n", me);
6399 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6400 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6403 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6404 struct amdgpu_irq_src *source,
6406 enum amdgpu_interrupt_state state)
6408 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6409 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6414 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6415 struct amdgpu_irq_src *source,
6417 enum amdgpu_interrupt_state state)
6419 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6420 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6425 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6426 struct amdgpu_irq_src *src,
6428 enum amdgpu_interrupt_state state)
6431 case AMDGPU_CP_IRQ_GFX_EOP:
6432 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6434 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6435 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6437 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6438 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6440 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6441 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6443 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6444 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6446 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6447 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6449 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6450 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6452 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6453 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6455 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6456 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6464 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6465 struct amdgpu_irq_src *source,
6466 struct amdgpu_iv_entry *entry)
6469 u8 me_id, pipe_id, queue_id;
6470 struct amdgpu_ring *ring;
6472 DRM_DEBUG("IH: CP EOP\n");
6473 me_id = (entry->ring_id & 0x0c) >> 2;
6474 pipe_id = (entry->ring_id & 0x03) >> 0;
6475 queue_id = (entry->ring_id & 0x70) >> 4;
6479 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6483 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6484 ring = &adev->gfx.compute_ring[i];
6485 /* Per-queue interrupt is supported for MEC starting from VI.
6486 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6488 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6489 amdgpu_fence_process(ring);
6496 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6497 struct amdgpu_irq_src *source,
6498 struct amdgpu_iv_entry *entry)
6500 DRM_ERROR("Illegal register access in command stream\n");
6501 schedule_work(&adev->reset_work);
6505 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6506 struct amdgpu_irq_src *source,
6507 struct amdgpu_iv_entry *entry)
6509 DRM_ERROR("Illegal instruction in command stream\n");
6510 schedule_work(&adev->reset_work);
6514 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6516 .early_init = gfx_v8_0_early_init,
6517 .late_init = gfx_v8_0_late_init,
6518 .sw_init = gfx_v8_0_sw_init,
6519 .sw_fini = gfx_v8_0_sw_fini,
6520 .hw_init = gfx_v8_0_hw_init,
6521 .hw_fini = gfx_v8_0_hw_fini,
6522 .suspend = gfx_v8_0_suspend,
6523 .resume = gfx_v8_0_resume,
6524 .is_idle = gfx_v8_0_is_idle,
6525 .wait_for_idle = gfx_v8_0_wait_for_idle,
6526 .check_soft_reset = gfx_v8_0_check_soft_reset,
6527 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6528 .soft_reset = gfx_v8_0_soft_reset,
6529 .post_soft_reset = gfx_v8_0_post_soft_reset,
6530 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6531 .set_powergating_state = gfx_v8_0_set_powergating_state,
6534 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6535 .get_rptr = gfx_v8_0_ring_get_rptr,
6536 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6537 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6539 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6540 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6541 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6542 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6543 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6544 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6545 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6546 .test_ring = gfx_v8_0_ring_test_ring,
6547 .test_ib = gfx_v8_0_ring_test_ib,
6548 .insert_nop = amdgpu_ring_insert_nop,
6549 .pad_ib = amdgpu_ring_generic_pad_ib,
6550 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6551 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6552 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
6553 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
6556 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6557 .get_rptr = gfx_v8_0_ring_get_rptr,
6558 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6559 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6561 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6562 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6563 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6564 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6565 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6566 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6567 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6568 .test_ring = gfx_v8_0_ring_test_ring,
6569 .test_ib = gfx_v8_0_ring_test_ib,
6570 .insert_nop = amdgpu_ring_insert_nop,
6571 .pad_ib = amdgpu_ring_generic_pad_ib,
6572 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
6573 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
6576 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6580 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6581 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6583 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6584 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6587 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6588 .set = gfx_v8_0_set_eop_interrupt_state,
6589 .process = gfx_v8_0_eop_irq,
6592 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6593 .set = gfx_v8_0_set_priv_reg_fault_state,
6594 .process = gfx_v8_0_priv_reg_irq,
6597 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6598 .set = gfx_v8_0_set_priv_inst_fault_state,
6599 .process = gfx_v8_0_priv_inst_irq,
6602 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6604 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6605 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6607 adev->gfx.priv_reg_irq.num_types = 1;
6608 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6610 adev->gfx.priv_inst_irq.num_types = 1;
6611 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6614 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6616 switch (adev->asic_type) {
6618 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6622 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6625 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6630 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6632 /* init asci gds info */
6633 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6634 adev->gds.gws.total_size = 64;
6635 adev->gds.oa.total_size = 16;
6637 if (adev->gds.mem.total_size == 64 * 1024) {
6638 adev->gds.mem.gfx_partition_size = 4096;
6639 adev->gds.mem.cs_partition_size = 4096;
6641 adev->gds.gws.gfx_partition_size = 4;
6642 adev->gds.gws.cs_partition_size = 4;
6644 adev->gds.oa.gfx_partition_size = 4;
6645 adev->gds.oa.cs_partition_size = 1;
6647 adev->gds.mem.gfx_partition_size = 1024;
6648 adev->gds.mem.cs_partition_size = 1024;
6650 adev->gds.gws.gfx_partition_size = 16;
6651 adev->gds.gws.cs_partition_size = 16;
6653 adev->gds.oa.gfx_partition_size = 4;
6654 adev->gds.oa.cs_partition_size = 4;
6658 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6666 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6667 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6669 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6672 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6676 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6677 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6679 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6681 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6684 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6686 int i, j, k, counter, active_cu_number = 0;
6687 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6688 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6689 unsigned disable_masks[4 * 2];
6691 memset(cu_info, 0, sizeof(*cu_info));
6693 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6695 mutex_lock(&adev->grbm_idx_mutex);
6696 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6697 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6701 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6703 gfx_v8_0_set_user_cu_inactive_bitmap(
6704 adev, disable_masks[i * 2 + j]);
6705 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6706 cu_info->bitmap[i][j] = bitmap;
6708 for (k = 0; k < 16; k ++) {
6709 if (bitmap & mask) {
6716 active_cu_number += counter;
6717 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6720 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6721 mutex_unlock(&adev->grbm_idx_mutex);
6723 cu_info->number = active_cu_number;
6724 cu_info->ao_cu_mask = ao_cu_mask;