/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
28 #include "vi_structs.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
51 #include "smu/smu_7_1_3_d.h"
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

/* per-ASIC golden GB_ADDR_CONFIG values */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* helpers for composing GB_TILE_MODE / GB_MACROTILE_MODE register values */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC clock-gating override bit masks */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0
/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14
96 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
98 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
99 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
100 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
101 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
102 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
103 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
104 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
105 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
106 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
107 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
108 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
109 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
110 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
111 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
112 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
113 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
116 static const u32 golden_settings_tonga_a11[] =
118 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
119 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
120 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
121 mmGB_GPU_ID, 0x0000000f, 0x00000000,
122 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
123 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
124 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
125 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
126 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
127 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
128 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
129 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
130 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
131 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
132 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
133 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
136 static const u32 tonga_golden_common_all[] =
138 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
139 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
140 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
141 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
142 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
143 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
144 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
145 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
148 static const u32 tonga_mgcg_cgcg_init[] =
150 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
151 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
152 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
153 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
154 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
155 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
156 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
157 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
158 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
159 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
161 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
163 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
165 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
167 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
168 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
169 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
170 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
171 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
172 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
174 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
175 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
176 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
177 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
178 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
179 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
180 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
181 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
182 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
183 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
184 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
185 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
186 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
187 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
188 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
189 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
190 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
191 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
192 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
193 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
194 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
195 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
196 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
197 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
198 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
199 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
200 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
201 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
202 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
203 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
204 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
205 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
206 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
207 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
208 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
209 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
210 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
211 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
212 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
213 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
214 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
215 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
216 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
217 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
218 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
219 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
220 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
221 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
222 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
223 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
224 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
227 static const u32 golden_settings_polaris11_a11[] =
229 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
230 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
231 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
232 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
233 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
234 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
235 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
236 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
237 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
238 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
239 mmSQ_CONFIG, 0x07f80000, 0x01180000,
240 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
241 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
242 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
243 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
244 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
245 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
248 static const u32 polaris11_golden_common_all[] =
250 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
251 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
252 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
253 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
254 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
255 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
258 static const u32 golden_settings_polaris10_a11[] =
260 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
261 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
262 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
263 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
264 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
265 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
266 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
267 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
268 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
269 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
270 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
271 mmSQ_CONFIG, 0x07f80000, 0x07180000,
272 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
275 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
276 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
279 static const u32 polaris10_golden_common_all[] =
281 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
282 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
283 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
284 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
285 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
286 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
287 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
288 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
291 static const u32 fiji_golden_common_all[] =
293 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
294 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
295 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
296 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
297 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
298 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
299 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
300 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
301 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
302 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
305 static const u32 golden_settings_fiji_a10[] =
307 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
312 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
313 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
314 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
315 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
316 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
317 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
320 static const u32 fiji_mgcg_cgcg_init[] =
322 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
323 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
324 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
325 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
326 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
327 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
328 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
329 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
330 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
331 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
332 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
333 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
334 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
335 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
336 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
339 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
340 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
341 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
342 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
343 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
344 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
345 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
346 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
347 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
348 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
349 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
350 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
351 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
352 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
354 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
355 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
356 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
359 static const u32 golden_settings_iceland_a11[] =
361 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
362 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
363 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
364 mmGB_GPU_ID, 0x0000000f, 0x00000000,
365 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
366 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
367 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
368 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
369 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
370 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
371 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
372 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
373 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
374 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
375 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
376 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
379 static const u32 iceland_golden_common_all[] =
381 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
383 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
384 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
385 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
386 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
387 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
388 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
391 static const u32 iceland_mgcg_cgcg_init[] =
393 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
394 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
395 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
396 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
397 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
398 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
399 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
400 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
401 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
402 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
403 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
404 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
405 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
406 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
407 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
408 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
409 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
410 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
411 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
412 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
413 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
414 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
415 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
416 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
417 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
418 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
419 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
420 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
421 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
422 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
423 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
424 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
425 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
426 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
427 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
428 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
429 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
430 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
431 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
432 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
433 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
434 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
435 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
436 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
437 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
438 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
439 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
440 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
441 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
442 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
443 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
444 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
445 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
446 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
447 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
448 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
449 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
450 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
451 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
452 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
453 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
454 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
455 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
456 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
459 static const u32 cz_golden_settings_a11[] =
461 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
462 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
463 mmGB_GPU_ID, 0x0000000f, 0x00000000,
464 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
465 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
466 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
467 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
468 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
469 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
470 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
471 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
472 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
475 static const u32 cz_golden_common_all[] =
477 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
478 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
479 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
480 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
481 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
482 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
483 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
484 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
487 static const u32 cz_mgcg_cgcg_init[] =
489 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
490 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
491 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
492 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
493 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
494 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
495 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
496 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
497 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
498 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
499 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
500 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
501 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
502 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
503 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
504 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
505 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
506 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
507 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
508 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
509 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
510 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
511 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
512 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
513 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
514 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
515 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
516 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
517 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
518 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
519 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
520 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
521 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
522 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
523 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
524 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
525 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
526 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
527 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
528 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
529 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
530 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
531 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
532 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
533 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
534 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
535 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
536 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
537 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
538 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
539 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
540 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
541 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
542 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
543 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
544 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
545 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
546 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
547 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
548 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
549 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
550 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
551 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
552 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
553 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
554 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
555 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
556 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
557 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
558 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
559 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
560 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
561 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
562 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
563 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
566 static const u32 stoney_golden_settings_a11[] =
568 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
569 mmGB_GPU_ID, 0x0000000f, 0x00000000,
570 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
571 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
572 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
573 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
574 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
575 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
576 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
577 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
580 static const u32 stoney_golden_common_all[] =
582 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
583 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
584 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
585 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
586 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
587 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
588 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
589 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
592 static const u32 stoney_mgcg_cgcg_init[] =
594 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
595 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
596 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
597 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
598 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
601 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
602 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
603 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
604 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
605 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
606 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
607 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
608 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
610 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
612 switch (adev->asic_type) {
614 amdgpu_program_register_sequence(adev,
615 iceland_mgcg_cgcg_init,
616 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
617 amdgpu_program_register_sequence(adev,
618 golden_settings_iceland_a11,
619 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
620 amdgpu_program_register_sequence(adev,
621 iceland_golden_common_all,
622 (const u32)ARRAY_SIZE(iceland_golden_common_all));
625 amdgpu_program_register_sequence(adev,
627 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
628 amdgpu_program_register_sequence(adev,
629 golden_settings_fiji_a10,
630 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
631 amdgpu_program_register_sequence(adev,
632 fiji_golden_common_all,
633 (const u32)ARRAY_SIZE(fiji_golden_common_all));
637 amdgpu_program_register_sequence(adev,
638 tonga_mgcg_cgcg_init,
639 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
640 amdgpu_program_register_sequence(adev,
641 golden_settings_tonga_a11,
642 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
643 amdgpu_program_register_sequence(adev,
644 tonga_golden_common_all,
645 (const u32)ARRAY_SIZE(tonga_golden_common_all));
649 amdgpu_program_register_sequence(adev,
650 golden_settings_polaris11_a11,
651 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
652 amdgpu_program_register_sequence(adev,
653 polaris11_golden_common_all,
654 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
657 amdgpu_program_register_sequence(adev,
658 golden_settings_polaris10_a11,
659 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
660 amdgpu_program_register_sequence(adev,
661 polaris10_golden_common_all,
662 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
663 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
664 if (adev->pdev->revision == 0xc7 &&
665 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
666 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
667 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
668 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
669 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
673 amdgpu_program_register_sequence(adev,
675 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
676 amdgpu_program_register_sequence(adev,
677 cz_golden_settings_a11,
678 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
679 amdgpu_program_register_sequence(adev,
680 cz_golden_common_all,
681 (const u32)ARRAY_SIZE(cz_golden_common_all));
684 amdgpu_program_register_sequence(adev,
685 stoney_mgcg_cgcg_init,
686 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
687 amdgpu_program_register_sequence(adev,
688 stoney_golden_settings_a11,
689 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
690 amdgpu_program_register_sequence(adev,
691 stoney_golden_common_all,
692 (const u32)ARRAY_SIZE(stoney_golden_common_all));
699 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
701 adev->gfx.scratch.num_reg = 8;
702 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
703 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
706 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
708 struct amdgpu_device *adev = ring->adev;
714 r = amdgpu_gfx_scratch_get(adev, &scratch);
716 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
719 WREG32(scratch, 0xCAFEDEAD);
720 r = amdgpu_ring_alloc(ring, 3);
722 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
724 amdgpu_gfx_scratch_free(adev, scratch);
727 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
728 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
729 amdgpu_ring_write(ring, 0xDEADBEEF);
730 amdgpu_ring_commit(ring);
732 for (i = 0; i < adev->usec_timeout; i++) {
733 tmp = RREG32(scratch);
734 if (tmp == 0xDEADBEEF)
738 if (i < adev->usec_timeout) {
739 DRM_INFO("ring test on %d succeeded in %d usecs\n",
742 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
743 ring->idx, scratch, tmp);
746 amdgpu_gfx_scratch_free(adev, scratch);
750 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
752 struct amdgpu_device *adev = ring->adev;
754 struct dma_fence *f = NULL;
759 r = amdgpu_gfx_scratch_get(adev, &scratch);
761 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
764 WREG32(scratch, 0xCAFEDEAD);
765 memset(&ib, 0, sizeof(ib));
766 r = amdgpu_ib_get(adev, NULL, 256, &ib);
768 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
771 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
772 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
773 ib.ptr[2] = 0xDEADBEEF;
776 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
780 r = dma_fence_wait_timeout(f, false, timeout);
782 DRM_ERROR("amdgpu: IB test timed out.\n");
786 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
789 tmp = RREG32(scratch);
790 if (tmp == 0xDEADBEEF) {
791 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
794 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
799 amdgpu_ib_free(adev, &ib, NULL);
802 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_free_microcode - drop all GFX firmware references.
 *
 * Releases every firmware image requested by gfx_v8_0_init_microcode()
 * (PFP, ME, CE, RLC, MEC, and MEC2 where applicable) and NULLs the
 * pointers, then frees the RLC register-list-format buffer.
 */
807 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
809 release_firmware(adev->gfx.pfp_fw);
810 adev->gfx.pfp_fw = NULL;
811 release_firmware(adev->gfx.me_fw);
812 adev->gfx.me_fw = NULL;
813 release_firmware(adev->gfx.ce_fw);
814 adev->gfx.ce_fw = NULL;
815 release_firmware(adev->gfx.rlc_fw);
816 adev->gfx.rlc_fw = NULL;
817 release_firmware(adev->gfx.mec_fw);
818 adev->gfx.mec_fw = NULL;
/*
 * MEC2 firmware is only requested on ASICs other than Stoney/Topaz, so
 * it is only released there.  NOTE(review): release_firmware(NULL) is
 * documented as a no-op, so this guard looks redundant (mec2_fw would
 * simply be NULL on the excluded chips) — harmless but could be dropped.
 */
819 if ((adev->asic_type != CHIP_STONEY) &&
820 (adev->asic_type != CHIP_TOPAZ))
821 release_firmware(adev->gfx.mec2_fw);
822 adev->gfx.mec2_fw = NULL;
/* Allocated in gfx_v8_0_init_microcode(); kfree(NULL) is safe. */
824 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware images.
 *
 * Selects a chip-name prefix from adev->asic_type, then for each of the
 * PFP, ME, CE, RLC, MEC and (on most ASICs) MEC2 engines: requests the
 * firmware, validates it, and caches the ucode/feature versions from the
 * firmware headers.  For the RLC it additionally parses the v2.0 header
 * into adev->gfx.rlc (save/restore offsets, register list format/restore
 * tables, copied with le32_to_cpu into a kmalloc'd buffer).  Finally, when
 * the SMU loads firmware, the images are registered in
 * adev->firmware.ucode[] and their aligned sizes accumulated into
 * adev->firmware.fw_size.  On error, all acquired firmwares are released.
 *
 * NOTE(review): the firmware path strings have been replaced with
 * "/\*(DEBLOBBED)*\/" by the Linux-libre deblobbing script, and
 * request_firmware() was renamed to reject_firmware(); this function will
 * therefore always fail to load firmware in this tree.  Many error-check
 * lines (if (err) goto out; etc.) are elided from this excerpt.
 */
827 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
829 const char *chip_name;
832 struct amdgpu_firmware_info *info = NULL;
833 const struct common_firmware_header *header = NULL;
834 const struct gfx_firmware_header_v1_0 *cp_hdr;
835 const struct rlc_firmware_header_v2_0 *rlc_hdr;
836 unsigned int *tmp = NULL, i;
/* Map the ASIC type to the firmware file name prefix. */
840 switch (adev->asic_type) {
848 chip_name = "carrizo";
854 chip_name = "polaris11";
857 chip_name = "polaris10";
860 chip_name = "polaris12";
863 chip_name = "stoney";
/* --- PFP (prefetch parser) firmware --- */
869 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
870 err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
873 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
876 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
877 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
878 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- ME (micro engine) firmware --- */
880 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
881 err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
884 err = amdgpu_ucode_validate(adev->gfx.me_fw);
887 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
888 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
890 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* --- CE (constant engine) firmware --- */
892 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
893 err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
896 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
899 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
900 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
901 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
904 * Support for MCBP/Virtualization in combination with chained IBs is
905 * formal released on feature version #46
907 if (adev->gfx.ce_feature_version >= 46 &&
908 adev->gfx.pfp_feature_version >= 46) {
909 adev->virt.chained_ib_support = true;
910 DRM_INFO("Chained IB support enabled!\n");
912 adev->virt.chained_ib_support = false;
/* --- RLC firmware (v2.0 header, carries register save/restore lists) --- */
914 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
915 err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
918 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
919 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
920 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
921 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* Copy the RLC header's little-endian fields into host-endian state. */
923 adev->gfx.rlc.save_and_restore_offset =
924 le32_to_cpu(rlc_hdr->save_and_restore_offset);
925 adev->gfx.rlc.clear_state_descriptor_offset =
926 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
927 adev->gfx.rlc.avail_scratch_ram_locations =
928 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
929 adev->gfx.rlc.reg_restore_list_size =
930 le32_to_cpu(rlc_hdr->reg_restore_list_size);
931 adev->gfx.rlc.reg_list_format_start =
932 le32_to_cpu(rlc_hdr->reg_list_format_start);
933 adev->gfx.rlc.reg_list_format_separate_start =
934 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
935 adev->gfx.rlc.starting_offsets_start =
936 le32_to_cpu(rlc_hdr->starting_offsets_start);
937 adev->gfx.rlc.reg_list_format_size_bytes =
938 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
939 adev->gfx.rlc.reg_list_size_bytes =
940 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/*
 * One buffer holds both tables: register_list_format first, then
 * register_restore immediately after it (freed in free_microcode()).
 */
942 adev->gfx.rlc.register_list_format =
943 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
944 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
946 if (!adev->gfx.rlc.register_list_format) {
951 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
952 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
953 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
954 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
956 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
958 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
959 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
960 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
961 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* --- MEC (compute micro engine) firmware --- */
963 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
964 err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
967 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
970 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
971 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
972 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* Stoney and Topaz have no second MEC, hence no MEC2 firmware. */
974 if ((adev->asic_type != CHIP_STONEY) &&
975 (adev->asic_type != CHIP_TOPAZ)) {
976 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
977 err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
979 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
982 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
983 adev->gfx.mec2_fw->data;
984 adev->gfx.mec2_fw_version =
985 le32_to_cpu(cp_hdr->header.ucode_version);
986 adev->gfx.mec2_feature_version =
987 le32_to_cpu(cp_hdr->ucode_feature_version);
990 adev->gfx.mec2_fw = NULL;
/* Register images with the SMU loader and tally total firmware size. */
994 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
995 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
996 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
997 info->fw = adev->gfx.pfp_fw;
998 header = (const struct common_firmware_header *)info->fw->data;
999 adev->firmware.fw_size +=
1000 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1002 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1003 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1004 info->fw = adev->gfx.me_fw;
1005 header = (const struct common_firmware_header *)info->fw->data;
1006 adev->firmware.fw_size +=
1007 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1009 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1010 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1011 info->fw = adev->gfx.ce_fw;
1012 header = (const struct common_firmware_header *)info->fw->data;
1013 adev->firmware.fw_size +=
1014 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1016 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1017 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1018 info->fw = adev->gfx.rlc_fw;
1019 header = (const struct common_firmware_header *)info->fw->data;
1020 adev->firmware.fw_size +=
1021 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1023 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1024 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1025 info->fw = adev->gfx.mec_fw;
1026 header = (const struct common_firmware_header *)info->fw->data;
1027 adev->firmware.fw_size +=
1028 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1030 /* we need account JT in */
1031 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1032 adev->firmware.fw_size +=
1033 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1035 if (amdgpu_sriov_vf(adev)) {
1036 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1037 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1038 info->fw = adev->gfx.mec_fw;
/*
 * NOTE(review): le32_to_cpu() is applied to the host constant
 * 64 * PAGE_SIZE, not to on-disk data.  A no-op on little-endian
 * hosts but semantically wrong; should be plain 64 * PAGE_SIZE.
 */
1039 adev->firmware.fw_size +=
1040 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1043 if (adev->gfx.mec2_fw) {
1044 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1045 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1046 info->fw = adev->gfx.mec2_fw;
1047 header = (const struct common_firmware_header *)info->fw->data;
1048 adev->firmware.fw_size +=
1049 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* Error path: undo every successful request_firmware above. */
1057 "gfx8: Failed to load firmware \"%s\"\n",
1059 release_firmware(adev->gfx.pfp_fw);
1060 adev->gfx.pfp_fw = NULL;
1061 release_firmware(adev->gfx.me_fw);
1062 adev->gfx.me_fw = NULL;
1063 release_firmware(adev->gfx.ce_fw);
1064 adev->gfx.ce_fw = NULL;
1065 release_firmware(adev->gfx.rlc_fw);
1066 adev->gfx.rlc_fw = NULL;
1067 release_firmware(adev->gfx.mec_fw);
1068 adev->gfx.mec_fw = NULL;
1069 release_firmware(adev->gfx.mec2_fw);
1070 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the RLC clear-state buffer (CSB).
 *
 * Fills @buffer (little-endian dwords) with the PM4 command stream that
 * initializes clear-state: PREAMBLE begin, CONTEXT_CONTROL, every
 * SECT_CONTEXT extent from adev->gfx.rlc.cs_data, the PA_SC_RASTER_CONFIG
 * pair from the cached rb_config, PREAMBLE end, and a CLEAR_STATE packet.
 * Bails out early if cs_data is NULL.  Buffer sizing is done by the
 * (not shown here) gfx_v8_0_get_csb_size() counterpart.
 */
1075 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1076 volatile u32 *buffer)
1079 const struct cs_section_def *sect = NULL;
1080 const struct cs_extent_def *ext = NULL;
1082 if (adev->gfx.rlc.cs_data == NULL)
1087 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1088 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1090 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1091 buffer[count++] = cpu_to_le32(0x80000000);
1092 buffer[count++] = cpu_to_le32(0x80000000);
/* Walk every section/extent pair; only context registers are emitted. */
1094 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1095 for (ext = sect->section; ext->extent != NULL; ++ext) {
1096 if (sect->id == SECT_CONTEXT) {
1098 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1099 buffer[count++] = cpu_to_le32(ext->reg_index -
1100 PACKET3_SET_CONTEXT_REG_START);
1101 for (i = 0; i < ext->reg_count; i++)
1102 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* Raster config for SE0/SH0 (two consecutive context registers). */
1109 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1110 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1111 PACKET3_SET_CONTEXT_REG_START);
1112 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1113 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1115 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1116 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1118 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1119 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy CP jump tables into the RLC cp_table BO.
 *
 * For each micro engine (me 0..max_me-1: CE, PFP, ME, MEC, and MEC2 when
 * present) this locates the jump table inside the corresponding firmware
 * image via the gfx v1.0 header's jt_offset/jt_size fields and copies it,
 * dword by dword, into adev->gfx.rlc.cp_table_ptr at a running bo_offset.
 *
 * NOTE(review): max_me starts at 4; the "me == 4" (MEC2) branch is only
 * reachable if the elided CHIP_CARRIZO line at the top raises max_me —
 * presumably to 5 — confirm against the full source.
 */
1122 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1124 const __le32 *fw_data;
1125 volatile u32 *dst_ptr;
1126 int me, i, max_me = 4;
1128 u32 table_offset, table_size;
1130 if (adev->asic_type == CHIP_CARRIZO)
1133 /* write the cp table buffer */
1134 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1135 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine (CE) jump table. */
1137 const struct gfx_firmware_header_v1_0 *hdr =
1138 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1139 fw_data = (const __le32 *)
1140 (adev->gfx.ce_fw->data +
1141 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1142 table_offset = le32_to_cpu(hdr->jt_offset);
1143 table_size = le32_to_cpu(hdr->jt_size);
1144 } else if (me == 1) {
/* me == 1: prefetch parser (PFP). */
1145 const struct gfx_firmware_header_v1_0 *hdr =
1146 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1147 fw_data = (const __le32 *)
1148 (adev->gfx.pfp_fw->data +
1149 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1150 table_offset = le32_to_cpu(hdr->jt_offset);
1151 table_size = le32_to_cpu(hdr->jt_size);
1152 } else if (me == 2) {
/* me == 2: micro engine (ME). */
1153 const struct gfx_firmware_header_v1_0 *hdr =
1154 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1155 fw_data = (const __le32 *)
1156 (adev->gfx.me_fw->data +
1157 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1158 table_offset = le32_to_cpu(hdr->jt_offset);
1159 table_size = le32_to_cpu(hdr->jt_size);
1160 } else if (me == 3) {
/* me == 3: first compute micro engine (MEC). */
1161 const struct gfx_firmware_header_v1_0 *hdr =
1162 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1163 fw_data = (const __le32 *)
1164 (adev->gfx.mec_fw->data +
1165 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1166 table_offset = le32_to_cpu(hdr->jt_offset);
1167 table_size = le32_to_cpu(hdr->jt_size);
1168 } else if (me == 4) {
/* me == 4: second compute micro engine (MEC2). */
1169 const struct gfx_firmware_header_v1_0 *hdr =
1170 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1171 fw_data = (const __le32 *)
1172 (adev->gfx.mec2_fw->data +
1173 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1174 table_offset = le32_to_cpu(hdr->jt_offset);
1175 table_size = le32_to_cpu(hdr->jt_size);
/* Copy this engine's table, converting LE firmware dwords in place. */
1178 for (i = 0; i < table_size; i ++) {
1179 dst_ptr[bo_offset + i] =
1180 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1183 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects.
 *
 * Frees the clear-state BO and the CP jump-table BO allocated by
 * gfx_v8_0_rlc_init(); amdgpu_bo_free_kernel() tolerates NULL handles.
 */
1187 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1189 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1190 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects.
 *
 * Sets cs_data to the VI clear-state table, creates a VRAM BO sized by
 * gfx_v8_0_get_csb_size(), writes the clear-state command stream into it
 * via gfx_v8_0_get_csb_buffer(), then unmaps/unreserves it.  On Carrizo
 * and Stoney an additional cp_table BO (jump tables + 64 KiB GDS backup)
 * is created and filled by cz_init_cp_jump_table().  Returns 0 on
 * success; on BO-creation failure cleans up via gfx_v8_0_rlc_fini().
 */
1193 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1195 volatile u32 *dst_ptr;
1197 const struct cs_section_def *cs_data;
1200 adev->gfx.rlc.cs_data = vi_cs_data;
1202 cs_data = adev->gfx.rlc.cs_data;
1205 /* clear state block */
1206 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
/* Create-and-map in one call; cs_ptr receives the CPU mapping. */
1208 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1209 AMDGPU_GEM_DOMAIN_VRAM,
1210 &adev->gfx.rlc.clear_state_obj,
1211 &adev->gfx.rlc.clear_state_gpu_addr,
1212 (void **)&adev->gfx.rlc.cs_ptr);
1214 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1215 gfx_v8_0_rlc_fini(adev);
1219 /* set up the cs buffer */
1220 dst_ptr = adev->gfx.rlc.cs_ptr;
1221 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1222 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1223 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Only the APUs (Carrizo/Stoney) need the CP jump-table BO. */
1226 if ((adev->asic_type == CHIP_CARRIZO) ||
1227 (adev->asic_type == CHIP_STONEY)) {
1228 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1229 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1230 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1231 &adev->gfx.rlc.cp_table_obj,
1232 &adev->gfx.rlc.cp_table_gpu_addr,
1233 (void **)&adev->gfx.rlc.cp_table_ptr);
1235 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1239 cz_init_cp_jump_table(adev);
1241 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1242 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/*
 * gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object.
 */
1248 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1250 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/*
 * gfx_v8_0_mec_init - allocate the MEC hardware-queue EOP buffer.
 *
 * Claims the compute queues this driver will own, then creates a GTT BO
 * with GFX8_MEC_HPD_SIZE bytes of HPD/EOP space per compute ring, zeroes
 * it, and unmaps/unreserves it.  Returns 0 on success, negative errno on
 * BO-creation failure.
 */
1253 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1257 size_t mec_hpd_size;
1259 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1261 /* take ownership of the relevant compute queues */
1262 amdgpu_gfx_compute_queue_acquire(adev);
1264 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1266 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1267 AMDGPU_GEM_DOMAIN_GTT,
1268 &adev->gfx.mec.hpd_eop_obj,
1269 &adev->gfx.mec.hpd_eop_gpu_addr,
1272 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
/* hpd (CPU mapping, declaration elided above) is cleared before use. */
1276 memset(hpd, 0, mec_hpd_size);
1278 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1279 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * vgpr_init_compute_shader - pre-assembled GCN machine code used by the
 * EDC GPR workaround to touch a wide range of VGPRs.  Dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds(); the dwords are raw instruction
 * encodings and must not be edited by hand.
 */
1284 static const u32 vgpr_init_compute_shader[] =
1286 0x7e000209, 0x7e020208,
1287 0x7e040207, 0x7e060206,
1288 0x7e080205, 0x7e0a0204,
1289 0x7e0c0203, 0x7e0e0202,
1290 0x7e100201, 0x7e120200,
1291 0x7e140209, 0x7e160208,
1292 0x7e180207, 0x7e1a0206,
1293 0x7e1c0205, 0x7e1e0204,
1294 0x7e200203, 0x7e220202,
1295 0x7e240201, 0x7e260200,
1296 0x7e280209, 0x7e2a0208,
1297 0x7e2c0207, 0x7e2e0206,
1298 0x7e300205, 0x7e320204,
1299 0x7e340203, 0x7e360202,
1300 0x7e380201, 0x7e3a0200,
1301 0x7e3c0209, 0x7e3e0208,
1302 0x7e400207, 0x7e420206,
1303 0x7e440205, 0x7e460204,
1304 0x7e480203, 0x7e4a0202,
1305 0x7e4c0201, 0x7e4e0200,
1306 0x7e500209, 0x7e520208,
1307 0x7e540207, 0x7e560206,
1308 0x7e580205, 0x7e5a0204,
1309 0x7e5c0203, 0x7e5e0202,
1310 0x7e600201, 0x7e620200,
1311 0x7e640209, 0x7e660208,
1312 0x7e680207, 0x7e6a0206,
1313 0x7e6c0205, 0x7e6e0204,
1314 0x7e700203, 0x7e720202,
1315 0x7e740201, 0x7e760200,
1316 0x7e780209, 0x7e7a0208,
1317 0x7e7c0207, 0x7e7e0206,
1318 0xbf8a0000, 0xbf810000,
/*
 * sgpr_init_compute_shader - pre-assembled GCN machine code used by the
 * EDC GPR workaround to touch a wide range of SGPRs.  Shared by both SGPR
 * dispatches in gfx_v8_0_do_edc_gpr_workarounds(); raw encodings, do not
 * edit by hand.
 */
1321 static const u32 sgpr_init_compute_shader[] =
1323 0xbe8a0100, 0xbe8c0102,
1324 0xbe8e0104, 0xbe900106,
1325 0xbe920108, 0xbe940100,
1326 0xbe960102, 0xbe980104,
1327 0xbe9a0106, 0xbe9c0108,
1328 0xbe9e0100, 0xbea00102,
1329 0xbea20104, 0xbea40106,
1330 0xbea60108, 0xbea80100,
1331 0xbeaa0102, 0xbeac0104,
1332 0xbeae0106, 0xbeb00108,
1333 0xbeb20100, 0xbeb40102,
1334 0xbeb60104, 0xbeb80106,
1335 0xbeba0108, 0xbebc0100,
1336 0xbebe0102, 0xbec00104,
1337 0xbec20106, 0xbec40108,
1338 0xbec60100, 0xbec80102,
1339 0xbee60004, 0xbee70005,
1340 0xbeea0006, 0xbeeb0007,
1341 0xbee80008, 0xbee90009,
1342 0xbefc0000, 0xbf8a0000,
1343 0xbf810000, 0x00000000,
/*
 * vgpr_init_regs - (register, value) pairs programmed via SET_SH_REG
 * before dispatching vgpr_init_compute_shader.  Consumed two entries at
 * a time by gfx_v8_0_do_edc_gpr_workarounds().
 */
1346 static const u32 vgpr_init_regs[] =
1348 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1349 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1350 mmCOMPUTE_NUM_THREAD_X, 256*4,
1351 mmCOMPUTE_NUM_THREAD_Y, 1,
1352 mmCOMPUTE_NUM_THREAD_Z, 1,
1353 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1354 mmCOMPUTE_PGM_RSRC2, 20,
1355 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1356 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1357 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1358 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1359 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1360 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1361 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1362 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1363 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1364 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sgpr1_init_regs - (register, value) pairs for the first SGPR dispatch
 * of the EDC workaround; targets CUs 0-3 (SE0 mask 0x0f).
 */
1367 static const u32 sgpr1_init_regs[] =
1369 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1370 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1371 mmCOMPUTE_NUM_THREAD_X, 256*5,
1372 mmCOMPUTE_NUM_THREAD_Y, 1,
1373 mmCOMPUTE_NUM_THREAD_Z, 1,
1374 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1375 mmCOMPUTE_PGM_RSRC2, 20,
1376 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1377 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1378 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1379 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1380 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1381 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1382 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1383 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1384 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1385 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sgpr2_init_regs - (register, value) pairs for the second SGPR dispatch
 * of the EDC workaround; targets the other CU group (SE0 mask 0xf0).
 * Otherwise identical to sgpr1_init_regs.
 */
1388 static const u32 sgpr2_init_regs[] =
1390 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1391 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1392 mmCOMPUTE_NUM_THREAD_X, 256*5,
1393 mmCOMPUTE_NUM_THREAD_Y, 1,
1394 mmCOMPUTE_NUM_THREAD_Z, 1,
1395 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1396 mmCOMPUTE_PGM_RSRC2, 20,
1397 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1398 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1399 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1400 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1401 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1402 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1403 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1404 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1405 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1406 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * sec_ded_counter_registers - EDC SEC/DED error-counter registers that
 * are read back (to clear them) at the end of the EDC GPR workaround.
 * Several entries of the full table are elided from this excerpt.
 */
1409 static const u32 sec_ded_counter_registers[] =
1412 mmCPC_EDC_SCRATCH_CNT,
1413 mmCPC_EDC_UCODE_CNT,
1420 mmDC_EDC_CSINVOC_CNT,
1421 mmDC_EDC_RESTORE_CNT,
1427 mmSQC_ATC_EDC_GATCL1_CNT,
1433 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPR ECC state (Carrizo).
 *
 * Builds one indirect buffer containing three compute dispatches — one
 * running vgpr_init_compute_shader and two running
 * sgpr_init_compute_shader against opposite CU groups — each preceded by
 * its SET_SH_REG register-state table and followed by a CS-partial-flush
 * event.  Writing every VGPR/SGPR primes the EDC parity state; afterwards
 * GB_EDC_MODE / CC_GC_EDC_CONFIG are programmed and every SEC/DED counter
 * is read back to clear it.  Runs on compute ring 0; no-op on anything
 * but CHIP_CARRIZO.  Error paths and the `fail:` label are elided here.
 */
1438 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1440 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1441 struct amdgpu_ib ib;
1442 struct dma_fence *f = NULL;
1445 unsigned total_size, vgpr_offset, sgpr_offset;
1448 /* only supported on CZ */
1449 if (adev->asic_type != CHIP_CARRIZO)
1452 /* bail if the compute ring is not ready */
/* Disable EDC while the priming shaders run; restored below. */
1456 tmp = RREG32(mmGB_EDC_MODE);
1457 WREG32(mmGB_EDC_MODE, 0);
/* Per dispatch: 3 dwords per reg pair + PGM addr (4) + dispatch (5) + flush (2). */
1460 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1462 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1464 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1465 total_size = ALIGN(total_size, 256);
1466 vgpr_offset = total_size;
1467 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1468 sgpr_offset = total_size;
1469 total_size += sizeof(sgpr_init_compute_shader);
1471 /* allocate an indirect buffer to put the commands in */
1472 memset(&ib, 0, sizeof(ib));
1473 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1475 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1479 /* load the compute shaders */
1480 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1481 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1483 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1484 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1486 /* init the ib length to 0 */
/* --- Dispatch 1: VGPR priming shader --- */
1490 /* write the register state for the compute dispatch */
1491 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1492 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1493 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1494 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1496 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1497 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1498 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1499 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1500 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1501 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1503 /* write dispatch packet */
1504 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1505 ib.ptr[ib.length_dw++] = 8; /* x */
1506 ib.ptr[ib.length_dw++] = 1; /* y */
1507 ib.ptr[ib.length_dw++] = 1; /* z */
1508 ib.ptr[ib.length_dw++] =
1509 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1511 /* write CS partial flush packet */
1512 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1513 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- Dispatch 2: SGPR priming shader, first CU group --- */
1516 /* write the register state for the compute dispatch */
1517 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1518 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1519 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1520 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1522 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1523 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1524 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1525 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1526 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1527 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1529 /* write dispatch packet */
1530 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1531 ib.ptr[ib.length_dw++] = 8; /* x */
1532 ib.ptr[ib.length_dw++] = 1; /* y */
1533 ib.ptr[ib.length_dw++] = 1; /* z */
1534 ib.ptr[ib.length_dw++] =
1535 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1537 /* write CS partial flush packet */
1538 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1539 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- Dispatch 3: SGPR priming shader, second CU group --- */
1542 /* write the register state for the compute dispatch */
1543 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1544 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1545 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1546 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1548 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1549 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1550 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1551 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1552 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1553 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1555 /* write dispatch packet */
1556 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1557 ib.ptr[ib.length_dw++] = 8; /* x */
1558 ib.ptr[ib.length_dw++] = 1; /* y */
1559 ib.ptr[ib.length_dw++] = 1; /* z */
1560 ib.ptr[ib.length_dw++] =
1561 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1563 /* write CS partial flush packet */
1564 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1565 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1567 /* shedule the ib on the ring */
1568 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1570 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1574 /* wait for the GPU to finish processing the IB */
1575 r = dma_fence_wait(f, false);
1577 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* Re-enable EDC with DED mode 2 and FED propagation. */
1581 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1582 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1583 WREG32(mmGB_EDC_MODE, tmp);
1585 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1586 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1587 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1590 /* read back registers to clear the counters */
1591 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1592 RREG32(sec_ded_counter_registers[i]);
1595 amdgpu_ib_free(adev, &ib, NULL);
/*
 * gfx_v8_0_gpu_early_init - fill adev->gfx.config per ASIC.
 *
 * One switch arm per supported chip sets shader-engine/pipe/CU/backend
 * counts, FIFO sizes and the golden GB_ADDR_CONFIG; Polaris variants pull
 * their topology from AtomBIOS instead of hard-coding it.  Afterwards the
 * memory row size is derived (from DIMM address-map fuses on APUs, from
 * MC_ARB_RAMCFG column count on dGPUs) and folded into gb_addr_config's
 * ROW_SIZE field.  The case labels themselves (CHIP_TOPAZ, CHIP_TONGA,
 * CHIP_FIJI, CHIP_CARRIZO, CHIP_STONEY, default) are elided from this
 * excerpt; the arms below are identified by their golden-config constant.
 */
1601 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1604 u32 mc_shared_chmap, mc_arb_ramcfg;
1605 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1609 switch (adev->asic_type) {
/* Topaz (TOPAZ_GB_ADDR_CONFIG_GOLDEN). */
1611 adev->gfx.config.max_shader_engines = 1;
1612 adev->gfx.config.max_tile_pipes = 2;
1613 adev->gfx.config.max_cu_per_sh = 6;
1614 adev->gfx.config.max_sh_per_se = 1;
1615 adev->gfx.config.max_backends_per_se = 2;
1616 adev->gfx.config.max_texture_channel_caches = 2;
1617 adev->gfx.config.max_gprs = 256;
1618 adev->gfx.config.max_gs_threads = 32;
1619 adev->gfx.config.max_hw_contexts = 8;
1621 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1622 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1623 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1624 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1625 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
/* Large dGPU arm (4 SEs, 16 pipes — Fiji-class topology). */
1628 adev->gfx.config.max_shader_engines = 4;
1629 adev->gfx.config.max_tile_pipes = 16;
1630 adev->gfx.config.max_cu_per_sh = 16;
1631 adev->gfx.config.max_sh_per_se = 1;
1632 adev->gfx.config.max_backends_per_se = 4;
1633 adev->gfx.config.max_texture_channel_caches = 16;
1634 adev->gfx.config.max_gprs = 256;
1635 adev->gfx.config.max_gs_threads = 32;
1636 adev->gfx.config.max_hw_contexts = 8;
1638 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1639 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1640 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1641 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1642 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1644 case CHIP_POLARIS11:
1645 case CHIP_POLARIS12:
/* Polaris: topology comes from AtomBIOS, not hard-coded. */
1646 ret = amdgpu_atombios_get_gfx_info(adev);
1649 adev->gfx.config.max_gprs = 256;
1650 adev->gfx.config.max_gs_threads = 32;
1651 adev->gfx.config.max_hw_contexts = 8;
1653 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1654 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1655 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1656 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1657 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1659 case CHIP_POLARIS10:
1660 ret = amdgpu_atombios_get_gfx_info(adev);
1663 adev->gfx.config.max_gprs = 256;
1664 adev->gfx.config.max_gs_threads = 32;
1665 adev->gfx.config.max_hw_contexts = 8;
1667 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1668 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1669 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1670 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1671 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Tonga-class topology (4 SEs, 8 pipes). */
1674 adev->gfx.config.max_shader_engines = 4;
1675 adev->gfx.config.max_tile_pipes = 8;
1676 adev->gfx.config.max_cu_per_sh = 8;
1677 adev->gfx.config.max_sh_per_se = 1;
1678 adev->gfx.config.max_backends_per_se = 2;
1679 adev->gfx.config.max_texture_channel_caches = 8;
1680 adev->gfx.config.max_gprs = 256;
1681 adev->gfx.config.max_gs_threads = 32;
1682 adev->gfx.config.max_hw_contexts = 8;
1684 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1685 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1686 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1687 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1688 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
/* Carrizo APU (CARRIZO_GB_ADDR_CONFIG_GOLDEN, 8 CUs). */
1691 adev->gfx.config.max_shader_engines = 1;
1692 adev->gfx.config.max_tile_pipes = 2;
1693 adev->gfx.config.max_sh_per_se = 1;
1694 adev->gfx.config.max_backends_per_se = 2;
1695 adev->gfx.config.max_cu_per_sh = 8;
1696 adev->gfx.config.max_texture_channel_caches = 2;
1697 adev->gfx.config.max_gprs = 256;
1698 adev->gfx.config.max_gs_threads = 32;
1699 adev->gfx.config.max_hw_contexts = 8;
1701 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1702 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1703 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1704 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1705 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Smaller APU (3 CUs, 16 GS threads — Stoney-class). */
1708 adev->gfx.config.max_shader_engines = 1;
1709 adev->gfx.config.max_tile_pipes = 2;
1710 adev->gfx.config.max_sh_per_se = 1;
1711 adev->gfx.config.max_backends_per_se = 1;
1712 adev->gfx.config.max_cu_per_sh = 3;
1713 adev->gfx.config.max_texture_channel_caches = 2;
1714 adev->gfx.config.max_gprs = 256;
1715 adev->gfx.config.max_gs_threads = 16;
1716 adev->gfx.config.max_hw_contexts = 8;
1718 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1719 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1720 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1721 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1722 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
/* Default/fallback arm (2 SEs, 4 pipes). */
1725 adev->gfx.config.max_shader_engines = 2;
1726 adev->gfx.config.max_tile_pipes = 4;
1727 adev->gfx.config.max_cu_per_sh = 2;
1728 adev->gfx.config.max_sh_per_se = 1;
1729 adev->gfx.config.max_backends_per_se = 2;
1730 adev->gfx.config.max_texture_channel_caches = 4;
1731 adev->gfx.config.max_gprs = 256;
1732 adev->gfx.config.max_gs_threads = 32;
1733 adev->gfx.config.max_hw_contexts = 8;
1735 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1736 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1737 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1738 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1739 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1743 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1744 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1745 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1747 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1748 adev->gfx.config.mem_max_burst_length_bytes = 256;
1749 if (adev->flags & AMD_IS_APU) {
1750 /* Get memory bank mapping mode. */
1751 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1752 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1753 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1755 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1756 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1757 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1759 /* Validate settings in case only one DIMM installed. */
1760 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1761 dimm00_addr_map = 0;
1762 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1763 dimm01_addr_map = 0;
1764 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1765 dimm10_addr_map = 0;
1766 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1767 dimm11_addr_map = 0;
1769 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1770 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1771 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1772 adev->gfx.config.mem_row_size_in_kb = 2;
1774 adev->gfx.config.mem_row_size_in_kb = 1;
/* dGPU: derive row size from the memory controller's column count. */
1776 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1777 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1778 if (adev->gfx.config.mem_row_size_in_kb > 4)
1779 adev->gfx.config.mem_row_size_in_kb = 4;
1782 adev->gfx.config.shader_engine_tile_size = 32;
1783 adev->gfx.config.num_gpus = 1;
1784 adev->gfx.config.multi_gpu_tile_size = 64;
1786 /* fix up row size */
1787 switch (adev->gfx.config.mem_row_size_in_kb) {
1790 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1793 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1796 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1799 adev->gfx.config.gb_addr_config = gb_addr_config;
1804 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1805 int mec, int pipe, int queue)
1809 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1811 ring = &adev->gfx.compute_ring[ring_id];
1816 ring->queue = queue;
1818 ring->ring_obj = NULL;
1819 ring->use_doorbell = true;
1820 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1821 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1822 + (ring_id * GFX8_MEC_HPD_SIZE);
1823 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1825 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1826 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1829 /* type-2 packets are deprecated on MEC, use type-3 instead */
1830 r = amdgpu_ring_init(adev, ring, 1024,
1831 &adev->gfx.eop_irq, irq_type);
/*
 * gfx_v8_0_sw_init() - "sw_init" hook of the GFX v8 IP block (amd_ip_funcs).
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Configures the MEC topology per ASIC, registers the KIQ, EOP and
 * privileged-register/instruction interrupt sources, loads microcode,
 * allocates RLC and MEC buffer objects, initializes the gfx and compute
 * rings, creates the KIQ ring and compute MQDs, and reserves the
 * GDS/GWS/OA buffer objects.
 *
 * Return: 0 on success or a negative error code.
 * NOTE(review): several intervening error-check lines are elided in this
 * excerpt; each failing step is expected to return early — confirm
 * against the full file.
 */
1839 static int gfx_v8_0_sw_init(void *handle)
1841 int i, j, k, r, ring_id;
1842 struct amdgpu_ring *ring;
1843 struct amdgpu_kiq *kiq;
1844 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* MEC count is per-ASIC: Polaris parts use 2 MECs, the default is 1. */
1846 switch (adev->asic_type) {
1849 case CHIP_POLARIS11:
1850 case CHIP_POLARIS12:
1851 case CHIP_POLARIS10:
1853 adev->gfx.mec.num_mec = 2;
1858 adev->gfx.mec.num_mec = 1;
1862 adev->gfx.mec.num_pipe_per_mec = 4;
1863 adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ event (IV source id 178) */
1866 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
/* EOP event (IV source id 181) */
1871 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1875 /* Privileged reg */
1876 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1877 &adev->gfx.priv_reg_irq);
1881 /* Privileged inst */
1882 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1883 &adev->gfx.priv_inst_irq);
1887 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1889 gfx_v8_0_scratch_init(adev);
/* Firmware, RLC and MEC BOs; each failure is logged (early return assumed). */
1891 r = gfx_v8_0_init_microcode(adev);
1893 DRM_ERROR("Failed to load gfx firmware!\n");
1897 r = gfx_v8_0_rlc_init(adev);
1899 DRM_ERROR("Failed to init rlc BOs!\n");
1903 r = gfx_v8_0_mec_init(adev);
1905 DRM_ERROR("Failed to init MEC BOs!\n");
1909 /* set up the gfx ring */
1910 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1911 ring = &adev->gfx.gfx_ring[i];
1912 ring->ring_obj = NULL;
1913 sprintf(ring->name, "gfx");
1914 /* no gfx doorbells on iceland */
1915 if (adev->asic_type != CHIP_TOPAZ) {
1916 ring->use_doorbell = true;
1917 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1920 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1921 AMDGPU_CP_IRQ_GFX_EOP);
1927 /* set up the compute queues - allocate horizontally across pipes */
/* Iteration order mec -> queue -> pipe spreads queues across pipes. */
1929 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1930 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1931 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1932 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1935 r = gfx_v8_0_compute_ring_init(adev,
1946 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
1948 DRM_ERROR("Failed to init KIQ BOs!\n");
1952 kiq = &adev->gfx.kiq;
1953 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1957 /* create MQD for all compute queues as well as KIQ for SRIOV case */
1958 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
1962 /* reserve GDS, GWS and OA resource for gfx */
1963 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1964 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
1965 &adev->gds.gds_gfx_bo, NULL, NULL);
1969 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1970 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1971 &adev->gds.gws_gfx_bo, NULL, NULL);
1975 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1976 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1977 &adev->gds.oa_gfx_bo, NULL, NULL);
/* Constant-engine RAM size for VI parts. */
1981 adev->gfx.ce_ram_size = 0x8000;
1983 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini() - "sw_fini" hook of the GFX v8 IP block (amd_ip_funcs).
 *
 * @handle: opaque IP-block handle, actually a struct amdgpu_device *.
 *
 * Tears down everything gfx_v8_0_sw_init() created, in roughly reverse
 * order: GDS/GWS/OA BOs, gfx and compute rings, compute MQDs, the KIQ
 * ring and KIQ state, MEC and RLC BOs, and the loaded microcode.
 */
1990 static int gfx_v8_0_sw_fini(void *handle)
1993 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Free the GDS/GWS/OA kernel BOs reserved in sw_init. */
1995 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1996 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1997 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
/* Tear down every gfx ring, then every compute ring. */
1999 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2000 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2001 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2002 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
/* Release compute MQDs, then the KIQ ring and KIQ bookkeeping. */
2004 amdgpu_gfx_compute_mqd_sw_fini(adev);
2005 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2006 amdgpu_gfx_kiq_fini(adev);
/* Free MEC/RLC buffer objects and drop firmware references. */
2008 gfx_v8_0_mec_fini(adev);
2009 gfx_v8_0_rlc_fini(adev);
2010 gfx_v8_0_free_microcode(adev);
2015 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2017 uint32_t *modearray, *mod2array;
2018 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2019 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2022 modearray = adev->gfx.config.tile_mode_array;
2023 mod2array = adev->gfx.config.macrotile_mode_array;
2025 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026 modearray[reg_offset] = 0;
2028 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029 mod2array[reg_offset] = 0;
2031 switch (adev->asic_type) {
2033 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 PIPE_CONFIG(ADDR_SURF_P2) |
2035 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2036 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038 PIPE_CONFIG(ADDR_SURF_P2) |
2039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 PIPE_CONFIG(ADDR_SURF_P2) |
2043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 PIPE_CONFIG(ADDR_SURF_P2) |
2047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050 PIPE_CONFIG(ADDR_SURF_P2) |
2051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2054 PIPE_CONFIG(ADDR_SURF_P2) |
2055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2058 PIPE_CONFIG(ADDR_SURF_P2) |
2059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2062 PIPE_CONFIG(ADDR_SURF_P2));
2063 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 PIPE_CONFIG(ADDR_SURF_P2) |
2065 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 PIPE_CONFIG(ADDR_SURF_P2) |
2069 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2072 PIPE_CONFIG(ADDR_SURF_P2) |
2073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2075 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076 PIPE_CONFIG(ADDR_SURF_P2) |
2077 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2080 PIPE_CONFIG(ADDR_SURF_P2) |
2081 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2084 PIPE_CONFIG(ADDR_SURF_P2) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2088 PIPE_CONFIG(ADDR_SURF_P2) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2091 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092 PIPE_CONFIG(ADDR_SURF_P2) |
2093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2096 PIPE_CONFIG(ADDR_SURF_P2) |
2097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2100 PIPE_CONFIG(ADDR_SURF_P2) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2104 PIPE_CONFIG(ADDR_SURF_P2) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2108 PIPE_CONFIG(ADDR_SURF_P2) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2112 PIPE_CONFIG(ADDR_SURF_P2) |
2113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2116 PIPE_CONFIG(ADDR_SURF_P2) |
2117 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2120 PIPE_CONFIG(ADDR_SURF_P2) |
2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2124 PIPE_CONFIG(ADDR_SURF_P2) |
2125 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128 PIPE_CONFIG(ADDR_SURF_P2) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2131 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132 PIPE_CONFIG(ADDR_SURF_P2) |
2133 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2136 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139 NUM_BANKS(ADDR_SURF_8_BANK));
2140 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143 NUM_BANKS(ADDR_SURF_8_BANK));
2144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2147 NUM_BANKS(ADDR_SURF_8_BANK));
2148 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151 NUM_BANKS(ADDR_SURF_8_BANK));
2152 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155 NUM_BANKS(ADDR_SURF_8_BANK));
2156 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159 NUM_BANKS(ADDR_SURF_8_BANK));
2160 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2163 NUM_BANKS(ADDR_SURF_8_BANK));
2164 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167 NUM_BANKS(ADDR_SURF_16_BANK));
2168 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171 NUM_BANKS(ADDR_SURF_16_BANK));
2172 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175 NUM_BANKS(ADDR_SURF_16_BANK));
2176 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179 NUM_BANKS(ADDR_SURF_16_BANK));
2180 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183 NUM_BANKS(ADDR_SURF_16_BANK));
2184 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187 NUM_BANKS(ADDR_SURF_16_BANK));
2188 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191 NUM_BANKS(ADDR_SURF_8_BANK));
2193 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2194 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2196 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2198 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2199 if (reg_offset != 7)
2200 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2204 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2237 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2238 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2327 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330 NUM_BANKS(ADDR_SURF_8_BANK));
2331 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334 NUM_BANKS(ADDR_SURF_8_BANK));
2335 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 NUM_BANKS(ADDR_SURF_8_BANK));
2339 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366 NUM_BANKS(ADDR_SURF_8_BANK));
2367 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2371 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 NUM_BANKS(ADDR_SURF_8_BANK));
2375 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378 NUM_BANKS(ADDR_SURF_8_BANK));
2379 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 NUM_BANKS(ADDR_SURF_4_BANK));
2384 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2388 if (reg_offset != 7)
2389 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2393 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2425 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2427 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2444 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2452 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2463 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2484 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2488 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2492 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2499 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2513 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2516 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2519 NUM_BANKS(ADDR_SURF_16_BANK));
2520 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 NUM_BANKS(ADDR_SURF_16_BANK));
2524 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2527 NUM_BANKS(ADDR_SURF_16_BANK));
2528 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2531 NUM_BANKS(ADDR_SURF_16_BANK));
2532 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2535 NUM_BANKS(ADDR_SURF_16_BANK));
2536 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539 NUM_BANKS(ADDR_SURF_16_BANK));
2540 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2543 NUM_BANKS(ADDR_SURF_16_BANK));
2544 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2547 NUM_BANKS(ADDR_SURF_16_BANK));
2548 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2551 NUM_BANKS(ADDR_SURF_16_BANK));
2552 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2555 NUM_BANKS(ADDR_SURF_16_BANK));
2556 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2559 NUM_BANKS(ADDR_SURF_16_BANK));
2560 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2563 NUM_BANKS(ADDR_SURF_8_BANK));
2564 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567 NUM_BANKS(ADDR_SURF_4_BANK));
2568 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2571 NUM_BANKS(ADDR_SURF_4_BANK));
2573 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2574 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2576 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2577 if (reg_offset != 7)
2578 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2581 case CHIP_POLARIS11:
2582 case CHIP_POLARIS12:
2583 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2584 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2587 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2588 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2589 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2590 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2591 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2597 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2602 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2603 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2606 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2607 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2609 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2611 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2612 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2615 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2616 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2617 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2638 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2640 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2642 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2644 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2645 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2649 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2653 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2657 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2661 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2662 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2665 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2669 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2670 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2673 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2674 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2677 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2681 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2685 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2689 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2701 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2706 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2709 NUM_BANKS(ADDR_SURF_16_BANK));
2711 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2713 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2714 NUM_BANKS(ADDR_SURF_16_BANK));
2716 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719 NUM_BANKS(ADDR_SURF_16_BANK));
2721 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724 NUM_BANKS(ADDR_SURF_16_BANK));
2726 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 NUM_BANKS(ADDR_SURF_16_BANK));
2731 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734 NUM_BANKS(ADDR_SURF_16_BANK));
2736 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2739 NUM_BANKS(ADDR_SURF_16_BANK));
2741 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2744 NUM_BANKS(ADDR_SURF_16_BANK));
2746 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2749 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2756 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759 NUM_BANKS(ADDR_SURF_16_BANK));
2761 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2764 NUM_BANKS(ADDR_SURF_16_BANK));
2766 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2769 NUM_BANKS(ADDR_SURF_8_BANK));
2771 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2774 NUM_BANKS(ADDR_SURF_4_BANK));
2776 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2777 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2779 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2780 if (reg_offset != 7)
2781 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2784 case CHIP_POLARIS10:
2785 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2789 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2790 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2791 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2792 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2793 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2794 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2795 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2797 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2801 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2805 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2809 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2813 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2817 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2818 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2819 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2823 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2829 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2831 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2835 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2843 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2844 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2851 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2855 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2856 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2859 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2860 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2863 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2864 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2867 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2868 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2871 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2872 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2875 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2879 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2880 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2883 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2884 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2887 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2888 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2891 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2899 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2903 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2908 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 NUM_BANKS(ADDR_SURF_16_BANK));
2913 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2914 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2915 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2916 NUM_BANKS(ADDR_SURF_16_BANK));
2918 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921 NUM_BANKS(ADDR_SURF_16_BANK));
2923 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 NUM_BANKS(ADDR_SURF_16_BANK));
2928 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2931 NUM_BANKS(ADDR_SURF_16_BANK));
2933 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2936 NUM_BANKS(ADDR_SURF_16_BANK));
2938 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2941 NUM_BANKS(ADDR_SURF_16_BANK));
2943 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2946 NUM_BANKS(ADDR_SURF_16_BANK));
2948 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2951 NUM_BANKS(ADDR_SURF_16_BANK));
2953 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2956 NUM_BANKS(ADDR_SURF_16_BANK));
2958 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2961 NUM_BANKS(ADDR_SURF_16_BANK));
2963 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2966 NUM_BANKS(ADDR_SURF_8_BANK));
2968 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2971 NUM_BANKS(ADDR_SURF_4_BANK));
2973 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2976 NUM_BANKS(ADDR_SURF_4_BANK));
2978 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2979 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2981 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2982 if (reg_offset != 7)
2983 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2987 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2988 PIPE_CONFIG(ADDR_SURF_P2) |
2989 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2991 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2992 PIPE_CONFIG(ADDR_SURF_P2) |
2993 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2994 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2995 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2996 PIPE_CONFIG(ADDR_SURF_P2) |
2997 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2999 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3000 PIPE_CONFIG(ADDR_SURF_P2) |
3001 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3003 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3004 PIPE_CONFIG(ADDR_SURF_P2) |
3005 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3007 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008 PIPE_CONFIG(ADDR_SURF_P2) |
3009 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3011 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P2) |
3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3015 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3016 PIPE_CONFIG(ADDR_SURF_P2));
3017 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P2) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P2) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3024 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3025 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 PIPE_CONFIG(ADDR_SURF_P2) |
3027 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3030 PIPE_CONFIG(ADDR_SURF_P2) |
3031 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3033 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3034 PIPE_CONFIG(ADDR_SURF_P2) |
3035 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3036 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3037 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P2) |
3039 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3041 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P2) |
3043 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3045 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3046 PIPE_CONFIG(ADDR_SURF_P2) |
3047 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3049 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3050 PIPE_CONFIG(ADDR_SURF_P2) |
3051 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3053 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3054 PIPE_CONFIG(ADDR_SURF_P2) |
3055 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3057 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3058 PIPE_CONFIG(ADDR_SURF_P2) |
3059 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3061 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3062 PIPE_CONFIG(ADDR_SURF_P2) |
3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3065 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3066 PIPE_CONFIG(ADDR_SURF_P2) |
3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3069 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3070 PIPE_CONFIG(ADDR_SURF_P2) |
3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3073 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3074 PIPE_CONFIG(ADDR_SURF_P2) |
3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3077 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078 PIPE_CONFIG(ADDR_SURF_P2) |
3079 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082 PIPE_CONFIG(ADDR_SURF_P2) |
3083 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3086 PIPE_CONFIG(ADDR_SURF_P2) |
3087 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3090 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3092 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3093 NUM_BANKS(ADDR_SURF_8_BANK));
3094 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3097 NUM_BANKS(ADDR_SURF_8_BANK));
3098 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3101 NUM_BANKS(ADDR_SURF_8_BANK));
3102 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 NUM_BANKS(ADDR_SURF_8_BANK));
3106 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3109 NUM_BANKS(ADDR_SURF_8_BANK));
3110 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3111 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3112 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3113 NUM_BANKS(ADDR_SURF_8_BANK));
3114 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3115 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3116 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3117 NUM_BANKS(ADDR_SURF_8_BANK));
3118 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3121 NUM_BANKS(ADDR_SURF_16_BANK));
3122 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125 NUM_BANKS(ADDR_SURF_16_BANK));
3126 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3127 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3128 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3129 NUM_BANKS(ADDR_SURF_16_BANK));
3130 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133 NUM_BANKS(ADDR_SURF_16_BANK));
3134 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3137 NUM_BANKS(ADDR_SURF_16_BANK));
3138 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141 NUM_BANKS(ADDR_SURF_16_BANK));
3142 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3145 NUM_BANKS(ADDR_SURF_8_BANK));
3147 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3148 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3150 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3152 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3153 if (reg_offset != 7)
3154 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3159 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3163 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3164 PIPE_CONFIG(ADDR_SURF_P2) |
3165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3166 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3167 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168 PIPE_CONFIG(ADDR_SURF_P2) |
3169 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3171 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172 PIPE_CONFIG(ADDR_SURF_P2) |
3173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3175 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 PIPE_CONFIG(ADDR_SURF_P2) |
3177 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3178 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3179 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3180 PIPE_CONFIG(ADDR_SURF_P2) |
3181 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3183 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3184 PIPE_CONFIG(ADDR_SURF_P2) |
3185 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3187 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188 PIPE_CONFIG(ADDR_SURF_P2) |
3189 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3190 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3191 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3192 PIPE_CONFIG(ADDR_SURF_P2));
3193 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3209 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3210 PIPE_CONFIG(ADDR_SURF_P2) |
3211 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3213 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3214 PIPE_CONFIG(ADDR_SURF_P2) |
3215 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3217 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3218 PIPE_CONFIG(ADDR_SURF_P2) |
3219 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3221 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3222 PIPE_CONFIG(ADDR_SURF_P2) |
3223 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3225 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3226 PIPE_CONFIG(ADDR_SURF_P2) |
3227 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3229 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3230 PIPE_CONFIG(ADDR_SURF_P2) |
3231 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3233 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3234 PIPE_CONFIG(ADDR_SURF_P2) |
3235 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3237 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3238 PIPE_CONFIG(ADDR_SURF_P2) |
3239 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3241 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3242 PIPE_CONFIG(ADDR_SURF_P2) |
3243 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3245 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3246 PIPE_CONFIG(ADDR_SURF_P2) |
3247 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3249 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3250 PIPE_CONFIG(ADDR_SURF_P2) |
3251 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3253 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3254 PIPE_CONFIG(ADDR_SURF_P2) |
3255 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3257 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258 PIPE_CONFIG(ADDR_SURF_P2) |
3259 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3261 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3262 PIPE_CONFIG(ADDR_SURF_P2) |
3263 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3266 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3269 NUM_BANKS(ADDR_SURF_8_BANK));
3270 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3273 NUM_BANKS(ADDR_SURF_8_BANK));
3274 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3277 NUM_BANKS(ADDR_SURF_8_BANK));
3278 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3281 NUM_BANKS(ADDR_SURF_8_BANK));
3282 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3285 NUM_BANKS(ADDR_SURF_8_BANK));
3286 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3289 NUM_BANKS(ADDR_SURF_8_BANK));
3290 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3293 NUM_BANKS(ADDR_SURF_8_BANK));
3294 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3297 NUM_BANKS(ADDR_SURF_16_BANK));
3298 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3299 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 NUM_BANKS(ADDR_SURF_16_BANK));
3302 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305 NUM_BANKS(ADDR_SURF_16_BANK));
3306 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 NUM_BANKS(ADDR_SURF_16_BANK));
3310 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 NUM_BANKS(ADDR_SURF_16_BANK));
3314 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3317 NUM_BANKS(ADDR_SURF_16_BANK));
3318 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321 NUM_BANKS(ADDR_SURF_8_BANK));
3323 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3324 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3326 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3328 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3329 if (reg_offset != 7)
3330 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Steer subsequent register accesses to a specific shader engine (SE) /
 * shader array (SH) / instance by programming GRBM_GFX_INDEX, or enable
 * the corresponding broadcast bit when 0xffffffff is passed for a field.
 * NOTE(review): callers elsewhere in this file take grbm_idx_mutex around
 * this call — presumably required; confirm for any new caller.
 */
3336 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3337 u32 se_num, u32 sh_num, u32 instance)
/* 0xffffffff selects broadcast; otherwise index the single unit. */
3341 if (instance == 0xffffffff)
3342 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3344 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3346 if (se_num == 0xffffffff)
3347 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3349 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3351 if (sh_num == 0xffffffff)
3352 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3354 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3356 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * currently selected SE/SH (selection is done by the caller via
 * gfx_v8_0_select_se_sh). Disabled bits come from both the fused
 * (CC_RB_BACKEND_DISABLE) and user-requested (GC_USER_RB_BACKEND_DISABLE)
 * registers; the result is inverted and masked to the per-SH RB count.
 */
3359 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3363 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3364 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3366 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3368 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3369 adev->gfx.config.max_sh_per_se);
/* Active RBs are those NOT marked disabled, within the valid mask. */
3371 return (~data) & mask;
/*
 * Fill in the default (unharvested) PA_SC_RASTER_CONFIG /
 * PA_SC_RASTER_CONFIG_1 values for the current ASIC. The per-chip
 * constants encode the RB/PKR/SE mapping for a fully populated part;
 * harvested configurations are derived from these by
 * gfx_v8_0_write_harvested_raster_configs().
 */
3375 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3377 switch (adev->asic_type) {
3379 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3380 RB_XSEL2(1) | PKR_MAP(2) |
3381 PKR_XSEL(1) | PKR_YSEL(1) |
3382 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3383 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3387 case CHIP_POLARIS10:
3388 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3389 SE_XSEL(1) | SE_YSEL(1);
3390 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3395 *rconf |= RB_MAP_PKR0(2);
3398 case CHIP_POLARIS11:
3399 case CHIP_POLARIS12:
3400 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3401 SE_XSEL(1) | SE_YSEL(1);
/* Unknown ASICs fall through to an error; values stay untouched. */
3409 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Program per-SE raster configuration on parts where some render
 * backends are harvested (fused off). For each shader engine the
 * default raster_config is patched so that SE/PKR/RB mapping fields
 * point only at RBs present in @rb_mask, then written with the
 * GRBM index steered to that SE. raster_config_1 (SE pair mapping)
 * is patched once, globally, for >2-SE parts.
 *
 * @raster_config:   default PA_SC_RASTER_CONFIG value for the ASIC
 * @raster_config_1: default PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         bitmap of physically active RBs
 * @num_rb:          number of active RBs
 */
3415 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3416 u32 raster_config, u32 raster_config_1,
3417 unsigned rb_mask, unsigned num_rb)
3419 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3420 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3421 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3422 unsigned rb_per_se = num_rb / num_se;
3423 unsigned se_mask[4];
/* Slice rb_mask into one contiguous RB group per shader engine. */
3426 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3427 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3428 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3429 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* Sanity-check supported topologies (GFX8 hardware limits). */
3431 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3432 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3433 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* If a whole SE pair is empty, repoint SE_PAIR_MAP at the live pair. */
3435 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3436 (!se_mask[2] && !se_mask[3]))) {
3437 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3439 if (!se_mask[0] && !se_mask[1]) {
3441 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3444 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3448 for (se = 0; se < num_se; se++) {
3449 unsigned raster_config_se = raster_config;
3450 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3451 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3452 int idx = (se / 2) * 2;
/* If one SE of this pair is empty, map work to the live one. */
3454 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3455 raster_config_se &= ~SE_MAP_MASK;
3457 if (!se_mask[idx]) {
3458 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3460 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* Same logic one level down: pick the live packer (PKR). */
3464 pkr0_mask &= rb_mask;
3465 pkr1_mask &= rb_mask;
3466 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3467 raster_config_se &= ~PKR_MAP_MASK;
3470 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3472 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* And finally per-packer: pick the live RB inside each PKR. */
3476 if (rb_per_se >= 2) {
3477 unsigned rb0_mask = 1 << (se * rb_per_se);
3478 unsigned rb1_mask = rb0_mask << 1;
3480 rb0_mask &= rb_mask;
3481 rb1_mask &= rb_mask;
3482 if (!rb0_mask || !rb1_mask) {
3483 raster_config_se &= ~RB_MAP_PKR0_MASK;
3487 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3490 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3494 if (rb_per_se > 2) {
3495 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3496 rb1_mask = rb0_mask << 1;
3497 rb0_mask &= rb_mask;
3498 rb1_mask &= rb_mask;
3499 if (!rb0_mask || !rb1_mask) {
3500 raster_config_se &= ~RB_MAP_PKR1_MASK;
3504 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3507 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3513 /* GRBM_GFX_INDEX has a different offset on VI */
3514 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3515 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3516 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3519 /* GRBM_GFX_INDEX has a different offset on VI */
/* Restore broadcast mode so later register writes hit all SEs. */
3520 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active, program the raster
 * configuration accordingly (harvested path if some RBs are fused off),
 * and cache the resulting per-SE/SH register values for userspace
 * queries. Holds grbm_idx_mutex for the whole GRBM-index walk.
 */
3523 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3527 u32 raster_config = 0, raster_config_1 = 0;
3529 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3530 adev->gfx.config.max_sh_per_se;
3531 unsigned num_rb_pipes;
3533 mutex_lock(&adev->grbm_idx_mutex);
/* Walk every SE/SH and accumulate the global active-RB bitmap. */
3534 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3535 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3536 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3537 data = gfx_v8_0_get_rb_active_bitmap(adev);
3538 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3539 rb_bitmap_width_per_sh);
3542 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3544 adev->gfx.config.backend_enable_mask = active_rbs;
3545 adev->gfx.config.num_rbs = hweight32(active_rbs);
3547 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3548 adev->gfx.config.max_shader_engines, 16);
3550 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* Full complement of RBs (or none): write defaults broadcast;
 * otherwise compute per-SE harvested mappings. */
3552 if (!adev->gfx.config.backend_enable_mask ||
3553 adev->gfx.config.num_rbs >= num_rb_pipes) {
3554 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3555 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3557 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3558 adev->gfx.config.backend_enable_mask,
3562 /* cache the values for userspace */
3563 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3564 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3565 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3566 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3567 RREG32(mmCC_RB_BACKEND_DISABLE);
3568 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3569 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3570 adev->gfx.config.rb_config[i][j].raster_config =
3571 RREG32(mmPA_SC_RASTER_CONFIG);
3572 adev->gfx.config.rb_config[i][j].raster_config_1 =
3573 RREG32(mmPA_SC_RASTER_CONFIG_1);
3576 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3577 mutex_unlock(&adev->grbm_idx_mutex);
3581 * gfx_v8_0_init_compute_vmid - initialize compute VMID apertures
3583 * @adev: amdgpu_device pointer
3585 * Initialize compute vmid sh_mem registers
3588 #define DEFAULT_SH_MEM_BASES (0x6000)
3589 #define FIRST_COMPUTE_VMID (8)
3590 #define LAST_COMPUTE_VMID (16)
/*
 * Program SH_MEM_CONFIG/SH_MEM_BASES identically for every compute VMID
 * (8..15), placing LDS/scratch/GPUVM apertures per the layout below.
 * srbm_mutex serializes the per-VMID SRBM selection.
 */
3591 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3594 uint32_t sh_mem_config;
3595 uint32_t sh_mem_bases;
3598 * Configure apertures:
3599 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3600 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3601 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
/* Same base value in both the private and shared halves of the reg. */
3603 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3605 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3606 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3607 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3608 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3609 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3610 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3612 mutex_lock(&adev->srbm_mutex);
3613 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3614 vi_srbm_select(adev, 0, 0, 0, i);
3615 /* CP and shaders */
3616 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 base > limit disables the APE1 aperture for these VMIDs. */
3617 WREG32(mmSH_MEM_APE1_BASE, 1);
3618 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3619 WREG32(mmSH_MEM_BASES, sh_mem_bases);
/* Back to VMID 0 so later SRBM-indexed accesses are sane. */
3621 vi_srbm_select(adev, 0, 0, 0, 0);
3622 mutex_unlock(&adev->srbm_mutex);
/*
 * Per-ASIC gfx config tweaks: enable double off-chip LDS buffers on the
 * chips listed in the (elided) case labels, disable them elsewhere.
 */
3625 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3627 switch (adev->asic_type) {
3629 adev->gfx.config.double_offchip_lds_buf = 1;
3633 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time GFX engine setup: address config, tiling tables, RB/raster
 * setup, per-VMID SH_MEM programming, compute VMID apertures, and the
 * PA_SC FIFO sizes / SPI arbitration priorities written in broadcast
 * mode.
 */
3638 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3640 u32 tmp, sh_static_mem_cfg;
3643 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
/* The same gb_addr_config value feeds GB, HDP and DMIF address decode. */
3644 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3645 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3646 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3648 gfx_v8_0_tiling_mode_table_init(adev);
3649 gfx_v8_0_setup_rb(adev);
3650 gfx_v8_0_get_cu_info(adev);
3651 gfx_v8_0_config_init(adev);
3653 /* XXX SH_MEM regs */
3654 /* where to put LDS, scratch, GPUVM in FSA64 space */
3655 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3657 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3659 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3661 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3663 mutex_lock(&adev->srbm_mutex);
/* Program SH_MEM_CONFIG/BASES for every graphics VMID. The two
 * branches (elided condition, presumably VMID 0 vs others — confirm)
 * differ in DEFAULT_MTYPE (UC vs NC) and the SH_MEM_BASES value. */
3664 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3665 vi_srbm_select(adev, 0, 0, 0, i);
3666 /* CP and shaders */
3668 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3669 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3670 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3671 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3672 WREG32(mmSH_MEM_CONFIG, tmp);
3673 WREG32(mmSH_MEM_BASES, 0);
3675 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3676 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3677 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3678 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3679 WREG32(mmSH_MEM_CONFIG, tmp);
3680 tmp = adev->mc.shared_aperture_start >> 48;
3681 WREG32(mmSH_MEM_BASES, tmp);
/* APE1 base > limit disables the APE1 aperture. */
3684 WREG32(mmSH_MEM_APE1_BASE, 1);
3685 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3687 vi_srbm_select(adev, 0, 0, 0, 0);
3688 mutex_unlock(&adev->srbm_mutex);
3690 gfx_v8_0_init_compute_vmid(adev);
3692 mutex_lock(&adev->grbm_idx_mutex);
3694 * making sure that the following register writes will be broadcasted
3695 * to all the shaders
3697 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3699 WREG32(mmPA_SC_FIFO_SIZE,
3700 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3701 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3702 (adev->gfx.config.sc_prim_fifo_size_backend <<
3703 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3704 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3705 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3706 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3707 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* Equalize SPI pipe arbitration priority across all four pipes. */
3709 tmp = RREG32(mmSPI_ARB_PRIORITY);
3710 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3711 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3712 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3713 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3714 WREG32(mmSPI_ARB_PRIORITY, tmp);
3716 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters go idle: first the per-CU masters
 * for every SE/SH (selected via GRBM index), then the non-CU masters
 * (SE/GC/TC0/TC1). Each poll is bounded by adev->usec_timeout.
 */
3720 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3725 mutex_lock(&adev->grbm_idx_mutex);
3726 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3727 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3728 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3729 for (k = 0; k < adev->usec_timeout; k++) {
3730 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3736 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3737 mutex_unlock(&adev->grbm_idx_mutex);
3739 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3740 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3741 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3742 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3743 for (k = 0; k < adev->usec_timeout; k++) {
3744 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable/disable the GUI-idle related interrupt sources on the gfx
 * ring 0 interrupt control register (context busy/empty, CMP busy,
 * GFX idle) as a group.
 */
3750 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3753 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3755 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3756 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3757 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3758 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3760 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): GPU address
 * split into hi/lo (lo aligned to 4 bytes) plus its size.
 */
3763 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3766 WREG32(mmRLC_CSIB_ADDR_HI,
3767 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3768 WREG32(mmRLC_CSIB_ADDR_LO,
3769 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3770 WREG32(mmRLC_CSIB_LENGTH,
3771 adev->gfx.rlc.clear_state_size);
/*
 * Scan the RLC indirect register-list format, recording where each
 * indirect sub-list starts (ind_start_offsets) and collecting the set
 * of unique register indices used (unique_indices). Entries in
 * register_list_format are rewritten in place to refer to the position
 * of their index within unique_indices. BUG_ON guards the fixed-size
 * output arrays (max_offset / max_indices).
 */
3774 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3777 int *unique_indices,
3780 int *ind_start_offsets,
3785 bool new_entry = true;
3787 for (; ind_offset < list_size; ind_offset++) {
/* Start of a new sub-list: remember its offset. */
3791 ind_start_offsets[*offset_count] = ind_offset;
3792 *offset_count = *offset_count + 1;
3793 BUG_ON(*offset_count >= max_offset);
/* 0xFFFFFFFF marks the end of a sub-list. */
3796 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3803 /* look for the matching indice */
3805 indices < *indices_count;
3807 if (unique_indices[indices] ==
3808 register_list_format[ind_offset])
/* Not seen before: append to the unique set. */
3812 if (indices >= *indices_count) {
3813 unique_indices[*indices_count] =
3814 register_list_format[ind_offset];
3815 indices = *indices_count;
3816 *indices_count = *indices_count + 1;
3817 BUG_ON(*indices_count >= max_indices);
/* Replace the raw index with its slot in unique_indices. */
3820 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore register lists: parse the firmware-
 * provided format list, write the restore list into RLC ARAM, the
 * format list and start offsets into RLC GPM scratch, and program the
 * unique-index control/data register pairs.
 *
 * Returns 0 on success, negative errno on allocation failure
 * (error-return line elided from this view).
 */
3824 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3827 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3828 int indices_count = 0;
3829 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3830 int offset_count = 0;
/* Work on a private copy; parsing rewrites entries in place. */
3833 unsigned int *register_list_format =
3834 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3835 if (!register_list_format)
3837 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3838 adev->gfx.rlc.reg_list_format_size_bytes);
3840 gfx_v8_0_parse_ind_reg_list(register_list_format,
3841 RLC_FormatDirectRegListLength,
3842 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3845 sizeof(unique_indices) / sizeof(int),
3846 indirect_start_offsets,
3848 sizeof(indirect_start_offsets)/sizeof(int));
3850 /* save and restore list */
3851 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3853 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3854 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3855 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3858 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3859 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3860 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* Restore-list size is expressed in register pairs (dwords / 2). */
3862 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3863 list_size = list_size >> 1;
3864 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3865 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3867 /* starting offsets starts */
3868 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3869 adev->gfx.rlc.starting_offsets_start);
3870 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3871 WREG32(mmRLC_GPM_SCRATCH_DATA,
3872 indirect_start_offsets[i]);
3874 /* unique indices */
/* CNTL_ADDR_n takes the low 18 bits, CNTL_DATA_n the data field. */
3875 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3876 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3877 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3878 if (unique_indices[i] != 0) {
3879 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3880 WREG32(data + i, unique_indices[i] >> 20);
3883 kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM). */
3888 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3890 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: RB WPTR poll idle
 * count, the four PG delay fields, serdes command delay, and the
 * auto-PG GRBM register-save idle threshold.
 */
3893 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3897 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3899 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3900 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3901 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3902 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3903 WREG32(mmRLC_PG_DELAY, data);
3905 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3906 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up (Carrizo-family PG knob). */
3910 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3913 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down (Carrizo-family PG knob). */
3916 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3919 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable CP power gating; note the field is a DISABLE bit, hence inverted. */
3922 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3924 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating bring-up, per ASIC family. Carrizo/Stoney additionally
 * program the RLC jump-table restore address and always-on CU mask;
 * Polaris11/12 get only CSB + save/restore list + PG timing. Other
 * ASICs get no PG init here.
 */
3927 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3929 if ((adev->asic_type == CHIP_CARRIZO) ||
3930 (adev->asic_type == CHIP_STONEY)) {
3931 gfx_v8_0_init_csb(adev);
3932 gfx_v8_0_init_save_restore_list(adev);
3933 gfx_v8_0_enable_save_restore_machine(adev);
3934 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3935 gfx_v8_0_init_power_gating(adev);
3936 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3937 } else if ((adev->asic_type == CHIP_POLARIS11) ||
3938 (adev->asic_type == CHIP_POLARIS12)) {
3939 gfx_v8_0_init_csb(adev);
3940 gfx_v8_0_init_save_restore_list(adev);
3941 gfx_v8_0_enable_save_restore_machine(adev);
3942 gfx_v8_0_init_power_gating(adev);
/*
 * Halt the RLC F32 core, mask the GUI-idle interrupts, then wait for
 * the RLC serdes masters to drain.
 */
3947 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3949 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
3951 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3952 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the GRBM soft-reset bit for the RLC (assert, then de-assert). */
3955 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3957 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3960 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/*
 * Start the RLC F32 core. On dGPUs the GUI-idle interrupt is enabled
 * here; APUs defer that until after CP init (see comment below).
 */
3964 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3966 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
3968 /* carrizo do enable cp interrupt after cp inited */
3969 if (!(adev->flags & AMD_IS_APU))
3970 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode load: stream the GPM ucode dwords via
 * the ADDR/DATA register pair, then write the fw version into the ADDR
 * register as the final step. Returns an error (elided line) when the
 * RLC firmware was never fetched.
 */
3975 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3977 const struct rlc_firmware_header_v2_0 *hdr;
3978 const __le32 *fw_data;
3979 unsigned i, fw_size;
3981 if (!adev->gfx.rlc_fw)
3984 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3985 amdgpu_ucode_print_rlc_hdr(&hdr->header);
/* Firmware payload follows the header at ucode_array_offset_bytes. */
3987 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3988 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3989 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3991 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3992 for (i = 0; i < fw_size; i++)
3993 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3994 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Full RLC bring-up sequence: stop the RLC, disable coarse/medium
 * clock gating (CGCG/CGLS, plus the 3D variant on Polaris), clear
 * RLC_PG_CNTL, reset, run PG init, load microcode if the legacy path
 * is in use (otherwise wait for the SMU to finish loading), and start
 * the RLC. Returns 0 on success or the microcode-load error.
 */
3999 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4004 gfx_v8_0_rlc_stop(adev);
4007 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4008 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4009 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4010 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4011 if (adev->asic_type == CHIP_POLARIS11 ||
4012 adev->asic_type == CHIP_POLARIS10 ||
4013 adev->asic_type == CHIP_POLARIS12) {
4014 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4016 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
/* Disable all PG features before reset; init_pg re-enables as needed. */
4020 WREG32(mmRLC_PG_CNTL, 0);
4022 gfx_v8_0_rlc_reset(adev);
4023 gfx_v8_0_init_pg(adev);
4025 if (!adev->pp_enabled) {
4026 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4027 /* legacy rlc firmware loading */
4028 r = gfx_v8_0_rlc_load_microcode(adev);
4032 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4033 AMDGPU_UCODE_ID_RLC_G);
4039 gfx_v8_0_rlc_start(adev);
/*
 * Halt or un-halt the three gfx CP micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL. When halting, all gfx rings are marked not-ready so no
 * further submissions are attempted.
 */
4044 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4047 u32 tmp = RREG32(mmCP_ME_CNTL);
4050 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4051 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4052 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4054 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4055 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4056 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4057 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4058 adev->gfx.gfx_ring[i].ready = false;
4060 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Legacy CP gfx microcode load: halt the CP, then stream the PFP, CE
 * and ME firmware images into their respective ucode RAMs through the
 * ADDR/DATA register pairs, finishing each with the fw version write.
 * Errors out (elided line) if any of the three firmwares is missing.
 */
4064 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4066 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4067 const struct gfx_firmware_header_v1_0 *ce_hdr;
4068 const struct gfx_firmware_header_v1_0 *me_hdr;
4069 const __le32 *fw_data;
4070 unsigned i, fw_size;
4072 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4075 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4076 adev->gfx.pfp_fw->data;
4077 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4078 adev->gfx.ce_fw->data;
4079 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4080 adev->gfx.me_fw->data;
4082 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4083 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4084 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* CP must be halted while its ucode RAMs are rewritten. */
4086 gfx_v8_0_cp_gfx_enable(adev, false);
4089 fw_data = (const __le32 *)
4090 (adev->gfx.pfp_fw->data +
4091 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4092 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4093 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4094 for (i = 0; i < fw_size; i++)
4095 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4096 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4099 fw_data = (const __le32 *)
4100 (adev->gfx.ce_fw->data +
4101 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4102 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4103 WREG32(mmCP_CE_UCODE_ADDR, 0);
4104 for (i = 0; i < fw_size; i++)
4105 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4106 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4109 fw_data = (const __le32 *)
4110 (adev->gfx.me_fw->data +
4111 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4112 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4113 WREG32(mmCP_ME_RAM_WADDR, 0);
4114 for (i = 0; i < fw_size; i++)
4115 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4116 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Compute the dword count of the clear-state buffer built by
 * gfx_v8_0_cp_gfx_start(): fixed preamble/postamble packets plus
 * 2 + reg_count dwords per SECT_CONTEXT extent in vi_cs_data.
 * Must stay in sync with the packet stream emitted there.
 */
4121 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4124 const struct cs_section_def *sect = NULL;
4125 const struct cs_extent_def *ext = NULL;
4127 /* begin clear state */
4129 /* context control state */
4132 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4133 for (ext = sect->section; ext->extent != NULL; ++ext) {
4134 if (sect->id == SECT_CONTEXT)
4135 count += 2 + ext->reg_count;
4140 /* pa_sc_raster_config/pa_sc_raster_config1 */
4142 /* end clear state */
/*
 * Prime gfx ring 0: program CP context limits, un-halt the CP, then
 * emit the clear-state packet stream (preamble, context control, all
 * SECT_CONTEXT extents from vi_cs_data, per-ASIC raster config values,
 * CLEAR_STATE, and the CE partition bases). The ring allocation size
 * must match gfx_v8_0_get_csb_size() + 4.
 */
4150 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4152 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4153 const struct cs_section_def *sect = NULL;
4154 const struct cs_extent_def *ext = NULL;
4158 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4159 WREG32(mmCP_ENDIAN_SWAP, 0);
4160 WREG32(mmCP_DEVICE_ID, 1);
4162 gfx_v8_0_cp_gfx_enable(adev, true);
4164 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4166 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4170 /* clear state buffer */
4171 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4172 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4174 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4175 amdgpu_ring_write(ring, 0x80000000);
4176 amdgpu_ring_write(ring, 0x80000000);
/* Emit every context-register extent from the clear-state table. */
4178 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4179 for (ext = sect->section; ext->extent != NULL; ++ext) {
4180 if (sect->id == SECT_CONTEXT) {
4181 amdgpu_ring_write(ring,
4182 PACKET3(PACKET3_SET_CONTEXT_REG,
4184 amdgpu_ring_write(ring,
4185 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4186 for (i = 0; i < ext->reg_count; i++)
4187 amdgpu_ring_write(ring, ext->extent[i]);
/* Per-ASIC PA_SC_RASTER_CONFIG / _1 values (case labels elided). */
4192 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4193 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4194 switch (adev->asic_type) {
4196 case CHIP_POLARIS10:
4197 amdgpu_ring_write(ring, 0x16000012);
4198 amdgpu_ring_write(ring, 0x0000002A);
4200 case CHIP_POLARIS11:
4201 case CHIP_POLARIS12:
4202 amdgpu_ring_write(ring, 0x16000012);
4203 amdgpu_ring_write(ring, 0x00000000);
4206 amdgpu_ring_write(ring, 0x3a00161a);
4207 amdgpu_ring_write(ring, 0x0000002e);
4210 amdgpu_ring_write(ring, 0x00000002);
4211 amdgpu_ring_write(ring, 0x00000000);
4214 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4215 0x00000000 : 0x00000002);
4216 amdgpu_ring_write(ring, 0x00000000);
4219 amdgpu_ring_write(ring, 0x00000000);
4220 amdgpu_ring_write(ring, 0x00000000);
4226 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4227 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4229 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4230 amdgpu_ring_write(ring, 0);
4232 /* init the CE partitions */
4233 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4234 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4235 amdgpu_ring_write(ring, 0x8000);
4236 amdgpu_ring_write(ring, 0x8000);
4238 amdgpu_ring_commit(ring);
/*
 * Configure the CP gfx ring doorbell: offset + enable when the ring
 * uses doorbells, disabled otherwise. Topaz has no gfx doorbells at
 * all; on dGPUs the valid doorbell range is additionally programmed
 * (APUs return early before the range setup).
 */
4242 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4245 /* no gfx doorbells on iceland */
4246 if (adev->asic_type == CHIP_TOPAZ)
4249 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4251 if (ring->use_doorbell) {
4252 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4253 DOORBELL_OFFSET, ring->doorbell_index);
4254 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4256 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4259 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4262 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4264 if (adev->flags & AMD_IS_APU)
4267 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4268 DOORBELL_RANGE_LOWER,
4269 AMDGPU_DOORBELL_GFX_RING0);
4270 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4272 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4273 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
/*
 * Bring up gfx ring 0: program ring buffer size/control, read/write
 * pointers and their writeback addresses, the ring base address, and
 * the doorbell; then emit the clear-state stream and run a ring test.
 * Returns the ring-test result (ring marked not-ready on failure).
 */
4276 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4278 struct amdgpu_ring *ring;
4281 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4284 /* Set the write pointer delay */
4285 WREG32(mmCP_RB_WPTR_DELAY, 0);
4287 /* set the RB to use vmid 0 */
4288 WREG32(mmCP_RB_VMID, 0);
4290 /* Set ring buffer size */
4291 ring = &adev->gfx.gfx_ring[0];
/* RB_BUFSZ is log2 of the size in units of 8 dwords. */
4292 rb_bufsz = order_base_2(ring->ring_size / 8);
4293 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4294 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4295 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4296 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4298 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4300 WREG32(mmCP_RB0_CNTL, tmp);
4302 /* Initialize the ring buffer's read and write pointers */
/* RB_RPTR_WR_ENA lets us force the read pointer; cleared below. */
4303 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4305 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4307 /* set the wb address wether it's enabled or not */
4308 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4309 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4310 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4312 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4313 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4314 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4316 WREG32(mmCP_RB0_CNTL, tmp);
/* Ring base is programmed in 256-byte units. */
4318 rb_addr = ring->gpu_addr >> 8;
4319 WREG32(mmCP_RB0_BASE, rb_addr);
4320 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4322 gfx_v8_0_set_cpg_door_bell(adev, ring);
4323 /* start the ring */
4324 amdgpu_ring_clear_ring(ring);
4325 gfx_v8_0_cp_gfx_start(adev);
4327 r = amdgpu_ring_test_ring(ring);
4329 ring->ready = false;
/*
 * Halt or un-halt both compute micro-engines (MEC ME1/ME2). When
 * halting, every compute ring and the KIQ ring are marked not-ready.
 */
4334 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4339 WREG32(mmCP_MEC_CNTL, 0);
4341 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4342 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4343 adev->gfx.compute_ring[i].ready = false;
4344 adev->gfx.kiq.ring.ready = false;
/*
 * Legacy MEC microcode load: halt the compute CP, stream the MEC1
 * image into its ucode RAM, and optionally load MEC2 when a separate
 * image was fetched. Errors out (elided line) without MEC1 firmware.
 */
4349 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4351 const struct gfx_firmware_header_v1_0 *mec_hdr;
4352 const __le32 *fw_data;
4353 unsigned i, fw_size;
4355 if (!adev->gfx.mec_fw)
4358 gfx_v8_0_cp_compute_enable(adev, false);
4360 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4361 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4363 fw_data = (const __le32 *)
4364 (adev->gfx.mec_fw->data +
4365 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4366 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4369 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4370 for (i = 0; i < fw_size; i++)
4371 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4372 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4374 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4375 if (adev->gfx.mec2_fw) {
4376 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4378 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4379 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4381 fw_data = (const __le32 *)
4382 (adev->gfx.mec2_fw->data +
4383 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4384 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4386 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4387 for (i = 0; i < fw_size; i++)
4388 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4389 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4396 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4399 struct amdgpu_device *adev = ring->adev;
4401 /* tell RLC which is KIQ queue */
4402 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4404 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4405 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4407 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/* Map all kernel compute queues (KCQs) through the KIQ using PM4 packets:
 * one SET_RESOURCES for the queue mask, then one MAP_QUEUES per compute ring,
 * finally a scratch-register write that the driver polls for completion.
 */
4410 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4412 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4413 uint32_t scratch, tmp = 0;
4414 uint64_t queue_mask = 0;
/* build a bitmask of every MEC queue the driver owns */
4417 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4418 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4421 /* This situation may be hit in the future if a new HW
4422 * generation exposes more than 64 queues. If so, the
4423 * definition of queue_mask needs updating */
4424 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4425 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4429 queue_mask |= (1ull << i);
/* scratch register is seeded with a sentinel and later overwritten by the KIQ */
4432 r = amdgpu_gfx_scratch_get(adev, &scratch);
4434 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4437 WREG32(scratch, 0xCAFEDEAD);
/* 8 dwords per MAP_QUEUES + 11 for SET_RESOURCES and the completion write */
4439 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4441 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4442 amdgpu_gfx_scratch_free(adev, scratch);
/* set resources: hand the queue mask to the KIQ */
4446 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4447 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4448 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4449 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4450 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4451 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4452 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4453 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
/* map each compute ring: MQD address + wptr poll address per queue */
4454 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4455 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4456 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4457 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4460 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4461 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4462 amdgpu_ring_write(kiq_ring,
4463 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4464 amdgpu_ring_write(kiq_ring,
4465 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4466 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4467 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4468 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4469 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4470 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4471 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4472 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4474 /* write to scratch for completion */
4475 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4476 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4477 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4478 amdgpu_ring_commit(kiq_ring);
/* poll until the KIQ has executed the completion write (or we time out) */
4480 for (i = 0; i < adev->usec_timeout; i++) {
4481 tmp = RREG32(scratch);
4482 if (tmp == 0xDEADBEEF)
4486 if (i >= adev->usec_timeout) {
4487 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4491 amdgpu_gfx_scratch_free(adev, scratch);
/* Deactivate the currently selected hardware queue descriptor (HQD).
 * Caller must have selected the target me/pipe/queue via vi_srbm_select().
 * @req: dequeue request type written to CP_HQD_DEQUEUE_REQUEST.
 */
4496 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4500 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4501 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
/* wait for the CP to drop the ACTIVE bit */
4502 for (i = 0; i < adev->usec_timeout; i++) {
4503 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/* timeout: presumably returns an error here (elided in this listing) */
4507 if (i == adev->usec_timeout)
/* clear the request and reset the queue pointers */
4510 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4511 WREG32(mmCP_HQD_PQ_RPTR, 0);
4512 WREG32(mmCP_HQD_PQ_WPTR, 0);
/* Initialize the Memory Queue Descriptor (MQD) for a compute ring.
 * Fills the vi_mqd structure in CPU-mapped memory; the values are later
 * committed to the HQD registers by gfx_v8_0_mqd_commit() or by the KIQ.
 * Caller must hold srbm_mutex with the target queue selected.
 */
4517 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4519 struct amdgpu_device *adev = ring->adev;
4520 struct vi_mqd *mqd = ring->mqd_ptr;
4521 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
/* fixed MQD header and static thread-management masks (all SEs/CUs enabled) */
4524 mqd->header = 0xC0310800;
4525 mqd->compute_pipelinestat_enable = 0x00000001;
4526 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4527 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4528 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4529 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4530 mqd->compute_misc_reserved = 0x00000003;
/* dGPU only: point the CP at the dynamic CU mask stored in the MQD allocation */
4531 if (!(adev->flags & AMD_IS_APU)) {
4532 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4533 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4534 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4535 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
/* EOP (end-of-pipe) buffer base, 256-byte aligned */
4537 eop_base_addr = ring->eop_gpu_addr >> 8;
4538 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4539 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4541 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4542 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4543 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4544 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4546 mqd->cp_hqd_eop_control = tmp;
4548 /* enable doorbell? */
4549 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4550 CP_HQD_PQ_DOORBELL_CONTROL,
4552 ring->use_doorbell ? 1 : 0);
4554 mqd->cp_hqd_pq_doorbell_control = tmp;
4556 /* set the pointer to the MQD */
4557 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4558 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4560 /* set MQD vmid to 0 */
4561 tmp = RREG32(mmCP_MQD_CONTROL);
4562 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4563 mqd->cp_mqd_control = tmp;
4565 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4566 hqd_gpu_addr = ring->gpu_addr >> 8;
4567 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4568 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4570 /* set up the HQD, this is similar to CP_RB0_CNTL */
4571 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4572 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4573 (order_base_2(ring->ring_size / 4) - 1));
4574 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4575 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
/* ENDIAN_SWAP presumably guarded by a big-endian #ifdef (elided in listing) */
4577 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4579 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4580 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4581 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4583 mqd->cp_hqd_pq_control = tmp;
4585 /* set the wb address whether it's enabled or not */
4586 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4587 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4588 mqd->cp_hqd_pq_rptr_report_addr_hi =
4589 upper_32_bits(wb_gpu_addr) & 0xffff;
4591 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4592 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4593 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4594 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4597 /* enable the doorbell if requested */
4598 if (ring->use_doorbell) {
4599 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4600 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4601 DOORBELL_OFFSET, ring->doorbell_index);
4603 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4605 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4606 DOORBELL_SOURCE, 0);
4607 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4611 mqd->cp_hqd_pq_doorbell_control = tmp;
4613 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4615 mqd->cp_hqd_pq_wptr = ring->wptr;
4616 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4618 /* set the vmid for the queue */
4619 mqd->cp_hqd_vmid = 0;
4621 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4622 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4623 mqd->cp_hqd_persistent_state = tmp;
/* MTYPE=3 (uncached) for IB control, IQ timer and context-save control */
4626 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4627 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4628 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4629 mqd->cp_hqd_ib_control = tmp;
4631 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4632 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4633 mqd->cp_hqd_iq_timer = tmp;
4635 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4636 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4637 mqd->cp_hqd_ctx_save_control = tmp;
/* snapshot the remaining HQD registers as the MQD defaults */
4640 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4641 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4642 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4643 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4644 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4645 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4646 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4647 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4648 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4649 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4650 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4651 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4652 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4653 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4654 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4656 /* activate the queue */
4657 mqd->cp_hqd_active = 1;
/* Program the HQD registers of the currently selected queue from an MQD.
 * Writes the registers in a specific order (VMID..EOP_CONTROL first, then
 * EOP pointers except on Tonga, then EOP_EVENTS..ERROR, and finally
 * MQD_BASE_ADDR..HQD_ACTIVE so activation happens last).
 * Caller must hold srbm_mutex with the target queue selected.
 */
4662 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4668 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4669 mqd_data = &mqd->cp_mqd_base_addr_lo;
4671 /* disable wptr polling */
4672 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4674 /* program all HQD registers */
4675 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4676 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4678 /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4679 * This is safe since EOP RPTR==WPTR for any inactive HQD
4680 * on ASICs that do not support context-save.
4681 * EOP writes/reads can start anywhere in the ring.
4683 if (adev->asic_type != CHIP_TONGA) {
4684 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4685 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4686 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4689 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4690 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4692 /* activate the HQD */
4693 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4694 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
/* Initialize (or restore after GPU reset) the KIQ's MQD and commit it.
 * The KIQ backup slot is the extra entry past the compute rings
 * (mqd_idx == AMDGPU_MAX_COMPUTE_RINGS).
 */
4699 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4701 struct amdgpu_device *adev = ring->adev;
4702 struct vi_mqd *mqd = ring->mqd_ptr;
4703 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4705 gfx_v8_0_kiq_setting(ring);
4707 if (adev->gfx.in_reset) { /* for GPU_RESET case */
4708 /* reset MQD to a clean status */
4709 if (adev->gfx.mec.mqd_backup[mqd_idx])
4710 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4712 /* reset ring buffer */
4714 amdgpu_ring_clear_ring(ring);
/* restore path: re-commit the backed-up MQD under SRBM selection */
4715 mutex_lock(&adev->srbm_mutex);
4716 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4717 gfx_v8_0_mqd_commit(adev, mqd);
4718 vi_srbm_select(adev, 0, 0, 0, 0);
4719 mutex_unlock(&adev->srbm_mutex);
/* first-init path (else branch, elided): build the MQD from scratch */
4721 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4722 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4723 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4724 mutex_lock(&adev->srbm_mutex);
4725 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4726 gfx_v8_0_mqd_init(ring);
4727 gfx_v8_0_mqd_commit(adev, mqd);
4728 vi_srbm_select(adev, 0, 0, 0, 0);
4729 mutex_unlock(&adev->srbm_mutex);
/* save a pristine copy for the next GPU reset */
4731 if (adev->gfx.mec.mqd_backup[mqd_idx])
4732 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
/* Initialize a kernel compute queue's MQD. Unlike the KIQ path this only
 * fills the MQD; the KIQ maps the queue later via gfx_v8_0_kiq_kcq_enable().
 * Three cases: fresh init, restore-after-reset, and resume (ring clear only).
 */
4738 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4740 struct amdgpu_device *adev = ring->adev;
4741 struct vi_mqd *mqd = ring->mqd_ptr;
/* backup slot index = position of this ring in the compute_ring array */
4742 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4744 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4745 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4746 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4747 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4748 mutex_lock(&adev->srbm_mutex);
4749 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4750 gfx_v8_0_mqd_init(ring);
4751 vi_srbm_select(adev, 0, 0, 0, 0);
4752 mutex_unlock(&adev->srbm_mutex);
/* keep a clean copy for later resets */
4754 if (adev->gfx.mec.mqd_backup[mqd_idx])
4755 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4756 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4757 /* reset MQD to a clean status */
4758 if (adev->gfx.mec.mqd_backup[mqd_idx])
4759 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4760 /* reset ring buffer */
4762 amdgpu_ring_clear_ring(ring);
/* suspend-resume case (else branch, elided): just clear the ring */
4764 amdgpu_ring_clear_ring(ring);
/* Program the MEC doorbell aperture (KIQ..MEC ring 7) and enable doorbells.
 * The range registers only exist on ASICs newer than Tonga.
 */
4769 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4771 if (adev->asic_type > CHIP_TONGA) {
4772 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4773 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4775 /* enable doorbells */
4776 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
/* Bring up the KIQ and all kernel compute queues:
 * enable the MECs, init/commit the KIQ MQD, init each KCQ MQD,
 * set the doorbell range, map the KCQs via the KIQ, then ring-test everything.
 */
4779 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4781 struct amdgpu_ring *ring = NULL;
4784 gfx_v8_0_cp_compute_enable(adev, true);
4786 ring = &adev->gfx.kiq.ring;
/* map the KIQ MQD BO and initialize it in place */
4788 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4789 if (unlikely(r != 0))
4792 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4794 r = gfx_v8_0_kiq_init_queue(ring);
4795 amdgpu_bo_kunmap(ring->mqd_obj);
4796 ring->mqd_ptr = NULL;
4798 amdgpu_bo_unreserve(ring->mqd_obj);
/* same map/init/unmap dance for each compute ring's MQD */
4802 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4803 ring = &adev->gfx.compute_ring[i];
4805 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4806 if (unlikely(r != 0))
4808 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4810 r = gfx_v8_0_kcq_init_queue(ring);
4811 amdgpu_bo_kunmap(ring->mqd_obj);
4812 ring->mqd_ptr = NULL;
4814 amdgpu_bo_unreserve(ring->mqd_obj);
4819 gfx_v8_0_set_mec_doorbell_range(adev);
4821 r = gfx_v8_0_kiq_kcq_enable(adev);
/* functional test of the KIQ ring, then of every KCQ */
4826 ring = &adev->gfx.kiq.ring;
4828 r = amdgpu_ring_test_ring(ring);
4830 ring->ready = false;
4835 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4836 ring = &adev->gfx.compute_ring[i];
4838 r = amdgpu_ring_test_ring(ring);
4840 ring->ready = false;
/* Resume the whole Command Processor: load microcode (directly or via SMU,
 * depending on powerplay/fw load type), then resume the GFX ring and the
 * KIQ/compute queues, and re-enable the GUI idle interrupt.
 */
4847 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4851 if (!(adev->flags & AMD_IS_APU))
4852 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4854 if (!adev->pp_enabled) {
4855 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
4856 /* legacy firmware loading */
4857 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4861 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU-loaded path: just wait for CE/PFP/ME firmware to finish loading */
4865 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4866 AMDGPU_UCODE_ID_CP_CE);
4870 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4871 AMDGPU_UCODE_ID_CP_PFP);
4875 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4876 AMDGPU_UCODE_ID_CP_ME);
/* Topaz has no SMU MEC load support, fall back to direct load */
4880 if (adev->asic_type == CHIP_TOPAZ) {
4881 r = gfx_v8_0_cp_compute_load_microcode(adev);
4885 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4886 AMDGPU_UCODE_ID_CP_MEC1);
4893 r = gfx_v8_0_cp_gfx_resume(adev);
4897 r = gfx_v8_0_kiq_resume(adev);
4901 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable or disable both halves of the CP (graphics ME and compute MECs). */
4906 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4908 gfx_v8_0_cp_gfx_enable(adev, enable);
4909 gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init hook: golden register setup, GPU init, then RLC and CP resume. */
4912 static int gfx_v8_0_hw_init(void *handle)
4915 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4917 gfx_v8_0_init_golden_registers(adev);
4918 gfx_v8_0_gpu_init(adev);
4920 r = gfx_v8_0_rlc_resume(adev);
4924 r = gfx_v8_0_cp_resume(adev);
/* IP-block hw_fini hook: release IRQs, then (bare metal only) stop the CP
 * and RLC and ungate GFX power gating. SR-IOV guests must not touch the HW.
 */
4929 static int gfx_v8_0_hw_fini(void *handle)
4931 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4933 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4934 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4935 if (amdgpu_sriov_vf(adev)) {
4936 pr_debug("For SRIOV client, shouldn't do anything.\n");
4939 gfx_v8_0_cp_enable(adev, false);
4940 gfx_v8_0_rlc_stop(adev);
/* ungate so a later hw_init starts from a known power state */
4942 amdgpu_set_powergating_state(adev,
4943 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
/* IP-block suspend hook: mark in_suspend (consumed by kcq_init_queue on resume)
 * and tear the hardware down via hw_fini.
 */
4948 static int gfx_v8_0_suspend(void *handle)
4950 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4951 adev->gfx.in_suspend = true;
4952 return gfx_v8_0_hw_fini(adev);
/* IP-block resume hook: re-run hw_init, then clear the in_suspend flag. */
4955 static int gfx_v8_0_resume(void *handle)
4958 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4960 r = gfx_v8_0_hw_init(adev);
4961 adev->gfx.in_suspend = false;
/* Report whether the GFX block is idle (GRBM_STATUS.GUI_ACTIVE clear). */
4965 static bool gfx_v8_0_is_idle(void *handle)
4967 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4969 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* Poll gfx_v8_0_is_idle() up to usec_timeout iterations; timeout returns
 * an error (elided in this listing).
 */
4975 static int gfx_v8_0_wait_for_idle(void *handle)
4978 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4980 for (i = 0; i < adev->usec_timeout; i++) {
4981 if (gfx_v8_0_is_idle(handle))
/* Inspect GRBM/SRBM status registers and compute which soft-reset bits are
 * needed. Stashes the masks in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset hooks; returns true when any reset is required.
 */
4989 static bool gfx_v8_0_check_soft_reset(void *handle)
4991 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4992 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* any busy GFX pipeline unit => reset CP + GFX + GRBM */
4996 tmp = RREG32(mmGRBM_STATUS);
4997 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4998 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4999 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5000 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5001 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5002 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5003 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5004 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5005 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5006 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5007 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5008 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5009 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
/* RLC busy => reset RLC */
5013 tmp = RREG32(mmGRBM_STATUS2);
5014 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5015 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5016 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
/* any CP front-end (CPF/CPC/CPG) busy => reset those blocks + GRBM */
5018 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5019 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5020 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5021 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5023 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5025 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5027 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5028 SOFT_RESET_GRBM, 1);
/* SRBM-level pending requests / semaphore busy */
5032 tmp = RREG32(mmSRBM_STATUS);
5033 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5034 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5035 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5036 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5037 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5038 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5040 if (grbm_soft_reset || srbm_soft_reset) {
5041 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5042 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5045 adev->gfx.grbm_soft_reset = 0;
5046 adev->gfx.srbm_soft_reset = 0;
/* Quiesce the GFX block before a soft reset: stop the RLC, disable GFX
 * parsing if the CP/GFX is being reset, and dequeue every compute HQD
 * before halting the MECs.
 */
5051 static int gfx_v8_0_pre_soft_reset(void *handle)
5053 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5054 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* nothing to do when check_soft_reset recorded no pending reset */
5056 if ((!adev->gfx.grbm_soft_reset) &&
5057 (!adev->gfx.srbm_soft_reset))
5060 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5061 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5064 gfx_v8_0_rlc_stop(adev);
5066 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5067 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5068 /* Disable GFX parsing/prefetching */
5069 gfx_v8_0_cp_gfx_enable(adev, false);
5071 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5072 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5073 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5074 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
/* dequeue (req=2) each compute queue under SRBM selection */
5077 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5078 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5080 mutex_lock(&adev->srbm_mutex);
5081 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5082 gfx_v8_0_deactivate_hqd(adev, 2);
5083 vi_srbm_select(adev, 0, 0, 0, 0);
5084 mutex_unlock(&adev->srbm_mutex);
5086 /* Disable MEC parsing/prefetching */
5087 gfx_v8_0_cp_compute_enable(adev, false);
/* Perform the actual soft reset: stall/clear via GMCON_DEBUG, pulse the
 * recorded bits in GRBM_SOFT_RESET and SRBM_SOFT_RESET (set, read back,
 * delay — elided — then clear), and release the GMCON stall.
 */
5093 static int gfx_v8_0_soft_reset(void *handle)
5095 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5096 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5099 if ((!adev->gfx.grbm_soft_reset) &&
5100 (!adev->gfx.srbm_soft_reset))
5103 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5104 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stall the memory controller interface while resetting */
5106 if (grbm_soft_reset || srbm_soft_reset) {
5107 tmp = RREG32(mmGMCON_DEBUG);
5108 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5109 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5110 WREG32(mmGMCON_DEBUG, tmp);
/* assert then deassert the GRBM reset bits; readbacks flush the writes */
5114 if (grbm_soft_reset) {
5115 tmp = RREG32(mmGRBM_SOFT_RESET);
5116 tmp |= grbm_soft_reset;
5117 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5118 WREG32(mmGRBM_SOFT_RESET, tmp);
5119 tmp = RREG32(mmGRBM_SOFT_RESET);
5123 tmp &= ~grbm_soft_reset;
5124 WREG32(mmGRBM_SOFT_RESET, tmp);
5125 tmp = RREG32(mmGRBM_SOFT_RESET);
/* same assert/deassert sequence for the SRBM reset bits */
5128 if (srbm_soft_reset) {
5129 tmp = RREG32(mmSRBM_SOFT_RESET);
5130 tmp |= srbm_soft_reset;
5131 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5132 WREG32(mmSRBM_SOFT_RESET, tmp);
5133 tmp = RREG32(mmSRBM_SOFT_RESET);
5137 tmp &= ~srbm_soft_reset;
5138 WREG32(mmSRBM_SOFT_RESET, tmp);
5139 tmp = RREG32(mmSRBM_SOFT_RESET);
/* release the GMCON stall/clear */
5142 if (grbm_soft_reset || srbm_soft_reset) {
5143 tmp = RREG32(mmGMCON_DEBUG);
5144 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5145 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5146 WREG32(mmGMCON_DEBUG, tmp);
5149 /* Wait a little for things to settle down */
/* Bring the GFX block back up after a soft reset: resume the GFX ring if
 * CP/GFX was reset, dequeue + resume the compute queues if any CP block
 * was reset, then restart the RLC.
 */
5155 static int gfx_v8_0_post_soft_reset(void *handle)
5157 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5158 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5160 if ((!adev->gfx.grbm_soft_reset) &&
5161 (!adev->gfx.srbm_soft_reset))
5164 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5165 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5167 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5168 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5169 gfx_v8_0_cp_gfx_resume(adev);
5171 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5172 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5173 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5174 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
/* make sure every HQD is inactive before re-initializing the queues */
5177 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5178 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5180 mutex_lock(&adev->srbm_mutex);
5181 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5182 gfx_v8_0_deactivate_hqd(adev, 2);
5183 vi_srbm_select(adev, 0, 0, 0, 0);
5184 mutex_unlock(&adev->srbm_mutex);
5186 gfx_v8_0_kiq_resume(adev);
5188 gfx_v8_0_rlc_start(adev);
5194 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5196 * @adev: amdgpu_device pointer
5198 * Fetches a GPU clock counter snapshot.
5199 * Returns the 64 bit clock counter snapshot.
5201 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
/* serialize the capture: latching and the two 32-bit reads must be atomic
 * with respect to other callers */
5205 mutex_lock(&adev->gfx.gpu_clock_mutex);
5206 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5207 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5208 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5209 mutex_unlock(&adev->gfx.gpu_clock_mutex);
/* Emit WRITE_DATA packets that program the per-VMID GDS base/size, GWS and
 * OA allocation registers for a GDS switch. All values are converted from
 * byte units to the hardware's block units first.
 */
5213 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5215 uint32_t gds_base, uint32_t gds_size,
5216 uint32_t gws_base, uint32_t gws_size,
5217 uint32_t oa_base, uint32_t oa_size)
5219 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5220 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5222 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5223 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5225 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5226 oa_size = oa_size >> AMDGPU_OA_SHIFT;
/* GDS base for this vmid */
5229 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5230 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5231 WRITE_DATA_DST_SEL(0)));
5232 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5233 amdgpu_ring_write(ring, 0);
5234 amdgpu_ring_write(ring, gds_base);
/* GDS size for this vmid */
5237 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5238 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5239 WRITE_DATA_DST_SEL(0)));
5240 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5241 amdgpu_ring_write(ring, 0);
5242 amdgpu_ring_write(ring, gds_size);
/* GWS allocation: size in the SIZE field, base in the low bits */
5245 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5246 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5247 WRITE_DATA_DST_SEL(0)));
5248 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5249 amdgpu_ring_write(ring, 0);
5250 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
/* OA allocation: contiguous bitmask of oa_size bits starting at oa_base */
5253 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5254 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5255 WRITE_DATA_DST_SEL(0)));
5256 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5257 amdgpu_ring_write(ring, 0);
5258 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
/* Read one SQ per-wave register through the SQ indirect index/data pair. */
5261 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5263 WREG32(mmSQ_IND_INDEX,
5264 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5265 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5266 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5267 (SQ_IND_INDEX__FORCE_READ_MASK))
5268 return RREG32(mmSQ_IND_DATA);
/* Bulk-read @num consecutive SQ wave registers starting at @regno into @out,
 * using the indirect index with auto-increment enabled.
 */
5271 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5272 uint32_t wave, uint32_t thread,
5273 uint32_t regno, uint32_t num, uint32_t *out)
5275 WREG32(mmSQ_IND_INDEX,
5276 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5277 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5278 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5279 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5280 (SQ_IND_INDEX__FORCE_READ_MASK) |
5281 (SQ_IND_INDEX__AUTO_INCR_MASK));
/* loop header elided: presumably while (num--) */
5283 *(out++) = RREG32(mmSQ_IND_DATA);
/* Dump the standard set of per-wave state registers (status, PC, EXEC mask,
 * HW id, allocation info, trap state, TBA/TMA, M0) into @dst for debugfs.
 * *no_fields is advanced by the number of words written; dst[0] is the
 * wave-data format type (0).
 */
5286 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5288 /* type 0 wave data */
5289 dst[(*no_fields)++] = 0;
5290 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5291 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5292 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5294 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5295 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5296 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5297 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5298 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5299 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5300 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5301 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5302 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5303 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5304 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5305 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5306 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5307 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/* Read @size SGPRs of the given wave starting at @start into @dst
 * (thread id 0; SGPRs are per-wave, not per-thread).
 */
5310 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5311 uint32_t wave, uint32_t start,
5312 uint32_t size, uint32_t *dst)
5315 adev, simd, wave, 0,
5316 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
/* GFX v8 callbacks exposed through adev->gfx.funcs (debugfs/clock helpers). */
5320 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5321 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5322 .select_se_sh = &gfx_v8_0_select_se_sh,
5323 .read_wave_data = &gfx_v8_0_read_wave_data,
5324 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
/* IP-block early_init hook: set ring counts and install the ring/irq/gds/rlc
 * function tables before any hardware is touched.
 */
5327 static int gfx_v8_0_early_init(void *handle)
5329 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5331 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5332 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5333 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5334 gfx_v8_0_set_ring_funcs(adev);
5335 gfx_v8_0_set_irq_funcs(adev);
5336 gfx_v8_0_set_gds_init(adev);
5337 gfx_v8_0_set_rlc_funcs(adev);
/* IP-block late_init hook: enable the privileged reg/instruction IRQs, run
 * the EDC GPR workaround (needs the IB pool, hence late init), then gate
 * GFX power gating.
 */
5342 static int gfx_v8_0_late_init(void *handle)
5344 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5347 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5351 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5355 /* requires IBs so do in late init after IB pool is initialized */
5356 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5360 amdgpu_set_powergating_state(adev,
5361 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
/* Enable/disable static per-CU medium-grain power gating. Polaris11/12 go
 * through powerplay (SMU message); other ASICs toggle RLC_PG_CNTL directly.
 */
5366 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5369 if ((adev->asic_type == CHIP_POLARIS11) ||
5370 (adev->asic_type == CHIP_POLARIS12))
5371 /* Send msg to SMU via Powerplay */
5372 amdgpu_set_powergating_state(adev,
5373 AMD_IP_BLOCK_TYPE_SMC,
5375 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
/* non-Polaris path (else, elided): program the RLC field directly */
5377 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
5380 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5383 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
/* Enable/disable Polaris11 quick medium-grain power gating via RLC_PG_CNTL. */
5386 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5389 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
/* Carrizo: enable/disable coarse-grain GFX power gating via RLC_PG_CNTL. */
5392 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5395 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
/* Carrizo: enable/disable GFX pipeline power gating via RLC_PG_CNTL. */
5398 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5401 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5403 /* Read any GFX register to wake up GFX. */
5405 RREG32(mmDB_RENDER_CONTROL);
/* Carrizo: apply coarse-grain GFX PG (and pipeline PG if supported) when
 * enabling; force both off when disabling.
 */
5408 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5411 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5412 cz_enable_gfx_cg_power_gating(adev, true);
5413 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5414 cz_enable_gfx_pipeline_power_gating(adev, true);
/* disable path (else, elided): turn both gates off unconditionally */
5416 cz_enable_gfx_cg_power_gating(adev, false);
5417 cz_enable_gfx_pipeline_power_gating(adev, false);
/* IP-block set_powergating_state hook. Per-ASIC dispatch of the supported
 * PG features; the whole sequence is bracketed by RLC safe mode when any
 * SMG/DMG/SMU-handshake feature is in play. No-op under SR-IOV.
 */
5421 static int gfx_v8_0_set_powergating_state(void *handle,
5422 enum amd_powergating_state state)
5424 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5425 bool enable = (state == AMD_PG_STATE_GATE);
5427 if (amdgpu_sriov_vf(adev))
5430 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5431 AMD_PG_SUPPORT_RLC_SMU_HS |
5433 AMD_PG_SUPPORT_GFX_DMG))
5434 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5435 switch (adev->asic_type) {
/* APU path (Carrizo/Stoney — case labels elided in this listing) */
5439 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5440 cz_enable_sck_slow_down_on_power_up(adev, true);
5441 cz_enable_sck_slow_down_on_power_down(adev, true);
5443 cz_enable_sck_slow_down_on_power_up(adev, false);
5444 cz_enable_sck_slow_down_on_power_down(adev, false);
5446 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5447 cz_enable_cp_power_gating(adev, true);
5449 cz_enable_cp_power_gating(adev, false);
5451 cz_update_gfx_cg_power_gating(adev, enable);
5453 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5454 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5456 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5458 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5459 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5461 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5463 case CHIP_POLARIS11:
5464 case CHIP_POLARIS12:
5465 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5466 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5468 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5470 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5471 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5473 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5475 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5476 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5478 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
/* leave RLC safe mode, mirroring the entry condition above */
5483 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5484 AMD_PG_SUPPORT_RLC_SMU_HS |
5486 AMD_PG_SUPPORT_GFX_DMG))
5487 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5491 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5493 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5496 if (amdgpu_sriov_vf(adev))
5499 /* AMD_CG_SUPPORT_GFX_MGCG */
5500 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5501 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5502 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5504 /* AMD_CG_SUPPORT_GFX_CGLG */
5505 data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5506 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5507 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5509 /* AMD_CG_SUPPORT_GFX_CGLS */
5510 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5511 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5513 /* AMD_CG_SUPPORT_GFX_CGTS */
5514 data = RREG32(mmCGTS_SM_CTRL_REG);
5515 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5516 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5518 /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5519 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5520 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5522 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5523 data = RREG32(mmRLC_MEM_SLP_CNTL);
5524 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5525 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5527 /* AMD_CG_SUPPORT_GFX_CP_LS */
5528 data = RREG32(mmCP_MEM_SLP_CNTL);
5529 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5530 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5533 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5534 uint32_t reg_addr, uint32_t cmd)
5538 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5540 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5541 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5543 data = RREG32(mmRLC_SERDES_WR_CTRL);
5544 if (adev->asic_type == CHIP_STONEY)
5545 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5546 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5547 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5548 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5549 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5550 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5551 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5552 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5553 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5555 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5556 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5557 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5558 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5559 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5560 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5561 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5562 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5563 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5564 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5565 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5566 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5567 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5568 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5569 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5571 WREG32(mmRLC_SERDES_WR_CTRL, data);
/* RLC safe-mode handshake constants used by the iceland_*_rlc_safe_mode
 * helpers below (REQ/MESSAGE fields of the RLC GPR mailbox register).
 */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0
#define RLC_GPR_REG2__REQ_MASK       0x00000001
#define RLC_GPR_REG2__REQ__SHIFT     0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK   0x0000001e
5581 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5586 data = RREG32(mmRLC_CNTL);
5587 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5590 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5591 data |= RLC_SAFE_MODE__CMD_MASK;
5592 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5593 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5594 WREG32(mmRLC_SAFE_MODE, data);
5596 for (i = 0; i < adev->usec_timeout; i++) {
5597 if ((RREG32(mmRLC_GPM_STAT) &
5598 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5599 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5600 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5601 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5606 for (i = 0; i < adev->usec_timeout; i++) {
5607 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5611 adev->gfx.rlc.in_safe_mode = true;
5615 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5620 data = RREG32(mmRLC_CNTL);
5621 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5624 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5625 if (adev->gfx.rlc.in_safe_mode) {
5626 data |= RLC_SAFE_MODE__CMD_MASK;
5627 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5628 WREG32(mmRLC_SAFE_MODE, data);
5629 adev->gfx.rlc.in_safe_mode = false;
5633 for (i = 0; i < adev->usec_timeout; i++) {
5634 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5640 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5641 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5642 .exit_safe_mode = iceland_exit_rlc_safe_mode
5645 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5648 uint32_t temp, data;
5650 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5652 /* It is disabled by HW by default */
5653 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5654 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5655 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5656 /* 1 - RLC memory Light sleep */
5657 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5659 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5660 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5663 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5664 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5665 if (adev->flags & AMD_IS_APU)
5666 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5667 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5668 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5670 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5671 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5672 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5673 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5676 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5678 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5679 gfx_v8_0_wait_for_rlc_serdes(adev);
5681 /* 5 - clear mgcg override */
5682 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5684 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5685 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5686 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5687 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5688 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5689 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5690 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5691 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5692 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5693 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5694 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5695 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5697 WREG32(mmCGTS_SM_CTRL_REG, data);
5701 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5702 gfx_v8_0_wait_for_rlc_serdes(adev);
5704 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5705 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5706 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5707 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5708 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5709 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5711 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5713 /* 2 - disable MGLS in RLC */
5714 data = RREG32(mmRLC_MEM_SLP_CNTL);
5715 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5716 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5717 WREG32(mmRLC_MEM_SLP_CNTL, data);
5720 /* 3 - disable MGLS in CP */
5721 data = RREG32(mmCP_MEM_SLP_CNTL);
5722 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5723 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5724 WREG32(mmCP_MEM_SLP_CNTL, data);
5727 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5728 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5729 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5730 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5732 WREG32(mmCGTS_SM_CTRL_REG, data);
5734 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5735 gfx_v8_0_wait_for_rlc_serdes(adev);
5737 /* 6 - set mgcg override */
5738 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5742 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743 gfx_v8_0_wait_for_rlc_serdes(adev);
5746 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5749 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5752 uint32_t temp, temp1, data, data1;
5754 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5756 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5758 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5759 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5760 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5762 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5764 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5765 gfx_v8_0_wait_for_rlc_serdes(adev);
5767 /* 2 - clear cgcg override */
5768 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5770 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5771 gfx_v8_0_wait_for_rlc_serdes(adev);
5773 /* 3 - write cmd to set CGLS */
5774 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5776 /* 4 - enable cgcg */
5777 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5779 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5781 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5783 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5784 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5787 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5789 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5793 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5795 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5796 * Cmp_busy/GFX_Idle interrupts
5798 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5800 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5801 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5804 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5805 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5806 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5808 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5810 /* read gfx register to wake up cgcg */
5811 RREG32(mmCB_CGTT_SCLK_CTRL);
5812 RREG32(mmCB_CGTT_SCLK_CTRL);
5813 RREG32(mmCB_CGTT_SCLK_CTRL);
5814 RREG32(mmCB_CGTT_SCLK_CTRL);
5816 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5817 gfx_v8_0_wait_for_rlc_serdes(adev);
5819 /* write cmd to Set CGCG Overrride */
5820 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5822 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5823 gfx_v8_0_wait_for_rlc_serdes(adev);
5825 /* write cmd to Clear CGLS */
5826 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5828 /* disable cgcg, cgls should be disabled too. */
5829 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5830 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5832 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5833 /* enable interrupts again for PG */
5834 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5837 gfx_v8_0_wait_for_rlc_serdes(adev);
5839 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5841 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5845 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5846 * === MGCG + MGLS + TS(CG/LS) ===
5848 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5851 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5852 * === CGCG + CGLS ===
5854 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5855 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5860 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5861 enum amd_clockgating_state state)
5863 uint32_t msg_id, pp_state = 0;
5864 uint32_t pp_support_state = 0;
5865 void *pp_handle = adev->powerplay.pp_handle;
5867 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5868 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5869 pp_support_state = PP_STATE_SUPPORT_LS;
5870 pp_state = PP_STATE_LS;
5872 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5873 pp_support_state |= PP_STATE_SUPPORT_CG;
5874 pp_state |= PP_STATE_CG;
5876 if (state == AMD_CG_STATE_UNGATE)
5879 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5883 amd_set_clockgating_by_smu(pp_handle, msg_id);
5886 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888 pp_support_state = PP_STATE_SUPPORT_LS;
5889 pp_state = PP_STATE_LS;
5892 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893 pp_support_state |= PP_STATE_SUPPORT_CG;
5894 pp_state |= PP_STATE_CG;
5897 if (state == AMD_CG_STATE_UNGATE)
5900 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5904 amd_set_clockgating_by_smu(pp_handle, msg_id);
5910 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5911 enum amd_clockgating_state state)
5914 uint32_t msg_id, pp_state = 0;
5915 uint32_t pp_support_state = 0;
5916 void *pp_handle = adev->powerplay.pp_handle;
5918 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920 pp_support_state = PP_STATE_SUPPORT_LS;
5921 pp_state = PP_STATE_LS;
5923 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924 pp_support_state |= PP_STATE_SUPPORT_CG;
5925 pp_state |= PP_STATE_CG;
5927 if (state == AMD_CG_STATE_UNGATE)
5930 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5934 amd_set_clockgating_by_smu(pp_handle, msg_id);
5937 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5938 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5939 pp_support_state = PP_STATE_SUPPORT_LS;
5940 pp_state = PP_STATE_LS;
5942 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5943 pp_support_state |= PP_STATE_SUPPORT_CG;
5944 pp_state |= PP_STATE_CG;
5946 if (state == AMD_CG_STATE_UNGATE)
5949 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5953 amd_set_clockgating_by_smu(pp_handle, msg_id);
5956 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5957 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5958 pp_support_state = PP_STATE_SUPPORT_LS;
5959 pp_state = PP_STATE_LS;
5962 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5963 pp_support_state |= PP_STATE_SUPPORT_CG;
5964 pp_state |= PP_STATE_CG;
5967 if (state == AMD_CG_STATE_UNGATE)
5970 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5974 amd_set_clockgating_by_smu(pp_handle, msg_id);
5977 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978 pp_support_state = PP_STATE_SUPPORT_LS;
5980 if (state == AMD_CG_STATE_UNGATE)
5983 pp_state = PP_STATE_LS;
5985 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5989 amd_set_clockgating_by_smu(pp_handle, msg_id);
5992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5993 pp_support_state = PP_STATE_SUPPORT_LS;
5995 if (state == AMD_CG_STATE_UNGATE)
5998 pp_state = PP_STATE_LS;
5999 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6003 amd_set_clockgating_by_smu(pp_handle, msg_id);
6009 static int gfx_v8_0_set_clockgating_state(void *handle,
6010 enum amd_clockgating_state state)
6012 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6014 if (amdgpu_sriov_vf(adev))
6017 switch (adev->asic_type) {
6021 gfx_v8_0_update_gfx_clock_gating(adev,
6022 state == AMD_CG_STATE_GATE);
6025 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6027 case CHIP_POLARIS10:
6028 case CHIP_POLARIS11:
6029 case CHIP_POLARIS12:
6030 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6038 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6040 return ring->adev->wb.wb[ring->rptr_offs];
6043 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6045 struct amdgpu_device *adev = ring->adev;
6047 if (ring->use_doorbell)
6048 /* XXX check if swapping is necessary on BE */
6049 return ring->adev->wb.wb[ring->wptr_offs];
6051 return RREG32(mmCP_RB0_WPTR);
6054 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6056 struct amdgpu_device *adev = ring->adev;
6058 if (ring->use_doorbell) {
6059 /* XXX check if swapping is necessary on BE */
6060 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6061 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6063 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6064 (void)RREG32(mmCP_RB0_WPTR);
6068 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6070 u32 ref_and_mask, reg_mem_engine;
6072 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6073 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6076 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6079 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6086 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6087 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6090 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6091 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6092 WAIT_REG_MEM_FUNCTION(3) | /* == */
6094 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6095 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6096 amdgpu_ring_write(ring, ref_and_mask);
6097 amdgpu_ring_write(ring, ref_and_mask);
6098 amdgpu_ring_write(ring, 0x20); /* poll interval */
6101 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6103 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6104 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6107 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6108 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6113 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6115 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6116 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6117 WRITE_DATA_DST_SEL(0) |
6119 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6120 amdgpu_ring_write(ring, 0);
6121 amdgpu_ring_write(ring, 1);
6125 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6126 struct amdgpu_ib *ib,
6127 unsigned vm_id, bool ctx_switch)
6129 u32 header, control = 0;
6131 if (ib->flags & AMDGPU_IB_FLAG_CE)
6132 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6134 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6136 control |= ib->length_dw | (vm_id << 24);
6138 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6139 control |= INDIRECT_BUFFER_PRE_ENB(1);
6141 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6142 gfx_v8_0_ring_emit_de_meta(ring);
6145 amdgpu_ring_write(ring, header);
6146 amdgpu_ring_write(ring,
6150 (ib->gpu_addr & 0xFFFFFFFC));
6151 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6152 amdgpu_ring_write(ring, control);
6155 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6156 struct amdgpu_ib *ib,
6157 unsigned vm_id, bool ctx_switch)
6159 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6161 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6162 amdgpu_ring_write(ring,
6166 (ib->gpu_addr & 0xFFFFFFFC));
6167 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6168 amdgpu_ring_write(ring, control);
6171 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6172 u64 seq, unsigned flags)
6174 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6175 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6177 /* EVENT_WRITE_EOP - flush caches, send int */
6178 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6179 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6181 EOP_TC_WB_ACTION_EN |
6182 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6184 amdgpu_ring_write(ring, addr & 0xfffffffc);
6185 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6186 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6187 amdgpu_ring_write(ring, lower_32_bits(seq));
6188 amdgpu_ring_write(ring, upper_32_bits(seq));
6192 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6194 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6195 uint32_t seq = ring->fence_drv.sync_seq;
6196 uint64_t addr = ring->fence_drv.gpu_addr;
6198 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6199 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6200 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6201 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6202 amdgpu_ring_write(ring, addr & 0xfffffffc);
6203 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6204 amdgpu_ring_write(ring, seq);
6205 amdgpu_ring_write(ring, 0xffffffff);
6206 amdgpu_ring_write(ring, 4); /* poll interval */
6209 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6210 unsigned vm_id, uint64_t pd_addr)
6212 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6214 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6215 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6216 WRITE_DATA_DST_SEL(0)) |
6219 amdgpu_ring_write(ring,
6220 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6222 amdgpu_ring_write(ring,
6223 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6225 amdgpu_ring_write(ring, 0);
6226 amdgpu_ring_write(ring, pd_addr >> 12);
6228 /* bits 0-15 are the VM contexts0-15 */
6229 /* invalidate the cache */
6230 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6231 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6232 WRITE_DATA_DST_SEL(0)));
6233 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6234 amdgpu_ring_write(ring, 0);
6235 amdgpu_ring_write(ring, 1 << vm_id);
6237 /* wait for the invalidate to complete */
6238 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6239 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6240 WAIT_REG_MEM_FUNCTION(0) | /* always */
6241 WAIT_REG_MEM_ENGINE(0))); /* me */
6242 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6243 amdgpu_ring_write(ring, 0);
6244 amdgpu_ring_write(ring, 0); /* ref */
6245 amdgpu_ring_write(ring, 0); /* mask */
6246 amdgpu_ring_write(ring, 0x20); /* poll interval */
6248 /* compute doesn't have PFP */
6250 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6251 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6252 amdgpu_ring_write(ring, 0x0);
6256 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6258 return ring->adev->wb.wb[ring->wptr_offs];
6261 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6263 struct amdgpu_device *adev = ring->adev;
6265 /* XXX check if swapping is necessary on BE */
6266 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6267 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6270 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6274 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6275 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6277 /* RELEASE_MEM - flush caches, send int */
6278 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6279 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6281 EOP_TC_WB_ACTION_EN |
6282 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6284 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6285 amdgpu_ring_write(ring, addr & 0xfffffffc);
6286 amdgpu_ring_write(ring, upper_32_bits(addr));
6287 amdgpu_ring_write(ring, lower_32_bits(seq));
6288 amdgpu_ring_write(ring, upper_32_bits(seq));
6291 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6292 u64 seq, unsigned int flags)
6294 /* we only allocate 32bit for each seq wb address */
6295 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6297 /* write fence seq to the "addr" */
6298 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6299 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6300 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6301 amdgpu_ring_write(ring, lower_32_bits(addr));
6302 amdgpu_ring_write(ring, upper_32_bits(addr));
6303 amdgpu_ring_write(ring, lower_32_bits(seq));
6305 if (flags & AMDGPU_FENCE_FLAG_INT) {
6306 /* set register to trigger INT */
6307 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6308 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6309 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6310 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6311 amdgpu_ring_write(ring, 0);
6312 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6316 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6318 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6319 amdgpu_ring_write(ring, 0);
6322 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6326 if (amdgpu_sriov_vf(ring->adev))
6327 gfx_v8_0_ring_emit_ce_meta(ring);
6329 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6330 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6331 gfx_v8_0_ring_emit_vgt_flush(ring);
6332 /* set load_global_config & load_global_uconfig */
6334 /* set load_cs_sh_regs */
6336 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6339 /* set load_ce_ram if preamble presented */
6340 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6343 /* still load_ce_ram if this is the first time preamble presented
6344 * although there is no context switch happens.
6346 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6350 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351 amdgpu_ring_write(ring, dw2);
6352 amdgpu_ring_write(ring, 0);
6355 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6359 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6360 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6361 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6362 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6363 ret = ring->wptr & ring->buf_mask;
6364 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6368 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6372 BUG_ON(offset > ring->buf_mask);
6373 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6375 cur = (ring->wptr & ring->buf_mask) - 1;
6376 if (likely(cur > offset))
6377 ring->ring[offset] = cur - offset;
6379 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6382 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6384 struct amdgpu_device *adev = ring->adev;
6386 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6387 amdgpu_ring_write(ring, 0 | /* src: register*/
6388 (5 << 8) | /* dst: memory */
6389 (1 << 20)); /* write confirm */
6390 amdgpu_ring_write(ring, reg);
6391 amdgpu_ring_write(ring, 0);
6392 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6393 adev->virt.reg_val_offs * 4));
6394 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6395 adev->virt.reg_val_offs * 4));
6398 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6401 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6402 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6403 amdgpu_ring_write(ring, reg);
6404 amdgpu_ring_write(ring, 0);
6405 amdgpu_ring_write(ring, val);
6408 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6409 enum amdgpu_interrupt_state state)
6411 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6412 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6415 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6417 enum amdgpu_interrupt_state state)
6419 u32 mec_int_cntl, mec_int_cntl_reg;
6422 * amdgpu controls only the first MEC. That's why this function only
6423 * handles the setting of interrupts for this specific MEC. All other
6424 * pipes' interrupts are set by amdkfd.
6430 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6433 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6436 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6439 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6442 DRM_DEBUG("invalid pipe %d\n", pipe);
6446 DRM_DEBUG("invalid me %d\n", me);
6451 case AMDGPU_IRQ_STATE_DISABLE:
6452 mec_int_cntl = RREG32(mec_int_cntl_reg);
6453 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6454 WREG32(mec_int_cntl_reg, mec_int_cntl);
6456 case AMDGPU_IRQ_STATE_ENABLE:
6457 mec_int_cntl = RREG32(mec_int_cntl_reg);
6458 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6459 WREG32(mec_int_cntl_reg, mec_int_cntl);
6466 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6467 struct amdgpu_irq_src *source,
6469 enum amdgpu_interrupt_state state)
6471 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6472 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6477 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6478 struct amdgpu_irq_src *source,
6480 enum amdgpu_interrupt_state state)
6482 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6483 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6488 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6489 struct amdgpu_irq_src *src,
6491 enum amdgpu_interrupt_state state)
6494 case AMDGPU_CP_IRQ_GFX_EOP:
6495 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6497 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6498 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6500 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6501 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6503 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6504 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6506 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6507 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6509 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6510 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6512 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6513 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6515 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6516 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6518 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6519 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6527 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6528 struct amdgpu_irq_src *source,
6529 struct amdgpu_iv_entry *entry)
6532 u8 me_id, pipe_id, queue_id;
6533 struct amdgpu_ring *ring;
6535 DRM_DEBUG("IH: CP EOP\n");
6536 me_id = (entry->ring_id & 0x0c) >> 2;
6537 pipe_id = (entry->ring_id & 0x03) >> 0;
6538 queue_id = (entry->ring_id & 0x70) >> 4;
6542 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6546 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6547 ring = &adev->gfx.compute_ring[i];
6548 /* Per-queue interrupt is supported for MEC starting from VI.
6549 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6551 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6552 amdgpu_fence_process(ring);
6559 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6560 struct amdgpu_irq_src *source,
6561 struct amdgpu_iv_entry *entry)
6563 DRM_ERROR("Illegal register access in command stream\n");
6564 schedule_work(&adev->reset_work);
6568 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6569 struct amdgpu_irq_src *source,
6570 struct amdgpu_iv_entry *entry)
6572 DRM_ERROR("Illegal instruction in command stream\n");
6573 schedule_work(&adev->reset_work);
6577 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6578 struct amdgpu_irq_src *src,
6580 enum amdgpu_interrupt_state state)
6582 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6585 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6586 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6587 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6589 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6591 GENERIC2_INT_ENABLE,
6592 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6594 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6596 GENERIC2_INT_ENABLE,
6597 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6600 BUG(); /* kiq only support GENERIC2_INT now */
6606 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6607 struct amdgpu_irq_src *source,
6608 struct amdgpu_iv_entry *entry)
6610 u8 me_id, pipe_id, queue_id;
6611 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6613 me_id = (entry->ring_id & 0x0c) >> 2;
6614 pipe_id = (entry->ring_id & 0x03) >> 0;
6615 queue_id = (entry->ring_id & 0x70) >> 4;
6616 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6617 me_id, pipe_id, queue_id);
6619 amdgpu_fence_process(ring);
6623 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6625 .early_init = gfx_v8_0_early_init,
6626 .late_init = gfx_v8_0_late_init,
6627 .sw_init = gfx_v8_0_sw_init,
6628 .sw_fini = gfx_v8_0_sw_fini,
6629 .hw_init = gfx_v8_0_hw_init,
6630 .hw_fini = gfx_v8_0_hw_fini,
6631 .suspend = gfx_v8_0_suspend,
6632 .resume = gfx_v8_0_resume,
6633 .is_idle = gfx_v8_0_is_idle,
6634 .wait_for_idle = gfx_v8_0_wait_for_idle,
6635 .check_soft_reset = gfx_v8_0_check_soft_reset,
6636 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6637 .soft_reset = gfx_v8_0_soft_reset,
6638 .post_soft_reset = gfx_v8_0_post_soft_reset,
6639 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6640 .set_powergating_state = gfx_v8_0_set_powergating_state,
6641 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6644 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6645 .type = AMDGPU_RING_TYPE_GFX,
6647 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6648 .support_64bit_ptrs = false,
6649 .get_rptr = gfx_v8_0_ring_get_rptr,
6650 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6651 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6652 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6654 7 + /* PIPELINE_SYNC */
6656 8 + /* FENCE for VM_FLUSH */
6657 20 + /* GDS switch */
6658 4 + /* double SWITCH_BUFFER,
6659 the first COND_EXEC jump to the place just
6660 prior to this double SWITCH_BUFFER */
6668 8 + 8 + /* FENCE x2 */
6669 2, /* SWITCH_BUFFER */
6670 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6671 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6672 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6673 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6674 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6675 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6676 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6677 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6678 .test_ring = gfx_v8_0_ring_test_ring,
6679 .test_ib = gfx_v8_0_ring_test_ib,
6680 .insert_nop = amdgpu_ring_insert_nop,
6681 .pad_ib = amdgpu_ring_generic_pad_ib,
6682 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6683 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6684 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6685 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6688 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6689 .type = AMDGPU_RING_TYPE_COMPUTE,
6691 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6692 .support_64bit_ptrs = false,
6693 .get_rptr = gfx_v8_0_ring_get_rptr,
6694 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6695 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6697 20 + /* gfx_v8_0_ring_emit_gds_switch */
6698 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6699 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6700 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6701 17 + /* gfx_v8_0_ring_emit_vm_flush */
6702 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6703 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6704 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6705 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6706 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6707 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6708 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6709 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6710 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6711 .test_ring = gfx_v8_0_ring_test_ring,
6712 .test_ib = gfx_v8_0_ring_test_ib,
6713 .insert_nop = amdgpu_ring_insert_nop,
6714 .pad_ib = amdgpu_ring_generic_pad_ib,
6717 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6718 .type = AMDGPU_RING_TYPE_KIQ,
6720 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6721 .support_64bit_ptrs = false,
6722 .get_rptr = gfx_v8_0_ring_get_rptr,
6723 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6724 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6726 20 + /* gfx_v8_0_ring_emit_gds_switch */
6727 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6728 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6729 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6730 17 + /* gfx_v8_0_ring_emit_vm_flush */
6731 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6732 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6733 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6734 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6735 .test_ring = gfx_v8_0_ring_test_ring,
6736 .test_ib = gfx_v8_0_ring_test_ib,
6737 .insert_nop = amdgpu_ring_insert_nop,
6738 .pad_ib = amdgpu_ring_generic_pad_ib,
6739 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6740 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6743 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6747 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6749 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6750 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6752 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6753 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6756 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6757 .set = gfx_v8_0_set_eop_interrupt_state,
6758 .process = gfx_v8_0_eop_irq,
6761 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6762 .set = gfx_v8_0_set_priv_reg_fault_state,
6763 .process = gfx_v8_0_priv_reg_irq,
6766 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6767 .set = gfx_v8_0_set_priv_inst_fault_state,
6768 .process = gfx_v8_0_priv_inst_irq,
6771 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6772 .set = gfx_v8_0_kiq_set_interrupt_state,
6773 .process = gfx_v8_0_kiq_irq,
6776 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6778 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6779 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6781 adev->gfx.priv_reg_irq.num_types = 1;
6782 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6784 adev->gfx.priv_inst_irq.num_types = 1;
6785 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6787 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6788 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6791 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6793 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6796 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6798 /* init asci gds info */
6799 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6800 adev->gds.gws.total_size = 64;
6801 adev->gds.oa.total_size = 16;
6803 if (adev->gds.mem.total_size == 64 * 1024) {
6804 adev->gds.mem.gfx_partition_size = 4096;
6805 adev->gds.mem.cs_partition_size = 4096;
6807 adev->gds.gws.gfx_partition_size = 4;
6808 adev->gds.gws.cs_partition_size = 4;
6810 adev->gds.oa.gfx_partition_size = 4;
6811 adev->gds.oa.cs_partition_size = 1;
6813 adev->gds.mem.gfx_partition_size = 1024;
6814 adev->gds.mem.cs_partition_size = 1024;
6816 adev->gds.gws.gfx_partition_size = 16;
6817 adev->gds.gws.cs_partition_size = 16;
6819 adev->gds.oa.gfx_partition_size = 4;
6820 adev->gds.oa.cs_partition_size = 4;
6824 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6832 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6833 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6835 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6838 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6842 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6843 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6845 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6847 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6850 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6852 int i, j, k, counter, active_cu_number = 0;
6853 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6854 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6855 unsigned disable_masks[4 * 2];
6858 memset(cu_info, 0, sizeof(*cu_info));
6860 if (adev->flags & AMD_IS_APU)
6863 ao_cu_num = adev->gfx.config.max_cu_per_sh;
6865 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6867 mutex_lock(&adev->grbm_idx_mutex);
6868 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6869 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6873 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6875 gfx_v8_0_set_user_cu_inactive_bitmap(
6876 adev, disable_masks[i * 2 + j]);
6877 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6878 cu_info->bitmap[i][j] = bitmap;
6880 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6881 if (bitmap & mask) {
6882 if (counter < ao_cu_num)
6888 active_cu_number += counter;
6890 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6891 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
6894 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6895 mutex_unlock(&adev->grbm_idx_mutex);
6897 cu_info->number = active_cu_number;
6898 cu_info->ao_cu_mask = ao_cu_mask;
6901 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6903 .type = AMD_IP_BLOCK_TYPE_GFX,
6907 .funcs = &gfx_v8_0_ip_funcs,
6910 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6912 .type = AMD_IP_BLOCK_TYPE_GFX,
6916 .funcs = &gfx_v8_0_ip_funcs,
6919 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
6921 uint64_t ce_payload_addr;
6924 struct vi_ce_ib_state regular;
6925 struct vi_ce_ib_state_chained_ib chained;
6928 if (ring->adev->virt.chained_ib_support) {
6929 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6930 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
6931 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
6933 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
6934 offsetof(struct vi_gfx_meta_data, ce_payload);
6935 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
6938 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
6939 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
6940 WRITE_DATA_DST_SEL(8) |
6942 WRITE_DATA_CACHE_POLICY(0));
6943 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
6944 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
6945 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
6948 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
6950 uint64_t de_payload_addr, gds_addr, csa_addr;
6953 struct vi_de_ib_state regular;
6954 struct vi_de_ib_state_chained_ib chained;
6957 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
6958 gds_addr = csa_addr + 4096;
6959 if (ring->adev->virt.chained_ib_support) {
6960 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
6961 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
6962 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
6963 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
6965 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
6966 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
6967 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
6968 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
6971 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
6972 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6973 WRITE_DATA_DST_SEL(8) |
6975 WRITE_DATA_CACHE_POLICY(0));
6976 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
6977 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
6978 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);