/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"
#include "smu/smu_7_1_3_sh_mask.h"

#include "ivsrcid/ivsrcid_vislands30.h"
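
/*
 * GFX8 exposes a single GFX ring; GFX8_MEC_HPD_SIZE is the per compute
 * queue EOP buffer size handed to the MEC in gfx_v8_0_mec_init() below.
 */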
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum bpm_reg {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

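/*
 * Golden-register tables: each entry is a {register, and-mask, or-value}
 * triple, consumed three dwords at a time by
 * amdgpu_device_program_register_sequence(), which clears the masked bits
 * of the register and ORs in the new value.
 */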
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
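
/*
 * Apply the golden register tables for the detected ASIC: first the
 * MGCG/CGCG clock-gating defaults (where present), then the per-revision
 * tweaks, then the common settings.
 */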
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
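
/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, then
 * emit a SET_UCONFIG_REG packet through the ring that rewrites it to
 * 0xDEADBEEF, and poll until the value lands so we know the CP is actually
 * fetching and executing packets.
 */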
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
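
/*
 * Indirect-buffer sanity test: submit a small IB containing a WRITE_DATA
 * packet that stores a magic value to a writeback (WB) slot in system
 * memory, then wait on the fence and check the slot.
 */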
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
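
/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC(2) microcode images for
 * the detected ASIC. For Polaris parts an alternate image is tried first
 * and, on -ENOENT, the loader falls back to a second file name; the actual
 * firmware paths are elided in this deblobbed tree.
 */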
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs is
	 * formally released on feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
	err = reject_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
				err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we need to account for the JT (jump table) as well */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

1165 "gfx8: Failed to load firmware \"%s\"\n",
1167 release_firmware(adev->gfx.pfp_fw);
1168 adev->gfx.pfp_fw = NULL;
1169 release_firmware(adev->gfx.me_fw);
1170 adev->gfx.me_fw = NULL;
1171 release_firmware(adev->gfx.ce_fw);
1172 adev->gfx.ce_fw = NULL;
1173 release_firmware(adev->gfx.rlc_fw);
1174 adev->gfx.rlc_fw = NULL;
1175 release_firmware(adev->gfx.mec_fw);
1176 adev->gfx.mec_fw = NULL;
1177 release_firmware(adev->gfx.mec2_fw);
1178 adev->gfx.mec2_fw = NULL;
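
/*
 * Build the clear-state indirect buffer (CSB): a PREAMBLE begin marker, a
 * CONTEXT_CONTROL packet, SET_CONTEXT_REG dumps of every SECT_CONTEXT
 * extent from the RLC clear-state data, the raster config pair, a
 * PREAMBLE end marker and a final CLEAR_STATE packet.
 */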
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
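
/*
 * Copy the per-engine microcode jump tables (me 0..4: CE, PFP, ME, MEC
 * and, on Carrizo, MEC2) back to back into the RLC cp_table buffer.
 */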
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
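
/*
 * Hand-assembled GCN3 shader used by the EDC GPR workaround below, stored
 * as raw instruction dwords. The run of VOP1 moves appears to write each
 * of the 64 logical VGPRs allocated by vgpr_init_regs (COMPUTE_PGM_RSRC1),
 * and the trailing pair is s_barrier (0xbf8a0000) / s_endpgm (0xbf810000).
 */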
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
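
/*
 * Register/value pairs consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds(), which wraps each pair in a
 * PACKET3_SET_SH_REG write before issuing the corresponding dispatch.
 */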
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
};
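
/*
 * EDC (error detect and correct) GPR workaround for Carrizo: run three
 * small compute dispatches that write every VGPR and SGPR so the GPR SRAM
 * state is initialized before EDC is enabled via GB_EDC_MODE and
 * CC_GC_EDC_CONFIG below.
 */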
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

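	/*
	 * IB size per dispatch: 3 dwords for each SET_SH_REG reg/value pair,
	 * plus 4 for the COMPUTE_PGM_LO/HI write, 5 for DISPATCH_DIRECT and
	 * 2 for the EVENT_WRITE flush, times 4 bytes per dword.
	 */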
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

1624 /* write the register state for the compute dispatch */
1625 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1626 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1627 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1628 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1630 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1631 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1632 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1633 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1634 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1635 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637 /* write dispatch packet */
1638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1639 ib.ptr[ib.length_dw++] = 8; /* x */
1640 ib.ptr[ib.length_dw++] = 1; /* y */
1641 ib.ptr[ib.length_dw++] = 1; /* z */
1642 ib.ptr[ib.length_dw++] =
1643 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645 /* write CS partial flush packet */
1646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1647 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650 /* write the register state for the compute dispatch */
1651 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1652 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1653 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1654 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1656 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1657 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1658 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1659 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1660 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1661 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663 /* write dispatch packet */
1664 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1665 ib.ptr[ib.length_dw++] = 8; /* x */
1666 ib.ptr[ib.length_dw++] = 1; /* y */
1667 ib.ptr[ib.length_dw++] = 1; /* z */
1668 ib.ptr[ib.length_dw++] =
1669 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671 /* write CS partial flush packet */
1672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1673 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675 /* shedule the ib on the ring */
1676 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1678 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1682 /* wait for the GPU to finish processing the IB */
1683 r = dma_fence_wait(f, false);
1685 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
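
	/* re-enable EDC using the value saved above, with double-error
	 * detection (DED) mode and FED propagation turned on
	 */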
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
		/* If row size(DIMM1) != row size(DIMM0), use the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
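		/* row size in KB = (4 bytes per column * 2^(8 + NOOFCOLS)) / 1024 */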
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
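	/* GB_ADDR_CONFIG.ROW_SIZE encoding: 0 = 1KB, 1 = 2KB, 2 = 4KB */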
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
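
/*
 * Set up one compute ring: map it onto a (mec, pipe, queue) slot, give it
 * a doorbell and a slice of the MEC EOP buffer, and hook up the matching
 * EOP interrupt source.
 */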
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024,
			     &adev->gfx.eop_irq, irq_type);
	if (r)
		return r;

	return 0;
}
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);

static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		adev->gfx.mec.num_mec = 2;
		break;
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}
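
	/* every VI MEC exposes 4 pipes with 8 queues per pipe */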
	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;
	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);

	/* EOP event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
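	/* pipe-major ordering spreads the rings over as many hardware pipes as possible */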
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev, ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
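
	/* 0x8000 = 32KB of constant-engine (CE) RAM */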
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;
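
	/*
	 * Program the per-ASIC golden tiling tables; macrotile entry 7 is
	 * reserved and is never written.
	 */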
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3079 NUM_BANKS(ADDR_SURF_16_BANK));
3081 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 NUM_BANKS(ADDR_SURF_16_BANK));
3086 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3089 NUM_BANKS(ADDR_SURF_16_BANK));
3091 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3092 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3093 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3094 NUM_BANKS(ADDR_SURF_16_BANK));
3096 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3098 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3099 NUM_BANKS(ADDR_SURF_16_BANK));
3101 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3104 NUM_BANKS(ADDR_SURF_8_BANK));
3106 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3109 NUM_BANKS(ADDR_SURF_4_BANK));
3111 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3114 NUM_BANKS(ADDR_SURF_4_BANK));
3116 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3117 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3119 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3120 if (reg_offset != 7)
3121 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3122 break;
3123 case CHIP_STONEY:
3125 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126 PIPE_CONFIG(ADDR_SURF_P2) |
3127 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P2) |
3131 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 PIPE_CONFIG(ADDR_SURF_P2) |
3135 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138 PIPE_CONFIG(ADDR_SURF_P2) |
3139 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3141 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 PIPE_CONFIG(ADDR_SURF_P2) |
3143 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3145 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 PIPE_CONFIG(ADDR_SURF_P2) |
3147 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3150 PIPE_CONFIG(ADDR_SURF_P2) |
3151 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3153 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3154 PIPE_CONFIG(ADDR_SURF_P2));
3155 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3156 PIPE_CONFIG(ADDR_SURF_P2) |
3157 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160 PIPE_CONFIG(ADDR_SURF_P2) |
3161 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164 PIPE_CONFIG(ADDR_SURF_P2) |
3165 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168 PIPE_CONFIG(ADDR_SURF_P2) |
3169 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172 PIPE_CONFIG(ADDR_SURF_P2) |
3173 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3176 PIPE_CONFIG(ADDR_SURF_P2) |
3177 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3179 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3180 PIPE_CONFIG(ADDR_SURF_P2) |
3181 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3183 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3184 PIPE_CONFIG(ADDR_SURF_P2) |
3185 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3188 PIPE_CONFIG(ADDR_SURF_P2) |
3189 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3192 PIPE_CONFIG(ADDR_SURF_P2) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3196 PIPE_CONFIG(ADDR_SURF_P2) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3203 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3207 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3208 PIPE_CONFIG(ADDR_SURF_P2) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3211 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3215 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3216 PIPE_CONFIG(ADDR_SURF_P2) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3220 PIPE_CONFIG(ADDR_SURF_P2) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 PIPE_CONFIG(ADDR_SURF_P2) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3228 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231 NUM_BANKS(ADDR_SURF_8_BANK));
3232 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 NUM_BANKS(ADDR_SURF_8_BANK));
3236 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239 NUM_BANKS(ADDR_SURF_8_BANK));
3240 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3243 NUM_BANKS(ADDR_SURF_8_BANK));
3244 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247 NUM_BANKS(ADDR_SURF_8_BANK));
3248 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3251 NUM_BANKS(ADDR_SURF_8_BANK));
3252 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3255 NUM_BANKS(ADDR_SURF_8_BANK));
3256 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 NUM_BANKS(ADDR_SURF_16_BANK));
3260 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263 NUM_BANKS(ADDR_SURF_16_BANK));
3264 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3267 NUM_BANKS(ADDR_SURF_16_BANK));
3268 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 NUM_BANKS(ADDR_SURF_16_BANK));
3272 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275 NUM_BANKS(ADDR_SURF_16_BANK));
3276 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279 NUM_BANKS(ADDR_SURF_16_BANK));
3280 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 NUM_BANKS(ADDR_SURF_8_BANK));
3285 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3286 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3287 reg_offset != 23)
3288 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3290 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3291 if (reg_offset != 7)
3292 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3297 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3301 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302 PIPE_CONFIG(ADDR_SURF_P2) |
3303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3306 PIPE_CONFIG(ADDR_SURF_P2) |
3307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3310 PIPE_CONFIG(ADDR_SURF_P2) |
3311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314 PIPE_CONFIG(ADDR_SURF_P2) |
3315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3317 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318 PIPE_CONFIG(ADDR_SURF_P2) |
3319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3321 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322 PIPE_CONFIG(ADDR_SURF_P2) |
3323 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3325 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3326 PIPE_CONFIG(ADDR_SURF_P2) |
3327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3329 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3330 PIPE_CONFIG(ADDR_SURF_P2));
3331 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3336 PIPE_CONFIG(ADDR_SURF_P2) |
3337 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340 PIPE_CONFIG(ADDR_SURF_P2) |
3341 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3344 PIPE_CONFIG(ADDR_SURF_P2) |
3345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3347 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3348 PIPE_CONFIG(ADDR_SURF_P2) |
3349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3351 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3352 PIPE_CONFIG(ADDR_SURF_P2) |
3353 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3355 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3356 PIPE_CONFIG(ADDR_SURF_P2) |
3357 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3359 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3360 PIPE_CONFIG(ADDR_SURF_P2) |
3361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3364 PIPE_CONFIG(ADDR_SURF_P2) |
3365 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3368 PIPE_CONFIG(ADDR_SURF_P2) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3372 PIPE_CONFIG(ADDR_SURF_P2) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3379 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3383 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3384 PIPE_CONFIG(ADDR_SURF_P2) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3387 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3388 PIPE_CONFIG(ADDR_SURF_P2) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3391 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3392 PIPE_CONFIG(ADDR_SURF_P2) |
3393 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3395 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3396 PIPE_CONFIG(ADDR_SURF_P2) |
3397 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3399 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400 PIPE_CONFIG(ADDR_SURF_P2) |
3401 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3404 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407 NUM_BANKS(ADDR_SURF_8_BANK));
3408 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411 NUM_BANKS(ADDR_SURF_8_BANK));
3412 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415 NUM_BANKS(ADDR_SURF_8_BANK));
3416 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3419 NUM_BANKS(ADDR_SURF_8_BANK));
3420 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423 NUM_BANKS(ADDR_SURF_8_BANK));
3424 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3427 NUM_BANKS(ADDR_SURF_8_BANK));
3428 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3431 NUM_BANKS(ADDR_SURF_8_BANK));
3432 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435 NUM_BANKS(ADDR_SURF_16_BANK));
3436 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439 NUM_BANKS(ADDR_SURF_16_BANK));
3440 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3443 NUM_BANKS(ADDR_SURF_16_BANK));
3444 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3447 NUM_BANKS(ADDR_SURF_16_BANK));
3448 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3451 NUM_BANKS(ADDR_SURF_16_BANK));
3452 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3455 NUM_BANKS(ADDR_SURF_16_BANK));
3456 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3459 NUM_BANKS(ADDR_SURF_8_BANK));
3461 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3462 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3463 reg_offset != 23)
3464 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3466 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3467 if (reg_offset != 7)
3468 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3469 break;
3470 }
3471 }
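/*
 * A minimal sketch of the field packing done by the ARRAY_MODE(),
 * PIPE_CONFIG(), TILE_SPLIT() and MICRO_TILE_MODE_NEW() helpers used in
 * the tables above: each macro shifts an enum value to its field offset so
 * the ORed result forms a single GB_TILE_MODE register value. The shift
 * constants below are hypothetical stand-ins; the real ones come from
 * gca/gfx_8_0_sh_mask.h.
 */
#if 0	/* illustrative only, not driver code */
static u32 example_pack_tile_mode(u32 array_mode, u32 pipe_config,
				  u32 tile_split, u32 micro_tile_mode)
{
	const u32 array_mode_shift = 2;		/* hypothetical */
	const u32 pipe_config_shift = 6;	/* hypothetical */
	const u32 tile_split_shift = 11;	/* hypothetical */
	const u32 micro_tile_mode_shift = 22;	/* hypothetical */

	return (array_mode << array_mode_shift) |
	       (pipe_config << pipe_config_shift) |
	       (tile_split << tile_split_shift) |
	       (micro_tile_mode << micro_tile_mode_shift);
}
#endif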
3474 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3475 u32 se_num, u32 sh_num, u32 instance)
3476 {
3477 u32 data;
3479 if (instance == 0xffffffff)
3480 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3481 else
3482 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3484 if (se_num == 0xffffffff)
3485 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3486 else
3487 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3489 if (sh_num == 0xffffffff)
3490 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3491 else
3492 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3494 WREG32(mmGRBM_GFX_INDEX, data);
3495 }
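/*
 * A minimal sketch of the read-modify-write pattern REG_SET_FIELD()
 * implements in gfx_v8_0_select_se_sh() above: clear the field's bits,
 * then OR in the new value at the field's shift. The mask/shift pair is
 * passed explicitly here; the real macro derives them from the generated
 * *_sh_mask.h constants.
 */
#if 0	/* illustrative only, not driver code */
static u32 example_reg_set_field(u32 reg, u32 field_mask, u32 field_shift,
				 u32 val)
{
	reg &= ~field_mask;			  /* clear the field */
	reg |= (val << field_shift) & field_mask; /* insert the new value */
	return reg;
}
#endif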
3497 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3498 u32 me, u32 pipe, u32 q)
3499 {
3500 vi_srbm_select(adev, me, pipe, q, 0);
3501 }
3503 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3504 {
3505 u32 data, mask;
3507 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3508 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3510 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3512 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3513 adev->gfx.config.max_sh_per_se);
3515 return (~data) & mask;
3516 }
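/*
 * A minimal sketch of the bitmap math in gfx_v8_0_get_rb_active_bitmap()
 * above: the hardware registers report *disabled* render backends, so the
 * active set is the complement, clipped to the RBs owned by the currently
 * selected SH. amdgpu_gfx_create_bitmask(n) is assumed to behave like
 * ((1 << n) - 1).
 */
#if 0	/* illustrative only, not driver code */
static u32 example_rb_active_bitmap(u32 disabled_rbs,
				    unsigned int backends_per_se,
				    unsigned int sh_per_se)
{
	u32 mask = (1u << (backends_per_se / sh_per_se)) - 1;

	return (~disabled_rbs) & mask;	/* set bit == active RB */
}
#endif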
3518 static void
3519 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3520 {
3521 switch (adev->asic_type) {
3522 case CHIP_FIJI:
3524 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3525 RB_XSEL2(1) | PKR_MAP(2) |
3526 PKR_XSEL(1) | PKR_YSEL(1) |
3527 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3528 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3529 SE_PAIR_YSEL(2);
3530 break;
3531 case CHIP_VEGAM:
3532 case CHIP_POLARIS10:
3533 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3534 SE_XSEL(1) | SE_YSEL(1);
3535 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3536 SE_PAIR_YSEL(2);
3537 break;
3538 case CHIP_TONGA:
3540 *rconf |= RB_MAP_PKR0(2);
3541 break;
3543 case CHIP_POLARIS11:
3544 case CHIP_POLARIS12:
3545 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3546 SE_XSEL(1) | SE_YSEL(1);
3547 break;
3548 case CHIP_CARRIZO:
3549 case CHIP_STONEY:
3550 case CHIP_TOPAZ:
3551 break;
3553 default:
3554 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3555 break;
3556 }
3557 }
3559 static void
3560 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3561 u32 raster_config, u32 raster_config_1,
3562 unsigned rb_mask, unsigned num_rb)
3564 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3565 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3566 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3567 unsigned rb_per_se = num_rb / num_se;
3568 unsigned se_mask[4];
3571 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3572 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3573 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3574 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3576 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3577 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3578 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3580 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3581 (!se_mask[2] && !se_mask[3]))) {
3582 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3584 if (!se_mask[0] && !se_mask[1]) {
3585 raster_config_1 |=
3586 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3587 } else {
3588 raster_config_1 |=
3589 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3590 }
3591 }
3593 for (se = 0; se < num_se; se++) {
3594 unsigned raster_config_se = raster_config;
3595 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3596 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3597 int idx = (se / 2) * 2;
3599 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3600 raster_config_se &= ~SE_MAP_MASK;
3602 if (!se_mask[idx]) {
3603 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3604 } else {
3605 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3606 }
3607 }
3609 pkr0_mask &= rb_mask;
3610 pkr1_mask &= rb_mask;
3611 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3612 raster_config_se &= ~PKR_MAP_MASK;
3614 if (!pkr0_mask) {
3615 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3616 } else {
3617 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3618 }
3619 }
3621 if (rb_per_se >= 2) {
3622 unsigned rb0_mask = 1 << (se * rb_per_se);
3623 unsigned rb1_mask = rb0_mask << 1;
3625 rb0_mask &= rb_mask;
3626 rb1_mask &= rb_mask;
3627 if (!rb0_mask || !rb1_mask) {
3628 raster_config_se &= ~RB_MAP_PKR0_MASK;
3630 if (!rb0_mask) {
3631 raster_config_se |=
3632 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3633 } else {
3634 raster_config_se |=
3635 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3636 }
3637 }
3639 if (rb_per_se > 2) {
3640 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3641 rb1_mask = rb0_mask << 1;
3642 rb0_mask &= rb_mask;
3643 rb1_mask &= rb_mask;
3644 if (!rb0_mask || !rb1_mask) {
3645 raster_config_se &= ~RB_MAP_PKR1_MASK;
3647 if (!rb0_mask) {
3648 raster_config_se |=
3649 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3650 } else {
3651 raster_config_se |=
3652 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3653 }
3654 }
3655 }
3656 }
3658 /* GRBM_GFX_INDEX has a different offset on VI */
3659 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3660 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3661 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3664 /* GRBM_GFX_INDEX has a different offset on VI */
3665 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3668 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3672 u32 raster_config = 0, raster_config_1 = 0;
3674 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3675 adev->gfx.config.max_sh_per_se;
3676 unsigned num_rb_pipes;
3678 mutex_lock(&adev->grbm_idx_mutex);
3679 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3680 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3681 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3682 data = gfx_v8_0_get_rb_active_bitmap(adev);
3683 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3684 rb_bitmap_width_per_sh);
3687 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3689 adev->gfx.config.backend_enable_mask = active_rbs;
3690 adev->gfx.config.num_rbs = hweight32(active_rbs);
3692 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3693 adev->gfx.config.max_shader_engines, 16);
3695 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3697 if (!adev->gfx.config.backend_enable_mask ||
3698 adev->gfx.config.num_rbs >= num_rb_pipes) {
3699 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3700 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3701 } else {
3702 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3703 adev->gfx.config.backend_enable_mask,
3704 adev->gfx.config.num_rbs);
3705 }
3707 /* cache the values for userspace */
3708 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3709 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3710 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3711 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3712 RREG32(mmCC_RB_BACKEND_DISABLE);
3713 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3714 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3715 adev->gfx.config.rb_config[i][j].raster_config =
3716 RREG32(mmPA_SC_RASTER_CONFIG);
3717 adev->gfx.config.rb_config[i][j].raster_config_1 =
3718 RREG32(mmPA_SC_RASTER_CONFIG_1);
3721 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3722 mutex_unlock(&adev->grbm_idx_mutex);
3725 /**
3726 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3727 *
3728 * @adev: amdgpu_device pointer
3729 *
3730 * Initialize compute vmid sh_mem registers
3731 */
3733 #define DEFAULT_SH_MEM_BASES (0x6000)
3734 #define FIRST_COMPUTE_VMID (8)
3735 #define LAST_COMPUTE_VMID (16)
3736 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3739 uint32_t sh_mem_config;
3740 uint32_t sh_mem_bases;
3742 /*
3743 * Configure apertures:
3744 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3745 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3746 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3747 */
3748 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3750 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3751 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3752 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3753 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3754 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3755 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3757 mutex_lock(&adev->srbm_mutex);
3758 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3759 vi_srbm_select(adev, 0, 0, 0, i);
3760 /* CP and shaders */
3761 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3762 WREG32(mmSH_MEM_APE1_BASE, 1);
3763 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3764 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3766 vi_srbm_select(adev, 0, 0, 0, 0);
3767 mutex_unlock(&adev->srbm_mutex);
3768 }
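/*
 * A minimal sketch of the SH_MEM_BASES encoding used above: the register
 * packs the 16-bit private aperture base in its low half and the 16-bit
 * shared aperture base in its high half, each in 4GB-aligned units, which
 * is why DEFAULT_SH_MEM_BASES is ORed with itself shifted left by 16.
 */
#if 0	/* illustrative only, not driver code */
static u32 example_sh_mem_bases(u32 private_base, u32 shared_base)
{
	/* 0x6000 in both halves yields the 0x6000...-based apertures */
	return (private_base & 0xffff) | ((shared_base & 0xffff) << 16);
}
#endif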
3770 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3771 {
3772 switch (adev->asic_type) {
3773 case CHIP_TOPAZ:
3774 adev->gfx.config.double_offchip_lds_buf = 1;
3775 break;
3776 default:
3778 adev->gfx.config.double_offchip_lds_buf = 0;
3779 break;
3780 }
3781 }
3783 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3785 u32 tmp, sh_static_mem_cfg;
3788 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3789 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3790 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3791 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3793 gfx_v8_0_tiling_mode_table_init(adev);
3794 gfx_v8_0_setup_rb(adev);
3795 gfx_v8_0_get_cu_info(adev);
3796 gfx_v8_0_config_init(adev);
3798 /* XXX SH_MEM regs */
3799 /* where to put LDS, scratch, GPUVM in FSA64 space */
3800 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3801 SWIZZLE_ENABLE, 1);
3802 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3803 ELEMENT_SIZE, 1);
3804 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3805 INDEX_STRIDE, 3);
3806 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3808 mutex_lock(&adev->srbm_mutex);
3809 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3810 vi_srbm_select(adev, 0, 0, 0, i);
3811 /* CP and shaders */
3812 if (i == 0) {
3813 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3814 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3815 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3816 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3817 WREG32(mmSH_MEM_CONFIG, tmp);
3818 WREG32(mmSH_MEM_BASES, 0);
3819 } else {
3820 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3821 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3822 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3823 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3824 WREG32(mmSH_MEM_CONFIG, tmp);
3825 tmp = adev->gmc.shared_aperture_start >> 48;
3826 WREG32(mmSH_MEM_BASES, tmp);
3829 WREG32(mmSH_MEM_APE1_BASE, 1);
3830 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3832 vi_srbm_select(adev, 0, 0, 0, 0);
3833 mutex_unlock(&adev->srbm_mutex);
3835 gfx_v8_0_init_compute_vmid(adev);
3837 mutex_lock(&adev->grbm_idx_mutex);
3838 /*
3839 * making sure that the following register writes will be broadcast
3840 * to all the shaders
3841 */
3842 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3844 WREG32(mmPA_SC_FIFO_SIZE,
3845 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3846 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3847 (adev->gfx.config.sc_prim_fifo_size_backend <<
3848 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3849 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3850 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3851 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3852 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3854 tmp = RREG32(mmSPI_ARB_PRIORITY);
3855 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3856 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3857 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3858 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3859 WREG32(mmSPI_ARB_PRIORITY, tmp);
3861 mutex_unlock(&adev->grbm_idx_mutex);
3865 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3870 mutex_lock(&adev->grbm_idx_mutex);
3871 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3872 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3873 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3874 for (k = 0; k < adev->usec_timeout; k++) {
3875 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3876 break;
3877 udelay(1);
3878 }
3879 if (k == adev->usec_timeout) {
3880 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3881 0xffffffff, 0xffffffff);
3882 mutex_unlock(&adev->grbm_idx_mutex);
3883 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3884 i, j);
3885 return;
3886 }
3887 }
3888 }
3889 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3890 mutex_unlock(&adev->grbm_idx_mutex);
3892 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3893 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3894 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3895 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3896 for (k = 0; k < adev->usec_timeout; k++) {
3897 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3903 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3906 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3908 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3909 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3910 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3911 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3913 WREG32(mmCP_INT_CNTL_RING0, tmp);
3916 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3919 WREG32(mmRLC_CSIB_ADDR_HI,
3920 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3921 WREG32(mmRLC_CSIB_ADDR_LO,
3922 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3923 WREG32(mmRLC_CSIB_LENGTH,
3924 adev->gfx.rlc.clear_state_size);
3925 }
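/*
 * A minimal sketch of the 64-bit address split performed by
 * gfx_v8_0_init_csb() above: the clear-state address is programmed as a
 * high dword plus a low dword whose bottom two bits are masked off, since
 * the buffer must be at least dword aligned.
 */
#if 0	/* illustrative only, not driver code */
static void example_split_gpu_addr(u64 gpu_addr, u32 *hi, u32 *lo)
{
	*hi = (u32)(gpu_addr >> 32);
	*lo = (u32)(gpu_addr & 0xfffffffc);
}
#endif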
3927 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3928 int ind_offset, int list_size,
3930 int *unique_indices, int *indices_count, int max_indices,
3933 int *ind_start_offsets, int *offset_count, int max_offset)
3936 {
3937 int indices;
3938 bool new_entry = true;
3940 for (; ind_offset < list_size; ind_offset++) {
3941 if (new_entry) {
3942 new_entry = false;
3944 ind_start_offsets[*offset_count] = ind_offset;
3945 *offset_count = *offset_count + 1;
3946 BUG_ON(*offset_count >= max_offset);
3947 }
3949 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3950 new_entry = true;
3951 continue;
3952 }
3954 ind_offset += 2;
3956 /* look for a matching index */
3957 for (indices = 0;
3958 indices < *indices_count;
3959 indices++)
3960 if (unique_indices[indices] ==
3961 register_list_format[ind_offset])
3962 break;
3965 if (indices >= *indices_count) {
3966 unique_indices[*indices_count] =
3967 register_list_format[ind_offset];
3968 indices = *indices_count;
3969 *indices_count = *indices_count + 1;
3970 BUG_ON(*indices_count >= max_indices);
3973 register_list_format[ind_offset] = indices;
3974 }
3975 }
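/*
 * A minimal sketch of the deduplication gfx_v8_0_parse_ind_reg_list()
 * performs above: each indexed register value is replaced by its position
 * in a small table of unique values, growing the table only when a value
 * has not been seen before.
 */
#if 0	/* illustrative only, not driver code */
static int example_dedup_index(int *unique, int *count, int max, int value)
{
	int i;

	for (i = 0; i < *count; i++)
		if (unique[i] == value)
			return i;	/* already known */

	if (*count >= max)
		return -1;		/* table full; the real code BUG()s */

	unique[*count] = value;
	return (*count)++;		/* index of the new entry */
}
#endif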
3977 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3978 {
3979 u32 i, temp, data, list_size;
3980 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3981 int indices_count = 0;
3982 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3983 int offset_count = 0;
3986 unsigned int *register_list_format =
3987 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3988 if (!register_list_format)
3989 return -ENOMEM;
3990 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3991 adev->gfx.rlc.reg_list_format_size_bytes);
3993 gfx_v8_0_parse_ind_reg_list(register_list_format,
3994 RLC_FormatDirectRegListLength,
3995 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3996 unique_indices,
3997 &indices_count,
3998 ARRAY_SIZE(unique_indices),
3999 indirect_start_offsets,
4000 &offset_count,
4001 ARRAY_SIZE(indirect_start_offsets));
4003 /* save and restore list */
4004 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4006 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4007 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4008 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4011 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4012 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4013 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4015 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4016 list_size = list_size >> 1;
4017 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4018 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4020 /* starting offsets starts */
4021 WREG32(mmRLC_GPM_SCRATCH_ADDR,
4022 adev->gfx.rlc.starting_offsets_start);
4023 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4024 WREG32(mmRLC_GPM_SCRATCH_DATA,
4025 indirect_start_offsets[i]);
4027 /* unique indices */
4028 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4029 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4030 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4031 if (unique_indices[i] != 0) {
4032 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4033 WREG32(data + i, unique_indices[i] >> 20);
4034 }
4035 }
4036 kfree(register_list_format);
4038 return 0;
4039 }
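/*
 * A minimal sketch of how each unique index above is split across the
 * RLC_SRM_INDEX_CNTL register pair: the low 18 bits carry the register
 * offset and the bits from 20 up carry the associated data, matching the
 * 0x3FFFF mask and the >> 20 shift in the loop above.
 */
#if 0	/* illustrative only, not driver code */
static void example_split_unique_index(int value, u32 *addr, u32 *data)
{
	*addr = value & 0x3FFFF;	/* register offset portion */
	*data = value >> 20;		/* index data portion */
}
#endif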
4041 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4043 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4046 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4050 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4052 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4053 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4054 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4055 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4056 WREG32(mmRLC_PG_DELAY, data);
4058 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4059 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4063 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4066 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4069 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4072 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4075 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4077 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4080 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4082 if ((adev->asic_type == CHIP_CARRIZO) ||
4083 (adev->asic_type == CHIP_STONEY)) {
4084 gfx_v8_0_init_csb(adev);
4085 gfx_v8_0_init_save_restore_list(adev);
4086 gfx_v8_0_enable_save_restore_machine(adev);
4087 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4088 gfx_v8_0_init_power_gating(adev);
4089 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4090 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4091 (adev->asic_type == CHIP_POLARIS12) ||
4092 (adev->asic_type == CHIP_VEGAM)) {
4093 gfx_v8_0_init_csb(adev);
4094 gfx_v8_0_init_save_restore_list(adev);
4095 gfx_v8_0_enable_save_restore_machine(adev);
4096 gfx_v8_0_init_power_gating(adev);
4101 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4103 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4105 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4106 gfx_v8_0_wait_for_rlc_serdes(adev);
4109 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4111 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4114 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4118 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4119 {
4120 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4122 /* on APUs such as Carrizo, the GUI idle interrupt is enabled only after the CP is initialized */
4123 if (!(adev->flags & AMD_IS_APU))
4124 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4125 }
4129 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4131 const struct rlc_firmware_header_v2_0 *hdr;
4132 const __le32 *fw_data;
4133 unsigned i, fw_size;
4135 if (!adev->gfx.rlc_fw)
4136 return -EINVAL;
4138 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4139 amdgpu_ucode_print_rlc_hdr(&hdr->header);
4141 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4142 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4143 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4145 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4146 for (i = 0; i < fw_size; i++)
4147 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4148 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4150 return 0;
4151 }
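/*
 * A minimal sketch of the microcode upload protocol used by
 * gfx_v8_0_rlc_load_microcode() above and by the PFP/CE/ME/MEC loaders
 * below: rewind the ucode address register, stream every little-endian
 * word through the data register, then leave the firmware version in the
 * address register. The wreg32 callback stands in for WREG32().
 */
#if 0	/* illustrative only, not driver code */
static void example_load_ucode(void (*wreg32)(u32 reg, u32 val),
			       u32 addr_reg, u32 data_reg,
			       const __le32 *fw, unsigned int words,
			       u32 version)
{
	unsigned int i;

	wreg32(addr_reg, 0);
	for (i = 0; i < words; i++)
		wreg32(data_reg, le32_to_cpu(fw[i]));
	wreg32(addr_reg, version);
}
#endif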
4153 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4158 gfx_v8_0_rlc_stop(adev);
4161 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4162 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4163 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4164 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4165 if (adev->asic_type == CHIP_POLARIS11 ||
4166 adev->asic_type == CHIP_POLARIS10 ||
4167 adev->asic_type == CHIP_POLARIS12 ||
4168 adev->asic_type == CHIP_VEGAM) {
4169 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4170 tmp &= ~0x3;
4171 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4172 }
4175 WREG32(mmRLC_PG_CNTL, 0);
4177 gfx_v8_0_rlc_reset(adev);
4178 gfx_v8_0_init_pg(adev);
4181 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4182 /* legacy rlc firmware loading */
4183 r = gfx_v8_0_rlc_load_microcode(adev);
4184 if (r)
4185 return r;
4186 }
4188 gfx_v8_0_rlc_start(adev);
4190 return 0;
4191 }
4193 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4194 {
4195 int i;
4196 u32 tmp = RREG32(mmCP_ME_CNTL);
4198 if (enable) {
4199 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4200 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4201 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4202 } else {
4203 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4204 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4205 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4206 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4207 adev->gfx.gfx_ring[i].ready = false;
4208 }
4209 WREG32(mmCP_ME_CNTL, tmp);
4210 }
4213 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4215 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4216 const struct gfx_firmware_header_v1_0 *ce_hdr;
4217 const struct gfx_firmware_header_v1_0 *me_hdr;
4218 const __le32 *fw_data;
4219 unsigned i, fw_size;
4221 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4222 return -EINVAL;
4224 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4225 adev->gfx.pfp_fw->data;
4226 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4227 adev->gfx.ce_fw->data;
4228 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4229 adev->gfx.me_fw->data;
4231 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4232 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4233 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4235 gfx_v8_0_cp_gfx_enable(adev, false);
4238 fw_data = (const __le32 *)
4239 (adev->gfx.pfp_fw->data +
4240 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4241 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4242 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4243 for (i = 0; i < fw_size; i++)
4244 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4245 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4248 fw_data = (const __le32 *)
4249 (adev->gfx.ce_fw->data +
4250 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4251 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4252 WREG32(mmCP_CE_UCODE_ADDR, 0);
4253 for (i = 0; i < fw_size; i++)
4254 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4255 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4258 fw_data = (const __le32 *)
4259 (adev->gfx.me_fw->data +
4260 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4261 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4262 WREG32(mmCP_ME_RAM_WADDR, 0);
4263 for (i = 0; i < fw_size; i++)
4264 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4265 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4267 return 0;
4268 }
4270 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4271 {
4272 u32 count = 0;
4273 const struct cs_section_def *sect = NULL;
4274 const struct cs_extent_def *ext = NULL;
4276 count += 2; /* begin clear state */
4278 count += 3; /* context control state */
4281 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4282 for (ext = sect->section; ext->extent != NULL; ++ext) {
4283 if (sect->id == SECT_CONTEXT)
4284 count += 2 + ext->reg_count;
4285 else
4286 return 0;
4287 }
4288 }
4289 count += 4; /* pa_sc_raster_config/pa_sc_raster_config1 */
4291 count += 2; /* end clear state */
4293 count += 2; /* clear state */
4295 return count;
4296 }
4299 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4301 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4302 const struct cs_section_def *sect = NULL;
4303 const struct cs_extent_def *ext = NULL;
4307 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4308 WREG32(mmCP_ENDIAN_SWAP, 0);
4309 WREG32(mmCP_DEVICE_ID, 1);
4311 gfx_v8_0_cp_gfx_enable(adev, true);
4313 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4314 if (r) {
4315 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4316 return r;
4317 }
4319 /* clear state buffer */
4320 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4321 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4323 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4324 amdgpu_ring_write(ring, 0x80000000);
4325 amdgpu_ring_write(ring, 0x80000000);
4327 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4328 for (ext = sect->section; ext->extent != NULL; ++ext) {
4329 if (sect->id == SECT_CONTEXT) {
4330 amdgpu_ring_write(ring,
4331 PACKET3(PACKET3_SET_CONTEXT_REG,
4332 ext->reg_count));
4333 amdgpu_ring_write(ring,
4334 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4335 for (i = 0; i < ext->reg_count; i++)
4336 amdgpu_ring_write(ring, ext->extent[i]);
4341 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4342 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4343 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4344 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4346 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4347 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4349 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4350 amdgpu_ring_write(ring, 0);
4352 /* init the CE partitions */
4353 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4354 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4355 amdgpu_ring_write(ring, 0x8000);
4356 amdgpu_ring_write(ring, 0x8000);
4358 amdgpu_ring_commit(ring);
4360 return 0;
4361 }
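/*
 * A minimal sketch of the PM4 type-3 header built by the PACKET3() macro
 * used throughout gfx_v8_0_cp_gfx_start() above: one header dword (packet
 * type in bits 31:30, body length in 29:16, opcode in 15:8) followed by
 * the payload dwords. The count field is understood here as the number of
 * payload dwords minus one; treat the exact encoding as an approximation.
 */
#if 0	/* illustrative only, not driver code */
static u32 example_packet3_header(u32 opcode, u32 count)
{
	return (3u << 30) |			/* type 3 packet */
	       ((count & 0x3FFF) << 16) |	/* payload dwords - 1 */
	       ((opcode & 0xFF) << 8);		/* PM4 opcode */
}
#endif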
4362 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4365 /* no gfx doorbells on iceland */
4366 if (adev->asic_type == CHIP_TOPAZ)
4369 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4371 if (ring->use_doorbell) {
4372 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4373 DOORBELL_OFFSET, ring->doorbell_index);
4374 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4375 DOORBELL_HIT, 0);
4376 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4377 DOORBELL_EN, 1);
4378 } else {
4379 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4380 }
4382 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4384 if (adev->flags & AMD_IS_APU)
4385 return;
4387 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4388 DOORBELL_RANGE_LOWER,
4389 AMDGPU_DOORBELL_GFX_RING0);
4390 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4392 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4393 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4396 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4398 struct amdgpu_ring *ring;
4401 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4404 /* Set the write pointer delay */
4405 WREG32(mmCP_RB_WPTR_DELAY, 0);
4407 /* set the RB to use vmid 0 */
4408 WREG32(mmCP_RB_VMID, 0);
4410 /* Set ring buffer size */
4411 ring = &adev->gfx.gfx_ring[0];
4412 rb_bufsz = order_base_2(ring->ring_size / 8);
4413 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4414 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4415 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4416 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4417 #ifdef __BIG_ENDIAN
4418 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4419 #endif
4420 WREG32(mmCP_RB0_CNTL, tmp);
4422 /* Initialize the ring buffer's read and write pointers */
4423 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4424 ring->wptr = 0;
4425 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4427 /* set the wb address whether it's enabled or not */
4428 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4429 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4430 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4432 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4433 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4434 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4436 WREG32(mmCP_RB0_CNTL, tmp);
4438 rb_addr = ring->gpu_addr >> 8;
4439 WREG32(mmCP_RB0_BASE, rb_addr);
4440 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4442 gfx_v8_0_set_cpg_door_bell(adev, ring);
4443 /* start the ring */
4444 amdgpu_ring_clear_ring(ring);
4445 gfx_v8_0_cp_gfx_start(adev);
4447 r = amdgpu_ring_test_ring(ring);
4448 if (r)
4449 ring->ready = false;
4451 return r;
4452 }
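/*
 * A minimal sketch of the CP_RB0_CNTL size math used in
 * gfx_v8_0_cp_gfx_resume() above: the hardware takes the ring size as a
 * power-of-two exponent in 8-byte units, so a ring of ring_size bytes is
 * programmed as order_base_2(ring_size / 8). A local log2 loop stands in
 * for order_base_2().
 */
#if 0	/* illustrative only, not driver code */
static u32 example_rb_bufsz(u32 ring_size_bytes)
{
	u32 units = ring_size_bytes / 8;	/* 8-byte units */
	u32 log2 = 0;

	while ((1u << log2) < units)		/* == order_base_2(units) */
		log2++;
	return log2;
}
#endif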
4454 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4455 {
4456 int i;
4458 if (enable) {
4459 WREG32(mmCP_MEC_CNTL, 0);
4460 } else {
4461 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4462 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4463 adev->gfx.compute_ring[i].ready = false;
4464 adev->gfx.kiq.ring.ready = false;
4465 }
4466 udelay(50);
4467 }
4469 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4471 const struct gfx_firmware_header_v1_0 *mec_hdr;
4472 const __le32 *fw_data;
4473 unsigned i, fw_size;
4475 if (!adev->gfx.mec_fw)
4476 return -EINVAL;
4478 gfx_v8_0_cp_compute_enable(adev, false);
4480 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4481 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4483 fw_data = (const __le32 *)
4484 (adev->gfx.mec_fw->data +
4485 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4486 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4489 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4490 for (i = 0; i < fw_size; i++)
4491 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4492 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4494 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4495 if (adev->gfx.mec2_fw) {
4496 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4498 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4499 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4501 fw_data = (const __le32 *)
4502 (adev->gfx.mec2_fw->data +
4503 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4504 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4506 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4507 for (i = 0; i < fw_size; i++)
4508 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4509 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4510 }
4512 return 0;
4513 }
4516 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4517 {
4518 uint32_t tmp;
4519 struct amdgpu_device *adev = ring->adev;
4521 /* tell RLC which is KIQ queue */
4522 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4523 tmp &= 0xffffff00;
4524 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4525 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4526 tmp |= 0x80;
4527 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4528 }
4530 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4532 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4533 uint32_t scratch, tmp = 0;
4534 uint64_t queue_mask = 0;
4537 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4538 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4539 continue;
4541 /* This situation may be hit in the future if a new HW
4542 * generation exposes more than 64 queues. If so, the
4543 * definition of queue_mask needs updating */
4544 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4545 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4549 queue_mask |= (1ull << i);
4550 }
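/*
 * A minimal sketch of the queue_mask construction above: every MEC queue
 * present in the queue bitmap contributes one bit to the 64-bit mask that
 * SET_RESOURCES hands to the KIQ. The test_queue callback stands in for
 * test_bit() on adev->gfx.mec.queue_bitmap.
 */
#if 0	/* illustrative only, not driver code */
static u64 example_build_queue_mask(bool (*test_queue)(int queue),
				    int max_queues)
{
	u64 queue_mask = 0;
	int i;

	for (i = 0; i < max_queues && i < 64; i++)
		if (test_queue(i))
			queue_mask |= 1ull << i;
	return queue_mask;
}
#endif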
4552 r = amdgpu_gfx_scratch_get(adev, &scratch);
4553 if (r) {
4554 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4555 return r;
4556 }
4557 WREG32(scratch, 0xCAFEDEAD);
4559 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4560 if (r) {
4561 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4562 amdgpu_gfx_scratch_free(adev, scratch);
4563 return r;
4564 }
4566 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4567 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4568 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4569 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4570 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4571 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4572 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4573 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4574 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4575 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4576 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4577 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4580 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4581 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4582 amdgpu_ring_write(kiq_ring,
4583 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4584 amdgpu_ring_write(kiq_ring,
4585 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4586 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4587 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4588 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4589 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4590 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4591 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4592 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4594 /* write to scratch for completion */
4595 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4596 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4597 amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4598 amdgpu_ring_commit(kiq_ring);
4600 for (i = 0; i < adev->usec_timeout; i++) {
4601 tmp = RREG32(scratch);
4602 if (tmp == 0xDEADBEEF)
4603 break;
4604 udelay(1);
4605 }
4606 if (i >= adev->usec_timeout) {
4607 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4608 scratch, tmp);
4609 r = -EINVAL;
4610 }
4611 amdgpu_gfx_scratch_free(adev, scratch);
4613 return r;
4614 }
4616 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4617 {
4618 int i, r = 0;
4620 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4621 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4622 for (i = 0; i < adev->usec_timeout; i++) {
4623 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4624 break;
4625 udelay(1);
4626 }
4627 if (i == adev->usec_timeout)
4628 r = -ETIMEDOUT;
4629 }
4630 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4631 WREG32(mmCP_HQD_PQ_RPTR, 0);
4632 WREG32(mmCP_HQD_PQ_WPTR, 0);
4634 return r;
4635 }
4637 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4639 struct amdgpu_device *adev = ring->adev;
4640 struct vi_mqd *mqd = ring->mqd_ptr;
4641 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4644 mqd->header = 0xC0310800;
4645 mqd->compute_pipelinestat_enable = 0x00000001;
4646 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4647 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4648 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4649 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4650 mqd->compute_misc_reserved = 0x00000003;
4651 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4652 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4653 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4654 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4655 eop_base_addr = ring->eop_gpu_addr >> 8;
4656 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4657 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4659 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4660 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4661 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4662 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4664 mqd->cp_hqd_eop_control = tmp;
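/*
 * A minimal sketch of the EOP_SIZE encoding noted above: the field stores
 * log2(size in dwords) - 1, so the hardware decodes it back as
 * 2^(EOP_SIZE + 1) dwords. With GFX8_MEC_HPD_SIZE = 2048 bytes this is
 * 512 dwords, order_base_2(512) = 9, a field value of 8, and
 * 2^(8 + 1) = 512 round-trips correctly.
 */
#if 0	/* illustrative only, not driver code */
static u32 example_eop_size_field(u32 hpd_size_bytes)
{
	u32 dwords = hpd_size_bytes / 4;
	u32 log2 = 0;

	while ((1u << log2) < dwords)	/* == order_base_2(dwords) */
		log2++;
	return log2 - 1;		/* register field value */
}
#endif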
4666 /* enable doorbell? */
4667 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4668 CP_HQD_PQ_DOORBELL_CONTROL,
4670 ring->use_doorbell ? 1 : 0);
4672 mqd->cp_hqd_pq_doorbell_control = tmp;
4674 /* set the pointer to the MQD */
4675 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4676 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4678 /* set MQD vmid to 0 */
4679 tmp = RREG32(mmCP_MQD_CONTROL);
4680 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4681 mqd->cp_mqd_control = tmp;
4683 /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4684 hqd_gpu_addr = ring->gpu_addr >> 8;
4685 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4686 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4688 /* set up the HQD, this is similar to CP_RB0_CNTL */
4689 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4690 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4691 (order_base_2(ring->ring_size / 4) - 1));
4692 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4693 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4695 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4697 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4698 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4699 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4700 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4701 mqd->cp_hqd_pq_control = tmp;
4703 /* set the wb address whether it's enabled or not */
4704 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4705 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4706 mqd->cp_hqd_pq_rptr_report_addr_hi =
4707 upper_32_bits(wb_gpu_addr) & 0xffff;
4709 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4710 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4711 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4712 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4715 /* enable the doorbell if requested */
4716 if (ring->use_doorbell) {
4717 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4718 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4719 DOORBELL_OFFSET, ring->doorbell_index);
4721 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4723 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4724 DOORBELL_SOURCE, 0);
4725 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4729 mqd->cp_hqd_pq_doorbell_control = tmp;
4731 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4733 mqd->cp_hqd_pq_wptr = ring->wptr;
4734 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4736 /* set the vmid for the queue */
4737 mqd->cp_hqd_vmid = 0;
4739 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4740 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4741 mqd->cp_hqd_persistent_state = tmp;
4744 tmp = RREG32(mmCP_HQD_IB_CONTROL);
4745 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4746 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4747 mqd->cp_hqd_ib_control = tmp;
4749 tmp = RREG32(mmCP_HQD_IQ_TIMER);
4750 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4751 mqd->cp_hqd_iq_timer = tmp;
4753 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4754 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4755 mqd->cp_hqd_ctx_save_control = tmp;
4758 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4759 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4760 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4761 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4762 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4763 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4764 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4765 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4766 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4767 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4768 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4769 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4770 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4771 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4772 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4774 /* activate the queue */
4775 mqd->cp_hqd_active = 1;
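
/*
 * Copy the MQD image into the CP_HQD_* registers of the queue slot that
 * is currently selected via SRBM.  The registers are written in ranges;
 * CP_HQD_ACTIVE is written last so the queue only goes active once the
 * whole descriptor has been programmed.
 */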
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
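
/*
 * Initialize the KIQ (kernel interface queue) ring.  Its MQD backup uses
 * the slot one past the last compute ring.  On GPU reset the saved MQD
 * image is restored and committed directly instead of being rebuilt.
 */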
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
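
/*
 * Initialize a KCQ (kernel compute queue) ring.  Unlike the KIQ, the MQD
 * is only written here; the queue is mapped to a hardware slot later by
 * the KIQ via MAP_QUEUES in gfx_v8_0_kiq_kcq_enable().
 */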
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
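
/*
 * Bring up the compute queues: enable the MEC, initialize the KIQ first
 * (it is needed to map everything else), then the KCQs, program the
 * doorbell range, and finally let the KIQ map the KCQs before running
 * the ring tests.
 */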
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
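
/*
 * Tear down one KCQ by sending an UNMAP_QUEUES packet (RESET_QUEUES
 * action, queue selected by its doorbell offset) through the KIQ, then
 * polling a scratch register that the packet stream writes on completion.
 */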
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ so the CPC no longer touches memory that may become invalid */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_device_ip_set_powergating_state(adev,
					       AMD_IP_BLOCK_TYPE_GFX,
					       AMD_PG_STATE_UNGATE);

	return 0;
}
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.in_suspend = true;
	return gfx_v8_0_hw_fini(adev);
}

static int gfx_v8_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = gfx_v8_0_hw_init(adev);
	adev->gfx.in_suspend = false;
	return r;
}
static bool gfx_v8_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
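
/*
 * Inspect GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and derive which soft
 * reset bits need to be applied.  The result is cached in adev->gfx so
 * that pre_soft_reset/soft_reset/post_soft_reset all act on the same
 * snapshot of the hang state.
 */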
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
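
/*
 * Wave state is read through the SQ indirect register interface: the
 * wave/SIMD/register index written to mmSQ_IND_INDEX selects the value
 * returned by mmSQ_IND_DATA.  AUTO_INCR allows bulk reads of consecutive
 * registers such as the SGPR file.
 */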
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
	       (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
	       (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
	       (address << SQ_IND_INDEX__INDEX__SHIFT) |
	       (SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
	       (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
	       (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
	       (regno << SQ_IND_INDEX__INDEX__SHIFT) |
	       (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
	       (SQ_IND_INDEX__FORCE_READ_MASK) |
	       (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};

static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (((adev->asic_type == CHIP_POLARIS11) ||
	     (adev->asic_type == CHIP_POLARIS12) ||
	     (adev->asic_type == CHIP_VEGAM)) &&
	    adev->powerplay.pp_funcs->set_powergating_by_smu)
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);

	return 0;
}
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
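
/*
 * Issue a BPM serdes command to every CU on all shader engines.
 * 'reg_addr' is one of the BPM register indices defined at the top of
 * this file and 'cmd' is SET_BPM_SERDES_CMD or CLE_BPM_SERDES_CMD.
 */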
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
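
/*
 * RLC safe mode: before the driver touches clock gating state it asks
 * the RLC firmware to bring the GFX block into a safe state by writing a
 * command/message pair to mmRLC_SAFE_MODE, then polls until the RLC
 * acknowledges by clearing the CMD bit.
 */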
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg; cgls should be disabled too */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vmid, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
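
/*
 * Queue priority is implemented by throttling pipes through the
 * SPI_WCL_PIPE_PERCENT_* registers: a reserved pipe keeps its full wave
 * launch capacity while all other pipes are reduced to a minimum.
 */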
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
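
/*
 * COND_EXEC support: emit_init_cond_exec() writes a COND_EXEC packet with
 * a dummy DW count and returns its ring offset; emit_patch_cond_exec()
 * later patches in the real number of DWs to skip when the condition at
 * cond_exe_gpu_addr reads zero.
 */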
6605 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6609 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6610 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6611 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6612 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6613 ret = ring->wptr & ring->buf_mask;
6614 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

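/*
 * Register readback over the ring: COPY_DATA moves the register value into
 * the writeback page at adev->virt.reg_val_offs, where the host side of the
 * virtualized register-read path picks it up.
 */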
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

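/*
 * The interrupt-state helpers below are thin read-modify-write wrappers:
 * WREG32_FIELD() updates a single named bitfield of the given register and
 * leaves the remaining bits untouched.
 */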
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	/* note the inverted sense: STALL=1 stalls SQ interrupt messages */
	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id encodes me in bits [3:2], pipe in [1:0], queue in [6:4] */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

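/*
 * Privileged-access faults are treated as fatal to the offending context:
 * both handlers below log the violation and schedule adev->reset_work to
 * recover the GPU.
 */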
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.");
	return 0;
}

static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			"se_id %d, immed_overflow %d, host_reg_overflow %d, "
			"host_cmd_overflow %d, cmd_timestamp %d, "
			"reg_timestamp %d, thread_trace_buff_full %d, "
			"wlt %d, thread_trace %d.\n",
			se_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			);
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from ISR
		 * or from BH in which case we can access SQ_EDC_INFO
		 * instance
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO(
			"SQ %s detected: "
			"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
			"trap %s, sq_edc_info.source %s.\n",
			type, se_id, sh_id, cu_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* KIQ only supports GENERIC2_INT now */
		break;
	}
	return 0;
}

static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER
		      */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

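/*
 * Note on .emit_frame_size in these ring-funcs tables: each summand is the
 * worst-case number of ring DWs a single emit_* callback may write, so the
 * total is the up-front ring-space reservation per frame; emission can then
 * never overflow the ring buffer mid-frame.
 */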
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

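/*
 * Worked example for gfx_v8_0_get_cu_active_bitmap() above: with
 * max_cu_per_sh == 8 the mask is 0xff; if the combined INACTIVE_CUS field
 * (fused-off ORed with user-disabled CUs) reads 0x0c, the returned active
 * bitmap is ~0x0c & 0xff == 0xf3, i.e. CUs 0, 1 and 4-7 are active.
 */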
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

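/*
 * The CE/DE metadata writers below populate the per-context save area (CSA)
 * mapped at amdgpu_csa_vaddr(); gfx_v8_ring_emit_cntxcntl() emits the CE
 * payload for SR-IOV VF rings, and the payload layout (regular vs. chained
 * IB state) follows adev->virt.chained_ib_support.
 */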
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}