2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
38 #define SH_MEM_CONFIG_GFX_DEFAULT \
39 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
43 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
44 extern void r600_ih_ring_fini(struct radeon_device *rdev);
45 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
46 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
47 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
48 extern void sumo_rlc_fini(struct radeon_device *rdev);
49 extern int sumo_rlc_init(struct radeon_device *rdev);
50 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
51 extern void si_rlc_reset(struct radeon_device *rdev);
52 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
53 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
54 extern int cik_sdma_resume(struct radeon_device *rdev);
55 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
56 extern void cik_sdma_fini(struct radeon_device *rdev);
57 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
58 static void cik_rlc_stop(struct radeon_device *rdev);
59 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
60 static void cik_program_aspm(struct radeon_device *rdev);
61 static void cik_init_pg(struct radeon_device *rdev);
62 static void cik_init_cg(struct radeon_device *rdev);
63 static void cik_fini_pg(struct radeon_device *rdev);
64 static void cik_fini_cg(struct radeon_device *rdev);
65 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
69 * cik_get_allowed_info_register - fetch the register for the info ioctl
71 * @rdev: radeon_device pointer
72 * @reg: register offset in bytes
73 * @val: register value
75 * Returns 0 for success or -EINVAL for an invalid register
/*
 * Whitelist check for the RADEON_INFO_READ_REG ioctl: only the SDMA status
 * registers of both SDMA engine instances are visible in this fragment of the
 * switch; the rest of the function body is elided in this chunk.
 */
78 int cik_get_allowed_info_register(struct radeon_device *rdev,
90 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
91 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
102 * Indirect registers accessor
/*
 * cik_didt_rreg - read an indirect DIDT register.
 * Writes the register offset to CIK_DIDT_IND_INDEX, then reads the value from
 * CIK_DIDT_IND_DATA; the index/data pair is serialized under didt_idx_lock
 * with interrupts disabled.
 * NOTE(review): local declarations and the final return are elided in this
 * chunk of the file.
 */
104 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
109 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
110 WREG32(CIK_DIDT_IND_INDEX, (reg));
111 r = RREG32(CIK_DIDT_IND_DATA);
112 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
/*
 * cik_didt_wreg - write an indirect DIDT register.
 * Mirror of cik_didt_rreg: selects the register via CIK_DIDT_IND_INDEX and
 * writes the value through CIK_DIDT_IND_DATA, serialized under didt_idx_lock.
 * NOTE(review): local declarations and the closing brace are elided here.
 */
116 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
120 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
121 WREG32(CIK_DIDT_IND_INDEX, (reg));
122 WREG32(CIK_DIDT_IND_DATA, (v));
123 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
126 /* get temperature in millidegrees */
/*
 * ci_get_temp - read the current GPU temperature (CI dGPUs).
 * Extracts the CTF temperature field from CG_MULT_THERMAL_STATUS (via the SMC
 * indirect space), masks it to 9 bits, and scales degrees to millidegrees
 * (* 1000). NOTE(review): the conditional paths and return statement between
 * these lines are elided in this chunk.
 */
127 int ci_get_temp(struct radeon_device *rdev)
132 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
138 actual_temp = temp & 0x1ff;
140 actual_temp = actual_temp * 1000;
145 /* get temperature in millidegrees */
/*
 * kv_get_temp - read the current GPU temperature (KV/KB APUs).
 * Reads a raw sensor value from SMC register 0xC0300E0C and converts it with
 * the fixed formula (raw / 8) - 49 degrees C, then scales to millidegrees.
 * NOTE(review): intervening lines (clamping/return) are elided in this chunk.
 */
146 int kv_get_temp(struct radeon_device *rdev)
151 temp = RREG32_SMC(0xC0300E0C);
154 actual_temp = (temp / 8) - 49;
158 actual_temp = actual_temp * 1000;
164 * Indirect registers accessor
/*
 * cik_pciep_rreg - read an indirect PCIE register.
 * Selects the register through PCIE_INDEX and reads PCIE_DATA under
 * pciep_idx_lock. The discarded read-back of PCIE_INDEX forces the index
 * write to post before the data access.
 * NOTE(review): local declarations and the return are elided in this chunk.
 */
166 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
171 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
172 WREG32(PCIE_INDEX, reg);
173 (void)RREG32(PCIE_INDEX);
174 r = RREG32(PCIE_DATA);
175 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * cik_pciep_wreg - write an indirect PCIE register.
 * Same index/data protocol as cik_pciep_rreg; both the index write and the
 * data write are posted via a discarded read-back before/after, all under
 * pciep_idx_lock. NOTE(review): closing brace elided in this chunk.
 */
179 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
183 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
184 WREG32(PCIE_INDEX, reg);
185 (void)RREG32(PCIE_INDEX);
186 WREG32(PCIE_DATA, v);
187 (void)RREG32(PCIE_DATA);
188 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 * Each entry packs a selector in the upper 16 bits and a dword register
 * offset (byte offset >> 2) in the lower 16 bits; the selector presumably
 * encodes GRBM SE/SH instance targeting — confirm against the RLC consumer.
 * NOTE(review): the table's brace lines and interleaved comments are elided
 * in this chunk.
 */
191 static const u32 spectre_rlc_save_restore_register_list[] =
193 (0x0e00 << 16) | (0xc12c >> 2),
195 (0x0e00 << 16) | (0xc140 >> 2),
197 (0x0e00 << 16) | (0xc150 >> 2),
199 (0x0e00 << 16) | (0xc15c >> 2),
201 (0x0e00 << 16) | (0xc168 >> 2),
203 (0x0e00 << 16) | (0xc170 >> 2),
205 (0x0e00 << 16) | (0xc178 >> 2),
207 (0x0e00 << 16) | (0xc204 >> 2),
209 (0x0e00 << 16) | (0xc2b4 >> 2),
211 (0x0e00 << 16) | (0xc2b8 >> 2),
213 (0x0e00 << 16) | (0xc2bc >> 2),
215 (0x0e00 << 16) | (0xc2c0 >> 2),
217 (0x0e00 << 16) | (0x8228 >> 2),
219 (0x0e00 << 16) | (0x829c >> 2),
221 (0x0e00 << 16) | (0x869c >> 2),
223 (0x0600 << 16) | (0x98f4 >> 2),
225 (0x0e00 << 16) | (0x98f8 >> 2),
227 (0x0e00 << 16) | (0x9900 >> 2),
229 (0x0e00 << 16) | (0xc260 >> 2),
231 (0x0e00 << 16) | (0x90e8 >> 2),
233 (0x0e00 << 16) | (0x3c000 >> 2),
235 (0x0e00 << 16) | (0x3c00c >> 2),
237 (0x0e00 << 16) | (0x8c1c >> 2),
239 (0x0e00 << 16) | (0x9700 >> 2),
241 (0x0e00 << 16) | (0xcd20 >> 2),
243 (0x4e00 << 16) | (0xcd20 >> 2),
245 (0x5e00 << 16) | (0xcd20 >> 2),
247 (0x6e00 << 16) | (0xcd20 >> 2),
249 (0x7e00 << 16) | (0xcd20 >> 2),
251 (0x8e00 << 16) | (0xcd20 >> 2),
253 (0x9e00 << 16) | (0xcd20 >> 2),
255 (0xae00 << 16) | (0xcd20 >> 2),
257 (0xbe00 << 16) | (0xcd20 >> 2),
259 (0x0e00 << 16) | (0x89bc >> 2),
261 (0x0e00 << 16) | (0x8900 >> 2),
264 (0x0e00 << 16) | (0xc130 >> 2),
266 (0x0e00 << 16) | (0xc134 >> 2),
268 (0x0e00 << 16) | (0xc1fc >> 2),
270 (0x0e00 << 16) | (0xc208 >> 2),
272 (0x0e00 << 16) | (0xc264 >> 2),
274 (0x0e00 << 16) | (0xc268 >> 2),
276 (0x0e00 << 16) | (0xc26c >> 2),
278 (0x0e00 << 16) | (0xc270 >> 2),
280 (0x0e00 << 16) | (0xc274 >> 2),
282 (0x0e00 << 16) | (0xc278 >> 2),
284 (0x0e00 << 16) | (0xc27c >> 2),
286 (0x0e00 << 16) | (0xc280 >> 2),
288 (0x0e00 << 16) | (0xc284 >> 2),
290 (0x0e00 << 16) | (0xc288 >> 2),
292 (0x0e00 << 16) | (0xc28c >> 2),
294 (0x0e00 << 16) | (0xc290 >> 2),
296 (0x0e00 << 16) | (0xc294 >> 2),
298 (0x0e00 << 16) | (0xc298 >> 2),
300 (0x0e00 << 16) | (0xc29c >> 2),
302 (0x0e00 << 16) | (0xc2a0 >> 2),
304 (0x0e00 << 16) | (0xc2a4 >> 2),
306 (0x0e00 << 16) | (0xc2a8 >> 2),
308 (0x0e00 << 16) | (0xc2ac >> 2),
310 (0x0e00 << 16) | (0xc2b0 >> 2),
312 (0x0e00 << 16) | (0x301d0 >> 2),
314 (0x0e00 << 16) | (0x30238 >> 2),
316 (0x0e00 << 16) | (0x30250 >> 2),
318 (0x0e00 << 16) | (0x30254 >> 2),
320 (0x0e00 << 16) | (0x30258 >> 2),
322 (0x0e00 << 16) | (0x3025c >> 2),
324 (0x4e00 << 16) | (0xc900 >> 2),
326 (0x5e00 << 16) | (0xc900 >> 2),
328 (0x6e00 << 16) | (0xc900 >> 2),
330 (0x7e00 << 16) | (0xc900 >> 2),
332 (0x8e00 << 16) | (0xc900 >> 2),
334 (0x9e00 << 16) | (0xc900 >> 2),
336 (0xae00 << 16) | (0xc900 >> 2),
338 (0xbe00 << 16) | (0xc900 >> 2),
340 (0x4e00 << 16) | (0xc904 >> 2),
342 (0x5e00 << 16) | (0xc904 >> 2),
344 (0x6e00 << 16) | (0xc904 >> 2),
346 (0x7e00 << 16) | (0xc904 >> 2),
348 (0x8e00 << 16) | (0xc904 >> 2),
350 (0x9e00 << 16) | (0xc904 >> 2),
352 (0xae00 << 16) | (0xc904 >> 2),
354 (0xbe00 << 16) | (0xc904 >> 2),
356 (0x4e00 << 16) | (0xc908 >> 2),
358 (0x5e00 << 16) | (0xc908 >> 2),
360 (0x6e00 << 16) | (0xc908 >> 2),
362 (0x7e00 << 16) | (0xc908 >> 2),
364 (0x8e00 << 16) | (0xc908 >> 2),
366 (0x9e00 << 16) | (0xc908 >> 2),
368 (0xae00 << 16) | (0xc908 >> 2),
370 (0xbe00 << 16) | (0xc908 >> 2),
372 (0x4e00 << 16) | (0xc90c >> 2),
374 (0x5e00 << 16) | (0xc90c >> 2),
376 (0x6e00 << 16) | (0xc90c >> 2),
378 (0x7e00 << 16) | (0xc90c >> 2),
380 (0x8e00 << 16) | (0xc90c >> 2),
382 (0x9e00 << 16) | (0xc90c >> 2),
384 (0xae00 << 16) | (0xc90c >> 2),
386 (0xbe00 << 16) | (0xc90c >> 2),
388 (0x4e00 << 16) | (0xc910 >> 2),
390 (0x5e00 << 16) | (0xc910 >> 2),
392 (0x6e00 << 16) | (0xc910 >> 2),
394 (0x7e00 << 16) | (0xc910 >> 2),
396 (0x8e00 << 16) | (0xc910 >> 2),
398 (0x9e00 << 16) | (0xc910 >> 2),
400 (0xae00 << 16) | (0xc910 >> 2),
402 (0xbe00 << 16) | (0xc910 >> 2),
404 (0x0e00 << 16) | (0xc99c >> 2),
406 (0x0e00 << 16) | (0x9834 >> 2),
408 (0x0000 << 16) | (0x30f00 >> 2),
410 (0x0001 << 16) | (0x30f00 >> 2),
412 (0x0000 << 16) | (0x30f04 >> 2),
414 (0x0001 << 16) | (0x30f04 >> 2),
416 (0x0000 << 16) | (0x30f08 >> 2),
418 (0x0001 << 16) | (0x30f08 >> 2),
420 (0x0000 << 16) | (0x30f0c >> 2),
422 (0x0001 << 16) | (0x30f0c >> 2),
424 (0x0600 << 16) | (0x9b7c >> 2),
426 (0x0e00 << 16) | (0x8a14 >> 2),
428 (0x0e00 << 16) | (0x8a18 >> 2),
430 (0x0600 << 16) | (0x30a00 >> 2),
432 (0x0e00 << 16) | (0x8bf0 >> 2),
434 (0x0e00 << 16) | (0x8bcc >> 2),
436 (0x0e00 << 16) | (0x8b24 >> 2),
438 (0x0e00 << 16) | (0x30a04 >> 2),
440 (0x0600 << 16) | (0x30a10 >> 2),
442 (0x0600 << 16) | (0x30a14 >> 2),
444 (0x0600 << 16) | (0x30a18 >> 2),
446 (0x0600 << 16) | (0x30a2c >> 2),
448 (0x0e00 << 16) | (0xc700 >> 2),
450 (0x0e00 << 16) | (0xc704 >> 2),
452 (0x0e00 << 16) | (0xc708 >> 2),
454 (0x0e00 << 16) | (0xc768 >> 2),
456 (0x0400 << 16) | (0xc770 >> 2),
458 (0x0400 << 16) | (0xc774 >> 2),
460 (0x0400 << 16) | (0xc778 >> 2),
462 (0x0400 << 16) | (0xc77c >> 2),
464 (0x0400 << 16) | (0xc780 >> 2),
466 (0x0400 << 16) | (0xc784 >> 2),
468 (0x0400 << 16) | (0xc788 >> 2),
470 (0x0400 << 16) | (0xc78c >> 2),
472 (0x0400 << 16) | (0xc798 >> 2),
474 (0x0400 << 16) | (0xc79c >> 2),
476 (0x0400 << 16) | (0xc7a0 >> 2),
478 (0x0400 << 16) | (0xc7a4 >> 2),
480 (0x0400 << 16) | (0xc7a8 >> 2),
482 (0x0400 << 16) | (0xc7ac >> 2),
484 (0x0400 << 16) | (0xc7b0 >> 2),
486 (0x0400 << 16) | (0xc7b4 >> 2),
488 (0x0e00 << 16) | (0x9100 >> 2),
490 (0x0e00 << 16) | (0x3c010 >> 2),
492 (0x0e00 << 16) | (0x92a8 >> 2),
494 (0x0e00 << 16) | (0x92ac >> 2),
496 (0x0e00 << 16) | (0x92b4 >> 2),
498 (0x0e00 << 16) | (0x92b8 >> 2),
500 (0x0e00 << 16) | (0x92bc >> 2),
502 (0x0e00 << 16) | (0x92c0 >> 2),
504 (0x0e00 << 16) | (0x92c4 >> 2),
506 (0x0e00 << 16) | (0x92c8 >> 2),
508 (0x0e00 << 16) | (0x92cc >> 2),
510 (0x0e00 << 16) | (0x92d0 >> 2),
512 (0x0e00 << 16) | (0x8c00 >> 2),
514 (0x0e00 << 16) | (0x8c04 >> 2),
516 (0x0e00 << 16) | (0x8c20 >> 2),
518 (0x0e00 << 16) | (0x8c38 >> 2),
520 (0x0e00 << 16) | (0x8c3c >> 2),
522 (0x0e00 << 16) | (0xae00 >> 2),
524 (0x0e00 << 16) | (0x9604 >> 2),
526 (0x0e00 << 16) | (0xac08 >> 2),
528 (0x0e00 << 16) | (0xac0c >> 2),
530 (0x0e00 << 16) | (0xac10 >> 2),
532 (0x0e00 << 16) | (0xac14 >> 2),
534 (0x0e00 << 16) | (0xac58 >> 2),
536 (0x0e00 << 16) | (0xac68 >> 2),
538 (0x0e00 << 16) | (0xac6c >> 2),
540 (0x0e00 << 16) | (0xac70 >> 2),
542 (0x0e00 << 16) | (0xac74 >> 2),
544 (0x0e00 << 16) | (0xac78 >> 2),
546 (0x0e00 << 16) | (0xac7c >> 2),
548 (0x0e00 << 16) | (0xac80 >> 2),
550 (0x0e00 << 16) | (0xac84 >> 2),
552 (0x0e00 << 16) | (0xac88 >> 2),
554 (0x0e00 << 16) | (0xac8c >> 2),
556 (0x0e00 << 16) | (0x970c >> 2),
558 (0x0e00 << 16) | (0x9714 >> 2),
560 (0x0e00 << 16) | (0x9718 >> 2),
562 (0x0e00 << 16) | (0x971c >> 2),
564 (0x0e00 << 16) | (0x31068 >> 2),
566 (0x4e00 << 16) | (0x31068 >> 2),
568 (0x5e00 << 16) | (0x31068 >> 2),
570 (0x6e00 << 16) | (0x31068 >> 2),
572 (0x7e00 << 16) | (0x31068 >> 2),
574 (0x8e00 << 16) | (0x31068 >> 2),
576 (0x9e00 << 16) | (0x31068 >> 2),
578 (0xae00 << 16) | (0x31068 >> 2),
580 (0xbe00 << 16) | (0x31068 >> 2),
582 (0x0e00 << 16) | (0xcd10 >> 2),
584 (0x0e00 << 16) | (0xcd14 >> 2),
586 (0x0e00 << 16) | (0x88b0 >> 2),
588 (0x0e00 << 16) | (0x88b4 >> 2),
590 (0x0e00 << 16) | (0x88b8 >> 2),
592 (0x0e00 << 16) | (0x88bc >> 2),
594 (0x0400 << 16) | (0x89c0 >> 2),
596 (0x0e00 << 16) | (0x88c4 >> 2),
598 (0x0e00 << 16) | (0x88c8 >> 2),
600 (0x0e00 << 16) | (0x88d0 >> 2),
602 (0x0e00 << 16) | (0x88d4 >> 2),
604 (0x0e00 << 16) | (0x88d8 >> 2),
606 (0x0e00 << 16) | (0x8980 >> 2),
608 (0x0e00 << 16) | (0x30938 >> 2),
610 (0x0e00 << 16) | (0x3093c >> 2),
612 (0x0e00 << 16) | (0x30940 >> 2),
614 (0x0e00 << 16) | (0x89a0 >> 2),
616 (0x0e00 << 16) | (0x30900 >> 2),
618 (0x0e00 << 16) | (0x30904 >> 2),
620 (0x0e00 << 16) | (0x89b4 >> 2),
622 (0x0e00 << 16) | (0x3c210 >> 2),
624 (0x0e00 << 16) | (0x3c214 >> 2),
626 (0x0e00 << 16) | (0x3c218 >> 2),
628 (0x0e00 << 16) | (0x8904 >> 2),
631 (0x0e00 << 16) | (0x8c28 >> 2),
632 (0x0e00 << 16) | (0x8c2c >> 2),
633 (0x0e00 << 16) | (0x8c30 >> 2),
634 (0x0e00 << 16) | (0x8c34 >> 2),
635 (0x0e00 << 16) | (0x9600 >> 2),
/*
 * RLC save/restore register list for Kalindi (small APU GFX).
 * Same packed-entry format as the Spectre list above: selector in the upper
 * 16 bits, dword register offset (byte offset >> 2) in the lower 16 bits.
 * Shorter than the Spectre table (fewer SE/SH instances on this part —
 * presumably, confirm against the RLC consumer).
 * NOTE(review): brace lines and interleaved comments are elided in this chunk.
 */
638 static const u32 kalindi_rlc_save_restore_register_list[] =
640 (0x0e00 << 16) | (0xc12c >> 2),
642 (0x0e00 << 16) | (0xc140 >> 2),
644 (0x0e00 << 16) | (0xc150 >> 2),
646 (0x0e00 << 16) | (0xc15c >> 2),
648 (0x0e00 << 16) | (0xc168 >> 2),
650 (0x0e00 << 16) | (0xc170 >> 2),
652 (0x0e00 << 16) | (0xc204 >> 2),
654 (0x0e00 << 16) | (0xc2b4 >> 2),
656 (0x0e00 << 16) | (0xc2b8 >> 2),
658 (0x0e00 << 16) | (0xc2bc >> 2),
660 (0x0e00 << 16) | (0xc2c0 >> 2),
662 (0x0e00 << 16) | (0x8228 >> 2),
664 (0x0e00 << 16) | (0x829c >> 2),
666 (0x0e00 << 16) | (0x869c >> 2),
668 (0x0600 << 16) | (0x98f4 >> 2),
670 (0x0e00 << 16) | (0x98f8 >> 2),
672 (0x0e00 << 16) | (0x9900 >> 2),
674 (0x0e00 << 16) | (0xc260 >> 2),
676 (0x0e00 << 16) | (0x90e8 >> 2),
678 (0x0e00 << 16) | (0x3c000 >> 2),
680 (0x0e00 << 16) | (0x3c00c >> 2),
682 (0x0e00 << 16) | (0x8c1c >> 2),
684 (0x0e00 << 16) | (0x9700 >> 2),
686 (0x0e00 << 16) | (0xcd20 >> 2),
688 (0x4e00 << 16) | (0xcd20 >> 2),
690 (0x5e00 << 16) | (0xcd20 >> 2),
692 (0x6e00 << 16) | (0xcd20 >> 2),
694 (0x7e00 << 16) | (0xcd20 >> 2),
696 (0x0e00 << 16) | (0x89bc >> 2),
698 (0x0e00 << 16) | (0x8900 >> 2),
701 (0x0e00 << 16) | (0xc130 >> 2),
703 (0x0e00 << 16) | (0xc134 >> 2),
705 (0x0e00 << 16) | (0xc1fc >> 2),
707 (0x0e00 << 16) | (0xc208 >> 2),
709 (0x0e00 << 16) | (0xc264 >> 2),
711 (0x0e00 << 16) | (0xc268 >> 2),
713 (0x0e00 << 16) | (0xc26c >> 2),
715 (0x0e00 << 16) | (0xc270 >> 2),
717 (0x0e00 << 16) | (0xc274 >> 2),
719 (0x0e00 << 16) | (0xc28c >> 2),
721 (0x0e00 << 16) | (0xc290 >> 2),
723 (0x0e00 << 16) | (0xc294 >> 2),
725 (0x0e00 << 16) | (0xc298 >> 2),
727 (0x0e00 << 16) | (0xc2a0 >> 2),
729 (0x0e00 << 16) | (0xc2a4 >> 2),
731 (0x0e00 << 16) | (0xc2a8 >> 2),
733 (0x0e00 << 16) | (0xc2ac >> 2),
735 (0x0e00 << 16) | (0x301d0 >> 2),
737 (0x0e00 << 16) | (0x30238 >> 2),
739 (0x0e00 << 16) | (0x30250 >> 2),
741 (0x0e00 << 16) | (0x30254 >> 2),
743 (0x0e00 << 16) | (0x30258 >> 2),
745 (0x0e00 << 16) | (0x3025c >> 2),
747 (0x4e00 << 16) | (0xc900 >> 2),
749 (0x5e00 << 16) | (0xc900 >> 2),
751 (0x6e00 << 16) | (0xc900 >> 2),
753 (0x7e00 << 16) | (0xc900 >> 2),
755 (0x4e00 << 16) | (0xc904 >> 2),
757 (0x5e00 << 16) | (0xc904 >> 2),
759 (0x6e00 << 16) | (0xc904 >> 2),
761 (0x7e00 << 16) | (0xc904 >> 2),
763 (0x4e00 << 16) | (0xc908 >> 2),
765 (0x5e00 << 16) | (0xc908 >> 2),
767 (0x6e00 << 16) | (0xc908 >> 2),
769 (0x7e00 << 16) | (0xc908 >> 2),
771 (0x4e00 << 16) | (0xc90c >> 2),
773 (0x5e00 << 16) | (0xc90c >> 2),
775 (0x6e00 << 16) | (0xc90c >> 2),
777 (0x7e00 << 16) | (0xc90c >> 2),
779 (0x4e00 << 16) | (0xc910 >> 2),
781 (0x5e00 << 16) | (0xc910 >> 2),
783 (0x6e00 << 16) | (0xc910 >> 2),
785 (0x7e00 << 16) | (0xc910 >> 2),
787 (0x0e00 << 16) | (0xc99c >> 2),
789 (0x0e00 << 16) | (0x9834 >> 2),
791 (0x0000 << 16) | (0x30f00 >> 2),
793 (0x0000 << 16) | (0x30f04 >> 2),
795 (0x0000 << 16) | (0x30f08 >> 2),
797 (0x0000 << 16) | (0x30f0c >> 2),
799 (0x0600 << 16) | (0x9b7c >> 2),
801 (0x0e00 << 16) | (0x8a14 >> 2),
803 (0x0e00 << 16) | (0x8a18 >> 2),
805 (0x0600 << 16) | (0x30a00 >> 2),
807 (0x0e00 << 16) | (0x8bf0 >> 2),
809 (0x0e00 << 16) | (0x8bcc >> 2),
811 (0x0e00 << 16) | (0x8b24 >> 2),
813 (0x0e00 << 16) | (0x30a04 >> 2),
815 (0x0600 << 16) | (0x30a10 >> 2),
817 (0x0600 << 16) | (0x30a14 >> 2),
819 (0x0600 << 16) | (0x30a18 >> 2),
821 (0x0600 << 16) | (0x30a2c >> 2),
823 (0x0e00 << 16) | (0xc700 >> 2),
825 (0x0e00 << 16) | (0xc704 >> 2),
827 (0x0e00 << 16) | (0xc708 >> 2),
829 (0x0e00 << 16) | (0xc768 >> 2),
831 (0x0400 << 16) | (0xc770 >> 2),
833 (0x0400 << 16) | (0xc774 >> 2),
835 (0x0400 << 16) | (0xc798 >> 2),
837 (0x0400 << 16) | (0xc79c >> 2),
839 (0x0e00 << 16) | (0x9100 >> 2),
841 (0x0e00 << 16) | (0x3c010 >> 2),
843 (0x0e00 << 16) | (0x8c00 >> 2),
845 (0x0e00 << 16) | (0x8c04 >> 2),
847 (0x0e00 << 16) | (0x8c20 >> 2),
849 (0x0e00 << 16) | (0x8c38 >> 2),
851 (0x0e00 << 16) | (0x8c3c >> 2),
853 (0x0e00 << 16) | (0xae00 >> 2),
855 (0x0e00 << 16) | (0x9604 >> 2),
857 (0x0e00 << 16) | (0xac08 >> 2),
859 (0x0e00 << 16) | (0xac0c >> 2),
861 (0x0e00 << 16) | (0xac10 >> 2),
863 (0x0e00 << 16) | (0xac14 >> 2),
865 (0x0e00 << 16) | (0xac58 >> 2),
867 (0x0e00 << 16) | (0xac68 >> 2),
869 (0x0e00 << 16) | (0xac6c >> 2),
871 (0x0e00 << 16) | (0xac70 >> 2),
873 (0x0e00 << 16) | (0xac74 >> 2),
875 (0x0e00 << 16) | (0xac78 >> 2),
877 (0x0e00 << 16) | (0xac7c >> 2),
879 (0x0e00 << 16) | (0xac80 >> 2),
881 (0x0e00 << 16) | (0xac84 >> 2),
883 (0x0e00 << 16) | (0xac88 >> 2),
885 (0x0e00 << 16) | (0xac8c >> 2),
887 (0x0e00 << 16) | (0x970c >> 2),
889 (0x0e00 << 16) | (0x9714 >> 2),
891 (0x0e00 << 16) | (0x9718 >> 2),
893 (0x0e00 << 16) | (0x971c >> 2),
895 (0x0e00 << 16) | (0x31068 >> 2),
897 (0x4e00 << 16) | (0x31068 >> 2),
899 (0x5e00 << 16) | (0x31068 >> 2),
901 (0x6e00 << 16) | (0x31068 >> 2),
903 (0x7e00 << 16) | (0x31068 >> 2),
905 (0x0e00 << 16) | (0xcd10 >> 2),
907 (0x0e00 << 16) | (0xcd14 >> 2),
909 (0x0e00 << 16) | (0x88b0 >> 2),
911 (0x0e00 << 16) | (0x88b4 >> 2),
913 (0x0e00 << 16) | (0x88b8 >> 2),
915 (0x0e00 << 16) | (0x88bc >> 2),
917 (0x0400 << 16) | (0x89c0 >> 2),
919 (0x0e00 << 16) | (0x88c4 >> 2),
921 (0x0e00 << 16) | (0x88c8 >> 2),
923 (0x0e00 << 16) | (0x88d0 >> 2),
925 (0x0e00 << 16) | (0x88d4 >> 2),
927 (0x0e00 << 16) | (0x88d8 >> 2),
929 (0x0e00 << 16) | (0x8980 >> 2),
931 (0x0e00 << 16) | (0x30938 >> 2),
933 (0x0e00 << 16) | (0x3093c >> 2),
935 (0x0e00 << 16) | (0x30940 >> 2),
937 (0x0e00 << 16) | (0x89a0 >> 2),
939 (0x0e00 << 16) | (0x30900 >> 2),
941 (0x0e00 << 16) | (0x30904 >> 2),
943 (0x0e00 << 16) | (0x89b4 >> 2),
945 (0x0e00 << 16) | (0x3e1fc >> 2),
947 (0x0e00 << 16) | (0x3c210 >> 2),
949 (0x0e00 << 16) | (0x3c214 >> 2),
951 (0x0e00 << 16) | (0x3c218 >> 2),
953 (0x0e00 << 16) | (0x8904 >> 2),
956 (0x0e00 << 16) | (0x8c28 >> 2),
957 (0x0e00 << 16) | (0x8c2c >> 2),
958 (0x0e00 << 16) | (0x8c30 >> 2),
959 (0x0e00 << 16) | (0x8c34 >> 2),
960 (0x0e00 << 16) | (0x9600 >> 2),
/*
 * Golden SPM register settings for Bonaire. Rows appear to be
 * {offset, and_mask, or_value} triples — presumably applied by
 * radeon_program_register_sequence(); confirm at the call site.
 * NOTE(review): brace lines elided in this chunk.
 */
963 static const u32 bonaire_golden_spm_registers[] =
965 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Golden register settings common to Bonaire configurations; same apparent
 * {offset, and_mask, or_value} row format as the other golden tables.
 * NOTE(review): brace lines elided in this chunk.
 */
968 static const u32 bonaire_golden_common_registers[] =
970 0xc770, 0xffffffff, 0x00000800,
971 0xc774, 0xffffffff, 0x00000800,
972 0xc798, 0xffffffff, 0x00007fbf,
973 0xc79c, 0xffffffff, 0x00007faf
/*
 * Golden (recommended power-on) register settings for Bonaire; apparent
 * {offset, and_mask, or_value} triples. NOTE(review): brace lines elided.
 */
976 static const u32 bonaire_golden_registers[] =
978 0x3354, 0x00000333, 0x00000333,
979 0x3350, 0x000c0fc0, 0x00040200,
980 0x9a10, 0x00010000, 0x00058208,
981 0x3c000, 0xffff1fff, 0x00140000,
982 0x3c200, 0xfdfc0fff, 0x00000100,
983 0x3c234, 0x40000000, 0x40000200,
984 0x9830, 0xffffffff, 0x00000000,
985 0x9834, 0xf00fffff, 0x00000400,
986 0x9838, 0x0002021c, 0x00020200,
987 0xc78, 0x00000080, 0x00000000,
988 0x5bb0, 0x000000f0, 0x00000070,
989 0x5bc0, 0xf0311fff, 0x80300000,
990 0x98f8, 0x73773777, 0x12010001,
991 0x350c, 0x00810000, 0x408af000,
992 0x7030, 0x31000111, 0x00000011,
993 0x2f48, 0x73773777, 0x12010001,
994 0x220c, 0x00007fb6, 0x0021a1b1,
995 0x2210, 0x00007fb6, 0x002021b1,
996 0x2180, 0x00007fb6, 0x00002191,
997 0x2218, 0x00007fb6, 0x002121b1,
998 0x221c, 0x00007fb6, 0x002021b1,
999 0x21dc, 0x00007fb6, 0x00002191,
1000 0x21e0, 0x00007fb6, 0x00002191,
1001 0x3628, 0x0000003f, 0x0000000a,
1002 0x362c, 0x0000003f, 0x0000000a,
1003 0x2ae4, 0x00073ffe, 0x000022a2,
1004 0x240c, 0x000007ff, 0x00000000,
1005 0x8a14, 0xf000003f, 0x00000007,
1006 0x8bf0, 0x00002001, 0x00000001,
1007 0x8b24, 0xffffffff, 0x00ffffff,
1008 0x30a04, 0x0000ff0f, 0x00000000,
1009 0x28a4c, 0x07ffffff, 0x06000000,
1010 0x4d8, 0x00000fff, 0x00000100,
1011 0x3e78, 0x00000001, 0x00000002,
1012 0x9100, 0x03000000, 0x0362c688,
1013 0x8c00, 0x000000ff, 0x00000001,
1014 0xe40, 0x00001fff, 0x00001fff,
1015 0x9060, 0x0000007f, 0x00000020,
1016 0x9508, 0x00010000, 0x00010000,
1017 0xac14, 0x000003ff, 0x000000f3,
1018 0xac0c, 0xffffffff, 0x00001032
/*
 * Medium/coarse-grain clockgating (MGCG/CGCG) init sequence for Bonaire;
 * same apparent {offset, and_mask, or_value} row format as the golden
 * tables. NOTE(review): brace lines elided in this chunk.
 */
1021 static const u32 bonaire_mgcg_cgcg_init[] =
1023 0xc420, 0xffffffff, 0xfffffffc,
1024 0x30800, 0xffffffff, 0xe0000000,
1025 0x3c2a0, 0xffffffff, 0x00000100,
1026 0x3c208, 0xffffffff, 0x00000100,
1027 0x3c2c0, 0xffffffff, 0xc0000100,
1028 0x3c2c8, 0xffffffff, 0xc0000100,
1029 0x3c2c4, 0xffffffff, 0xc0000100,
1030 0x55e4, 0xffffffff, 0x00600100,
1031 0x3c280, 0xffffffff, 0x00000100,
1032 0x3c214, 0xffffffff, 0x06000100,
1033 0x3c220, 0xffffffff, 0x00000100,
1034 0x3c218, 0xffffffff, 0x06000100,
1035 0x3c204, 0xffffffff, 0x00000100,
1036 0x3c2e0, 0xffffffff, 0x00000100,
1037 0x3c224, 0xffffffff, 0x00000100,
1038 0x3c200, 0xffffffff, 0x00000100,
1039 0x3c230, 0xffffffff, 0x00000100,
1040 0x3c234, 0xffffffff, 0x00000100,
1041 0x3c250, 0xffffffff, 0x00000100,
1042 0x3c254, 0xffffffff, 0x00000100,
1043 0x3c258, 0xffffffff, 0x00000100,
1044 0x3c25c, 0xffffffff, 0x00000100,
1045 0x3c260, 0xffffffff, 0x00000100,
1046 0x3c27c, 0xffffffff, 0x00000100,
1047 0x3c278, 0xffffffff, 0x00000100,
1048 0x3c210, 0xffffffff, 0x06000100,
1049 0x3c290, 0xffffffff, 0x00000100,
1050 0x3c274, 0xffffffff, 0x00000100,
1051 0x3c2b4, 0xffffffff, 0x00000100,
1052 0x3c2b0, 0xffffffff, 0x00000100,
1053 0x3c270, 0xffffffff, 0x00000100,
1054 0x30800, 0xffffffff, 0xe0000000,
1055 0x3c020, 0xffffffff, 0x00010000,
1056 0x3c024, 0xffffffff, 0x00030002,
1057 0x3c028, 0xffffffff, 0x00040007,
1058 0x3c02c, 0xffffffff, 0x00060005,
1059 0x3c030, 0xffffffff, 0x00090008,
1060 0x3c034, 0xffffffff, 0x00010000,
1061 0x3c038, 0xffffffff, 0x00030002,
1062 0x3c03c, 0xffffffff, 0x00040007,
1063 0x3c040, 0xffffffff, 0x00060005,
1064 0x3c044, 0xffffffff, 0x00090008,
1065 0x3c048, 0xffffffff, 0x00010000,
1066 0x3c04c, 0xffffffff, 0x00030002,
1067 0x3c050, 0xffffffff, 0x00040007,
1068 0x3c054, 0xffffffff, 0x00060005,
1069 0x3c058, 0xffffffff, 0x00090008,
1070 0x3c05c, 0xffffffff, 0x00010000,
1071 0x3c060, 0xffffffff, 0x00030002,
1072 0x3c064, 0xffffffff, 0x00040007,
1073 0x3c068, 0xffffffff, 0x00060005,
1074 0x3c06c, 0xffffffff, 0x00090008,
1075 0x3c070, 0xffffffff, 0x00010000,
1076 0x3c074, 0xffffffff, 0x00030002,
1077 0x3c078, 0xffffffff, 0x00040007,
1078 0x3c07c, 0xffffffff, 0x00060005,
1079 0x3c080, 0xffffffff, 0x00090008,
1080 0x3c084, 0xffffffff, 0x00010000,
1081 0x3c088, 0xffffffff, 0x00030002,
1082 0x3c08c, 0xffffffff, 0x00040007,
1083 0x3c090, 0xffffffff, 0x00060005,
1084 0x3c094, 0xffffffff, 0x00090008,
1085 0x3c098, 0xffffffff, 0x00010000,
1086 0x3c09c, 0xffffffff, 0x00030002,
1087 0x3c0a0, 0xffffffff, 0x00040007,
1088 0x3c0a4, 0xffffffff, 0x00060005,
1089 0x3c0a8, 0xffffffff, 0x00090008,
1090 0x3c000, 0xffffffff, 0x96e00200,
1091 0x8708, 0xffffffff, 0x00900100,
1092 0xc424, 0xffffffff, 0x0020003f,
1093 0x38, 0xffffffff, 0x0140001c,
1094 0x3c, 0x000f0000, 0x000f0000,
1095 0x220, 0xffffffff, 0xC060000C,
1096 0x224, 0xc0000fff, 0x00000100,
1097 0xf90, 0xffffffff, 0x00000100,
1098 0xf98, 0x00000101, 0x00000000,
1099 0x20a8, 0xffffffff, 0x00000104,
1100 0x55e4, 0xff000fff, 0x00000100,
1101 0x30cc, 0xc0000fff, 0x00000104,
1102 0xc1e4, 0x00000001, 0x00000001,
1103 0xd00c, 0xff000ff0, 0x00000100,
1104 0xd80c, 0xff000ff0, 0x00000100
/*
 * Golden SPM register settings for Spectre (Kaveri); apparent
 * {offset, and_mask, or_value} triple. NOTE(review): brace lines elided.
 */
1107 static const u32 spectre_golden_spm_registers[] =
1109 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Common golden register settings for Spectre; identical values to the
 * Bonaire common table. NOTE(review): brace lines elided.
 */
1112 static const u32 spectre_golden_common_registers[] =
1114 0xc770, 0xffffffff, 0x00000800,
1115 0xc774, 0xffffffff, 0x00000800,
1116 0xc798, 0xffffffff, 0x00007fbf,
1117 0xc79c, 0xffffffff, 0x00007faf
/*
 * Golden register settings for Spectre; apparent {offset, and_mask,
 * or_value} triples. NOTE(review): brace lines elided in this chunk.
 */
1120 static const u32 spectre_golden_registers[] =
1122 0x3c000, 0xffff1fff, 0x96940200,
1123 0x3c00c, 0xffff0001, 0xff000000,
1124 0x3c200, 0xfffc0fff, 0x00000100,
1125 0x6ed8, 0x00010101, 0x00010000,
1126 0x9834, 0xf00fffff, 0x00000400,
1127 0x9838, 0xfffffffc, 0x00020200,
1128 0x5bb0, 0x000000f0, 0x00000070,
1129 0x5bc0, 0xf0311fff, 0x80300000,
1130 0x98f8, 0x73773777, 0x12010001,
1131 0x9b7c, 0x00ff0000, 0x00fc0000,
1132 0x2f48, 0x73773777, 0x12010001,
1133 0x8a14, 0xf000003f, 0x00000007,
1134 0x8b24, 0xffffffff, 0x00ffffff,
1135 0x28350, 0x3f3f3fff, 0x00000082,
1136 0x28354, 0x0000003f, 0x00000000,
1137 0x3e78, 0x00000001, 0x00000002,
1138 0x913c, 0xffff03df, 0x00000004,
1139 0xc768, 0x00000008, 0x00000008,
1140 0x8c00, 0x000008ff, 0x00000800,
1141 0x9508, 0x00010000, 0x00010000,
1142 0xac0c, 0xffffffff, 0x54763210,
1143 0x214f8, 0x01ff01ff, 0x00000002,
1144 0x21498, 0x007ff800, 0x00200000,
1145 0x2015c, 0xffffffff, 0x00000f40,
1146 0x30934, 0xffffffff, 0x00000001
/*
 * MGCG/CGCG clockgating init sequence for Spectre; same apparent
 * {offset, and_mask, or_value} row format. Differs from the Bonaire
 * sequence e.g. in the 0x3c2c0/0x3c2c4/0x3c2c8 values and in covering
 * additional 0x3c0ac-0x3c0bc entries. NOTE(review): brace lines elided.
 */
1149 static const u32 spectre_mgcg_cgcg_init[] =
1151 0xc420, 0xffffffff, 0xfffffffc,
1152 0x30800, 0xffffffff, 0xe0000000,
1153 0x3c2a0, 0xffffffff, 0x00000100,
1154 0x3c208, 0xffffffff, 0x00000100,
1155 0x3c2c0, 0xffffffff, 0x00000100,
1156 0x3c2c8, 0xffffffff, 0x00000100,
1157 0x3c2c4, 0xffffffff, 0x00000100,
1158 0x55e4, 0xffffffff, 0x00600100,
1159 0x3c280, 0xffffffff, 0x00000100,
1160 0x3c214, 0xffffffff, 0x06000100,
1161 0x3c220, 0xffffffff, 0x00000100,
1162 0x3c218, 0xffffffff, 0x06000100,
1163 0x3c204, 0xffffffff, 0x00000100,
1164 0x3c2e0, 0xffffffff, 0x00000100,
1165 0x3c224, 0xffffffff, 0x00000100,
1166 0x3c200, 0xffffffff, 0x00000100,
1167 0x3c230, 0xffffffff, 0x00000100,
1168 0x3c234, 0xffffffff, 0x00000100,
1169 0x3c250, 0xffffffff, 0x00000100,
1170 0x3c254, 0xffffffff, 0x00000100,
1171 0x3c258, 0xffffffff, 0x00000100,
1172 0x3c25c, 0xffffffff, 0x00000100,
1173 0x3c260, 0xffffffff, 0x00000100,
1174 0x3c27c, 0xffffffff, 0x00000100,
1175 0x3c278, 0xffffffff, 0x00000100,
1176 0x3c210, 0xffffffff, 0x06000100,
1177 0x3c290, 0xffffffff, 0x00000100,
1178 0x3c274, 0xffffffff, 0x00000100,
1179 0x3c2b4, 0xffffffff, 0x00000100,
1180 0x3c2b0, 0xffffffff, 0x00000100,
1181 0x3c270, 0xffffffff, 0x00000100,
1182 0x30800, 0xffffffff, 0xe0000000,
1183 0x3c020, 0xffffffff, 0x00010000,
1184 0x3c024, 0xffffffff, 0x00030002,
1185 0x3c028, 0xffffffff, 0x00040007,
1186 0x3c02c, 0xffffffff, 0x00060005,
1187 0x3c030, 0xffffffff, 0x00090008,
1188 0x3c034, 0xffffffff, 0x00010000,
1189 0x3c038, 0xffffffff, 0x00030002,
1190 0x3c03c, 0xffffffff, 0x00040007,
1191 0x3c040, 0xffffffff, 0x00060005,
1192 0x3c044, 0xffffffff, 0x00090008,
1193 0x3c048, 0xffffffff, 0x00010000,
1194 0x3c04c, 0xffffffff, 0x00030002,
1195 0x3c050, 0xffffffff, 0x00040007,
1196 0x3c054, 0xffffffff, 0x00060005,
1197 0x3c058, 0xffffffff, 0x00090008,
1198 0x3c05c, 0xffffffff, 0x00010000,
1199 0x3c060, 0xffffffff, 0x00030002,
1200 0x3c064, 0xffffffff, 0x00040007,
1201 0x3c068, 0xffffffff, 0x00060005,
1202 0x3c06c, 0xffffffff, 0x00090008,
1203 0x3c070, 0xffffffff, 0x00010000,
1204 0x3c074, 0xffffffff, 0x00030002,
1205 0x3c078, 0xffffffff, 0x00040007,
1206 0x3c07c, 0xffffffff, 0x00060005,
1207 0x3c080, 0xffffffff, 0x00090008,
1208 0x3c084, 0xffffffff, 0x00010000,
1209 0x3c088, 0xffffffff, 0x00030002,
1210 0x3c08c, 0xffffffff, 0x00040007,
1211 0x3c090, 0xffffffff, 0x00060005,
1212 0x3c094, 0xffffffff, 0x00090008,
1213 0x3c098, 0xffffffff, 0x00010000,
1214 0x3c09c, 0xffffffff, 0x00030002,
1215 0x3c0a0, 0xffffffff, 0x00040007,
1216 0x3c0a4, 0xffffffff, 0x00060005,
1217 0x3c0a8, 0xffffffff, 0x00090008,
1218 0x3c0ac, 0xffffffff, 0x00010000,
1219 0x3c0b0, 0xffffffff, 0x00030002,
1220 0x3c0b4, 0xffffffff, 0x00040007,
1221 0x3c0b8, 0xffffffff, 0x00060005,
1222 0x3c0bc, 0xffffffff, 0x00090008,
1223 0x3c000, 0xffffffff, 0x96e00200,
1224 0x8708, 0xffffffff, 0x00900100,
1225 0xc424, 0xffffffff, 0x0020003f,
1226 0x38, 0xffffffff, 0x0140001c,
1227 0x3c, 0x000f0000, 0x000f0000,
1228 0x220, 0xffffffff, 0xC060000C,
1229 0x224, 0xc0000fff, 0x00000100,
1230 0xf90, 0xffffffff, 0x00000100,
1231 0xf98, 0x00000101, 0x00000000,
1232 0x20a8, 0xffffffff, 0x00000104,
1233 0x55e4, 0xff000fff, 0x00000100,
1234 0x30cc, 0xc0000fff, 0x00000104,
1235 0xc1e4, 0x00000001, 0x00000001,
1236 0xd00c, 0xff000ff0, 0x00000100,
1237 0xd80c, 0xff000ff0, 0x00000100
/*
 * Golden SPM register settings for Kalindi; apparent {offset, and_mask,
 * or_value} triple. NOTE(review): brace lines elided.
 */
1240 static const u32 kalindi_golden_spm_registers[] =
1242 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Common golden register settings for Kalindi; identical values to the
 * Bonaire/Spectre common tables. NOTE(review): brace lines elided.
 */
1245 static const u32 kalindi_golden_common_registers[] =
1247 0xc770, 0xffffffff, 0x00000800,
1248 0xc774, 0xffffffff, 0x00000800,
1249 0xc798, 0xffffffff, 0x00007fbf,
1250 0xc79c, 0xffffffff, 0x00007faf
/*
 * Golden register settings for Kalindi; apparent {offset, and_mask,
 * or_value} triples. NOTE(review): brace lines elided in this chunk.
 */
1253 static const u32 kalindi_golden_registers[] =
1255 0x3c000, 0xffffdfff, 0x6e944040,
1256 0x55e4, 0xff607fff, 0xfc000100,
1257 0x3c220, 0xff000fff, 0x00000100,
1258 0x3c224, 0xff000fff, 0x00000100,
1259 0x3c200, 0xfffc0fff, 0x00000100,
1260 0x6ed8, 0x00010101, 0x00010000,
1261 0x9830, 0xffffffff, 0x00000000,
1262 0x9834, 0xf00fffff, 0x00000400,
1263 0x5bb0, 0x000000f0, 0x00000070,
1264 0x5bc0, 0xf0311fff, 0x80300000,
1265 0x98f8, 0x73773777, 0x12010001,
1266 0x98fc, 0xffffffff, 0x00000010,
1267 0x9b7c, 0x00ff0000, 0x00fc0000,
1268 0x8030, 0x00001f0f, 0x0000100a,
1269 0x2f48, 0x73773777, 0x12010001,
1270 0x2408, 0x000fffff, 0x000c007f,
1271 0x8a14, 0xf000003f, 0x00000007,
1272 0x8b24, 0x3fff3fff, 0x00ffcfff,
1273 0x30a04, 0x0000ff0f, 0x00000000,
1274 0x28a4c, 0x07ffffff, 0x06000000,
1275 0x4d8, 0x00000fff, 0x00000100,
1276 0x3e78, 0x00000001, 0x00000002,
1277 0xc768, 0x00000008, 0x00000008,
1278 0x8c00, 0x000000ff, 0x00000003,
1279 0x214f8, 0x01ff01ff, 0x00000002,
1280 0x21498, 0x007ff800, 0x00200000,
1281 0x2015c, 0xffffffff, 0x00000f40,
1282 0x88c4, 0x001f3ae3, 0x00000082,
1283 0x88d4, 0x0000001f, 0x00000010,
1284 0x30934, 0xffffffff, 0x00000000
/*
 * MGCG/CGCG clockgating init sequence for Kalindi; same apparent
 * {offset, and_mask, or_value} row format, with fewer 0x3c0xx entries than
 * the dGPU sequences. NOTE(review): brace lines elided in this chunk.
 */
1287 static const u32 kalindi_mgcg_cgcg_init[] =
1289 0xc420, 0xffffffff, 0xfffffffc,
1290 0x30800, 0xffffffff, 0xe0000000,
1291 0x3c2a0, 0xffffffff, 0x00000100,
1292 0x3c208, 0xffffffff, 0x00000100,
1293 0x3c2c0, 0xffffffff, 0x00000100,
1294 0x3c2c8, 0xffffffff, 0x00000100,
1295 0x3c2c4, 0xffffffff, 0x00000100,
1296 0x55e4, 0xffffffff, 0x00600100,
1297 0x3c280, 0xffffffff, 0x00000100,
1298 0x3c214, 0xffffffff, 0x06000100,
1299 0x3c220, 0xffffffff, 0x00000100,
1300 0x3c218, 0xffffffff, 0x06000100,
1301 0x3c204, 0xffffffff, 0x00000100,
1302 0x3c2e0, 0xffffffff, 0x00000100,
1303 0x3c224, 0xffffffff, 0x00000100,
1304 0x3c200, 0xffffffff, 0x00000100,
1305 0x3c230, 0xffffffff, 0x00000100,
1306 0x3c234, 0xffffffff, 0x00000100,
1307 0x3c250, 0xffffffff, 0x00000100,
1308 0x3c254, 0xffffffff, 0x00000100,
1309 0x3c258, 0xffffffff, 0x00000100,
1310 0x3c25c, 0xffffffff, 0x00000100,
1311 0x3c260, 0xffffffff, 0x00000100,
1312 0x3c27c, 0xffffffff, 0x00000100,
1313 0x3c278, 0xffffffff, 0x00000100,
1314 0x3c210, 0xffffffff, 0x06000100,
1315 0x3c290, 0xffffffff, 0x00000100,
1316 0x3c274, 0xffffffff, 0x00000100,
1317 0x3c2b4, 0xffffffff, 0x00000100,
1318 0x3c2b0, 0xffffffff, 0x00000100,
1319 0x3c270, 0xffffffff, 0x00000100,
1320 0x30800, 0xffffffff, 0xe0000000,
1321 0x3c020, 0xffffffff, 0x00010000,
1322 0x3c024, 0xffffffff, 0x00030002,
1323 0x3c028, 0xffffffff, 0x00040007,
1324 0x3c02c, 0xffffffff, 0x00060005,
1325 0x3c030, 0xffffffff, 0x00090008,
1326 0x3c034, 0xffffffff, 0x00010000,
1327 0x3c038, 0xffffffff, 0x00030002,
1328 0x3c03c, 0xffffffff, 0x00040007,
1329 0x3c040, 0xffffffff, 0x00060005,
1330 0x3c044, 0xffffffff, 0x00090008,
1331 0x3c000, 0xffffffff, 0x96e00200,
1332 0x8708, 0xffffffff, 0x00900100,
1333 0xc424, 0xffffffff, 0x0020003f,
1334 0x38, 0xffffffff, 0x0140001c,
1335 0x3c, 0x000f0000, 0x000f0000,
1336 0x220, 0xffffffff, 0xC060000C,
1337 0x224, 0xc0000fff, 0x00000100,
1338 0x20a8, 0xffffffff, 0x00000104,
1339 0x55e4, 0xff000fff, 0x00000100,
1340 0x30cc, 0xc0000fff, 0x00000104,
1341 0xc1e4, 0x00000001, 0x00000001,
1342 0xd00c, 0xff000ff0, 0x00000100,
1343 0xd80c, 0xff000ff0, 0x00000100
/* Hawaii SPM golden setting, {reg, mask, value} triplet (single entry). */
1346 static const u32 hawaii_golden_spm_registers[] =
1348 0x30800, 0xe0ffffff, 0xe0000000
/* Hawaii common golden register settings ({reg, mask, value} triplets). */
1351 static const u32 hawaii_golden_common_registers[] =
1353 0x30800, 0xffffffff, 0xe0000000,
1354 0x28350, 0xffffffff, 0x3a00161a,
1355 0x28354, 0xffffffff, 0x0000002e,
1356 0x9a10, 0xffffffff, 0x00018208,
1357 0x98f8, 0xffffffff, 0x12011003
/*
 * Hawaii per-ASIC golden register settings ({reg, mask, value} triplets).
 * Values come from AMD hardware bring-up; treat as opaque.
 */
1360 static const u32 hawaii_golden_registers[] =
1362 0x3354, 0x00000333, 0x00000333,
1363 0x9a10, 0x00010000, 0x00058208,
1364 0x9830, 0xffffffff, 0x00000000,
1365 0x9834, 0xf00fffff, 0x00000400,
1366 0x9838, 0x0002021c, 0x00020200,
1367 0xc78, 0x00000080, 0x00000000,
1368 0x5bb0, 0x000000f0, 0x00000070,
1369 0x5bc0, 0xf0311fff, 0x80300000,
1370 0x350c, 0x00810000, 0x408af000,
1371 0x7030, 0x31000111, 0x00000011,
1372 0x2f48, 0x73773777, 0x12010001,
1373 0x2120, 0x0000007f, 0x0000001b,
1374 0x21dc, 0x00007fb6, 0x00002191,
1375 0x3628, 0x0000003f, 0x0000000a,
1376 0x362c, 0x0000003f, 0x0000000a,
1377 0x2ae4, 0x00073ffe, 0x000022a2,
1378 0x240c, 0x000007ff, 0x00000000,
1379 0x8bf0, 0x00002001, 0x00000001,
1380 0x8b24, 0xffffffff, 0x00ffffff,
1381 0x30a04, 0x0000ff0f, 0x00000000,
1382 0x28a4c, 0x07ffffff, 0x06000000,
1383 0x3e78, 0x00000001, 0x00000002,
1384 0xc768, 0x00000008, 0x00000008,
1385 0xc770, 0x00000f00, 0x00000800,
1386 0xc774, 0x00000f00, 0x00000800,
1387 0xc798, 0x00ffffff, 0x00ff7fbf,
1388 0xc79c, 0x00ffffff, 0x00ff7faf,
1389 0x8c00, 0x000000ff, 0x00000800,
1390 0xe40, 0x00001fff, 0x00001fff,
1391 0x9060, 0x0000007f, 0x00000020,
1392 0x9508, 0x00010000, 0x00010000,
1393 0xae00, 0x00100000, 0x000ff07c,
1394 0xac14, 0x000003ff, 0x0000000f,
1395 0xac10, 0xffffffff, 0x7564fdec,
1396 0xac0c, 0xffffffff, 0x3120b9a8,
1397 0xac08, 0x20000000, 0x0f9c0000
/*
 * Hawaii MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triplets for radeon_program_register_sequence()). The long 0x3c048..
 * 0x3c0f8 run programs the per-instance gating tables; Hawaii has more
 * instances than Kalindi, hence the longer table.
 */
1400 static const u32 hawaii_mgcg_cgcg_init[] =
1402 0xc420, 0xffffffff, 0xfffffffd,
1403 0x30800, 0xffffffff, 0xe0000000,
1404 0x3c2a0, 0xffffffff, 0x00000100,
1405 0x3c208, 0xffffffff, 0x00000100,
1406 0x3c2c0, 0xffffffff, 0x00000100,
1407 0x3c2c8, 0xffffffff, 0x00000100,
1408 0x3c2c4, 0xffffffff, 0x00000100,
1409 0x55e4, 0xffffffff, 0x00200100,
1410 0x3c280, 0xffffffff, 0x00000100,
1411 0x3c214, 0xffffffff, 0x06000100,
1412 0x3c220, 0xffffffff, 0x00000100,
1413 0x3c218, 0xffffffff, 0x06000100,
1414 0x3c204, 0xffffffff, 0x00000100,
1415 0x3c2e0, 0xffffffff, 0x00000100,
1416 0x3c224, 0xffffffff, 0x00000100,
1417 0x3c200, 0xffffffff, 0x00000100,
1418 0x3c230, 0xffffffff, 0x00000100,
1419 0x3c234, 0xffffffff, 0x00000100,
1420 0x3c250, 0xffffffff, 0x00000100,
1421 0x3c254, 0xffffffff, 0x00000100,
1422 0x3c258, 0xffffffff, 0x00000100,
1423 0x3c25c, 0xffffffff, 0x00000100,
1424 0x3c260, 0xffffffff, 0x00000100,
1425 0x3c27c, 0xffffffff, 0x00000100,
1426 0x3c278, 0xffffffff, 0x00000100,
1427 0x3c210, 0xffffffff, 0x06000100,
1428 0x3c290, 0xffffffff, 0x00000100,
1429 0x3c274, 0xffffffff, 0x00000100,
1430 0x3c2b4, 0xffffffff, 0x00000100,
1431 0x3c2b0, 0xffffffff, 0x00000100,
1432 0x3c270, 0xffffffff, 0x00000100,
1433 0x30800, 0xffffffff, 0xe0000000,
1434 0x3c020, 0xffffffff, 0x00010000,
1435 0x3c024, 0xffffffff, 0x00030002,
1436 0x3c028, 0xffffffff, 0x00040007,
1437 0x3c02c, 0xffffffff, 0x00060005,
1438 0x3c030, 0xffffffff, 0x00090008,
1439 0x3c034, 0xffffffff, 0x00010000,
1440 0x3c038, 0xffffffff, 0x00030002,
1441 0x3c03c, 0xffffffff, 0x00040007,
1442 0x3c040, 0xffffffff, 0x00060005,
1443 0x3c044, 0xffffffff, 0x00090008,
1444 0x3c048, 0xffffffff, 0x00010000,
1445 0x3c04c, 0xffffffff, 0x00030002,
1446 0x3c050, 0xffffffff, 0x00040007,
1447 0x3c054, 0xffffffff, 0x00060005,
1448 0x3c058, 0xffffffff, 0x00090008,
1449 0x3c05c, 0xffffffff, 0x00010000,
1450 0x3c060, 0xffffffff, 0x00030002,
1451 0x3c064, 0xffffffff, 0x00040007,
1452 0x3c068, 0xffffffff, 0x00060005,
1453 0x3c06c, 0xffffffff, 0x00090008,
1454 0x3c070, 0xffffffff, 0x00010000,
1455 0x3c074, 0xffffffff, 0x00030002,
1456 0x3c078, 0xffffffff, 0x00040007,
1457 0x3c07c, 0xffffffff, 0x00060005,
1458 0x3c080, 0xffffffff, 0x00090008,
1459 0x3c084, 0xffffffff, 0x00010000,
1460 0x3c088, 0xffffffff, 0x00030002,
1461 0x3c08c, 0xffffffff, 0x00040007,
1462 0x3c090, 0xffffffff, 0x00060005,
1463 0x3c094, 0xffffffff, 0x00090008,
1464 0x3c098, 0xffffffff, 0x00010000,
1465 0x3c09c, 0xffffffff, 0x00030002,
1466 0x3c0a0, 0xffffffff, 0x00040007,
1467 0x3c0a4, 0xffffffff, 0x00060005,
1468 0x3c0a8, 0xffffffff, 0x00090008,
1469 0x3c0ac, 0xffffffff, 0x00010000,
1470 0x3c0b0, 0xffffffff, 0x00030002,
1471 0x3c0b4, 0xffffffff, 0x00040007,
1472 0x3c0b8, 0xffffffff, 0x00060005,
1473 0x3c0bc, 0xffffffff, 0x00090008,
1474 0x3c0c0, 0xffffffff, 0x00010000,
1475 0x3c0c4, 0xffffffff, 0x00030002,
1476 0x3c0c8, 0xffffffff, 0x00040007,
1477 0x3c0cc, 0xffffffff, 0x00060005,
1478 0x3c0d0, 0xffffffff, 0x00090008,
1479 0x3c0d4, 0xffffffff, 0x00010000,
1480 0x3c0d8, 0xffffffff, 0x00030002,
1481 0x3c0dc, 0xffffffff, 0x00040007,
1482 0x3c0e0, 0xffffffff, 0x00060005,
1483 0x3c0e4, 0xffffffff, 0x00090008,
1484 0x3c0e8, 0xffffffff, 0x00010000,
1485 0x3c0ec, 0xffffffff, 0x00030002,
1486 0x3c0f0, 0xffffffff, 0x00040007,
1487 0x3c0f4, 0xffffffff, 0x00060005,
1488 0x3c0f8, 0xffffffff, 0x00090008,
1489 0xc318, 0xffffffff, 0x00020200,
1490 0x3350, 0xffffffff, 0x00000200,
1491 0x15c0, 0xffffffff, 0x00000400,
1492 0x55e8, 0xffffffff, 0x00000000,
1493 0x2f50, 0xffffffff, 0x00000902,
1494 0x3c000, 0xffffffff, 0x96940200,
1495 0x8708, 0xffffffff, 0x00900100,
1496 0xc424, 0xffffffff, 0x0020003f,
1497 0x38, 0xffffffff, 0x0140001c,
1498 0x3c, 0x000f0000, 0x000f0000,
1499 0x220, 0xffffffff, 0xc060000c,
1500 0x224, 0xc0000fff, 0x00000100,
1501 0xf90, 0xffffffff, 0x00000100,
1502 0xf98, 0x00000101, 0x00000000,
1503 0x20a8, 0xffffffff, 0x00000104,
1504 0x55e4, 0xff000fff, 0x00000100,
1505 0x30cc, 0xc0000fff, 0x00000104,
1506 0xc1e4, 0x00000001, 0x00000001,
1507 0xd00c, 0xff000ff0, 0x00000100,
1508 0xd80c, 0xff000ff0, 0x00000100
/*
 * Godavari (Mullins) per-ASIC golden register settings
 * ({reg, mask, value} triplets). Programmed together with the shared
 * kalindi_* mgcg/common/spm tables in cik_init_golden_registers().
 */
1511 static const u32 godavari_golden_registers[] =
1513 0x55e4, 0xff607fff, 0xfc000100,
1514 0x6ed8, 0x00010101, 0x00010000,
1515 0x9830, 0xffffffff, 0x00000000,
1516 0x98302, 0xf00fffff, 0x00000400,
1517 0x6130, 0xffffffff, 0x00010000,
1518 0x5bb0, 0x000000f0, 0x00000070,
1519 0x5bc0, 0xf0311fff, 0x80300000,
1520 0x98f8, 0x73773777, 0x12010001,
1521 0x98fc, 0xffffffff, 0x00000010,
1522 0x8030, 0x00001f0f, 0x0000100a,
1523 0x2f48, 0x73773777, 0x12010001,
1524 0x2408, 0x000fffff, 0x000c007f,
1525 0x8a14, 0xf000003f, 0x00000007,
1526 0x8b24, 0xffffffff, 0x00ff0fff,
1527 0x30a04, 0x0000ff0f, 0x00000000,
1528 0x28a4c, 0x07ffffff, 0x06000000,
1529 0x4d8, 0x00000fff, 0x00000100,
1530 0xd014, 0x00010000, 0x00810001,
1531 0xd814, 0x00010000, 0x00810001,
1532 0x3e78, 0x00000001, 0x00000002,
1533 0xc768, 0x00000008, 0x00000008,
1534 0xc770, 0x00000f00, 0x00000800,
1535 0xc774, 0x00000f00, 0x00000800,
1536 0xc798, 0x00ffffff, 0x00ff7fbf,
1537 0xc79c, 0x00ffffff, 0x00ff7faf,
1538 0x8c00, 0x000000ff, 0x00000001,
1539 0x214f8, 0x01ff01ff, 0x00000002,
1540 0x21498, 0x007ff800, 0x00200000,
1541 0x2015c, 0xffffffff, 0x00000f40,
1542 0x88c4, 0x001f3ae3, 0x00000082,
1543 0x88d4, 0x0000001f, 0x00000010,
1544 0x30934, 0xffffffff, 0x00000000
/*
 * cik_init_golden_registers - program the per-family "golden" register
 * presets (mgcg/cgcg, per-ASIC, common and SPM tables) at init time.
 * Holds grbm_idx_mutex for the whole sequence because some of these
 * registers are banked behind GRBM_GFX_INDEX.
 * NOTE(review): the switch case labels are not visible in this extract;
 * the family for each group is inferred from the table names.
 */
1548 static void cik_init_golden_registers(struct radeon_device *rdev)
1550 /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1551 mutex_lock(&rdev->grbm_idx_mutex);
1552 switch (rdev->family) {
/* Bonaire */
1554 radeon_program_register_sequence(rdev,
1555 bonaire_mgcg_cgcg_init,
1556 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1557 radeon_program_register_sequence(rdev,
1558 bonaire_golden_registers,
1559 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1560 radeon_program_register_sequence(rdev,
1561 bonaire_golden_common_registers,
1562 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1563 radeon_program_register_sequence(rdev,
1564 bonaire_golden_spm_registers,
1565 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
/* Kalindi (Kabini) */
1568 radeon_program_register_sequence(rdev,
1569 kalindi_mgcg_cgcg_init,
1570 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1571 radeon_program_register_sequence(rdev,
1572 kalindi_golden_registers,
1573 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1574 radeon_program_register_sequence(rdev,
1575 kalindi_golden_common_registers,
1576 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1577 radeon_program_register_sequence(rdev,
1578 kalindi_golden_spm_registers,
1579 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* Godavari (Mullins): reuses the kalindi mgcg/common/spm tables, only
 * the per-ASIC golden table differs. */
1582 radeon_program_register_sequence(rdev,
1583 kalindi_mgcg_cgcg_init,
1584 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1585 radeon_program_register_sequence(rdev,
1586 godavari_golden_registers,
1587 (const u32)ARRAY_SIZE(godavari_golden_registers));
1588 radeon_program_register_sequence(rdev,
1589 kalindi_golden_common_registers,
1590 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1591 radeon_program_register_sequence(rdev,
1592 kalindi_golden_spm_registers,
1593 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* Spectre (Kaveri) */
1596 radeon_program_register_sequence(rdev,
1597 spectre_mgcg_cgcg_init,
1598 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1599 radeon_program_register_sequence(rdev,
1600 spectre_golden_registers,
1601 (const u32)ARRAY_SIZE(spectre_golden_registers));
1602 radeon_program_register_sequence(rdev,
1603 spectre_golden_common_registers,
1604 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1605 radeon_program_register_sequence(rdev,
1606 spectre_golden_spm_registers,
1607 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
/* Hawaii */
1610 radeon_program_register_sequence(rdev,
1611 hawaii_mgcg_cgcg_init,
1612 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1613 radeon_program_register_sequence(rdev,
1614 hawaii_golden_registers,
1615 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1616 radeon_program_register_sequence(rdev,
1617 hawaii_golden_common_registers,
1618 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1619 radeon_program_register_sequence(rdev,
1620 hawaii_golden_spm_registers,
1621 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1626 mutex_unlock(&rdev->grbm_idx_mutex);
1630 * cik_get_xclk - get the xclk
1632 * @rdev: radeon_device pointer
1634 * Returns the reference clock used by the gfx engine
1637 u32 cik_get_xclk(struct radeon_device *rdev)
1639 u32 reference_clock = rdev->clock.spll.reference_freq;
/* APUs (IGP) and dGPUs derive xclk differently from the SPLL reference */
1641 if (rdev->flags & RADEON_IS_IGP) {
1642 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1643 return reference_clock / 2;
/* dGPU path: crystal input may be divided down */
1645 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1646 return reference_clock / 4;
1648 return reference_clock;
1652 * cik_mm_rdoorbell - read a doorbell dword
1654 * @rdev: radeon_device pointer
1655 * @index: doorbell index
1657 * Returns the value in the doorbell aperture at the
1658 * requested doorbell index (CIK).
1660 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1662 if (index < rdev->doorbell.num_doorbells) {
/* doorbell.ptr indexing is per-dword here - assumes ptr is a u32
 * pointer; verify against the radeon_doorbell declaration */
1663 return readl(rdev->doorbell.ptr + index);
/* out-of-range read: log and fall through (returned value on this
 * path is not visible in this extract - presumably 0) */
1665 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1671 * cik_mm_wdoorbell - write a doorbell dword
1673 * @rdev: radeon_device pointer
1674 * @index: doorbell index
1675 * @v: value to write
1677 * Writes @v to the doorbell aperture at the
1678 * requested doorbell index (CIK).
1680 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1682 if (index < rdev->doorbell.num_doorbells) {
/* bounds-checked dword write into the doorbell BAR */
1683 writel(v, rdev->doorbell.ptr + index);
/* out-of-range write is dropped, not performed */
1685 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
/*
 * Bonaire MC IO-debug {index, data} pairs, written via
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when using the
 * legacy (non-header) MC firmware image.
 */
1689 #define BONAIRE_IO_MC_REGS_SIZE 36
1691 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1693 {0x00000070, 0x04400000},
1694 {0x00000071, 0x80c01803},
1695 {0x00000072, 0x00004004},
1696 {0x00000073, 0x00000100},
1697 {0x00000074, 0x00ff0000},
1698 {0x00000075, 0x34000000},
1699 {0x00000076, 0x08000014},
1700 {0x00000077, 0x00cc08ec},
1701 {0x00000078, 0x00000400},
1702 {0x00000079, 0x00000000},
1703 {0x0000007a, 0x04090000},
1704 {0x0000007c, 0x00000000},
1705 {0x0000007e, 0x4408a8e8},
1706 {0x0000007f, 0x00000304},
1707 {0x00000080, 0x00000000},
1708 {0x00000082, 0x00000001},
1709 {0x00000083, 0x00000002},
1710 {0x00000084, 0xf3e4f400},
1711 {0x00000085, 0x052024e3},
1712 {0x00000087, 0x00000000},
1713 {0x00000088, 0x01000000},
1714 {0x0000008a, 0x1c0a0000},
1715 {0x0000008b, 0xff010000},
1716 {0x0000008d, 0xffffefff},
1717 {0x0000008e, 0xfff3efff},
1718 {0x0000008f, 0xfff3efbf},
1719 {0x00000092, 0xf7ffffff},
1720 {0x00000093, 0xffffff7f},
1721 {0x00000095, 0x00101101},
1722 {0x00000096, 0x00000fff},
1723 {0x00000097, 0x00116fff},
1724 {0x00000098, 0x60010000},
1725 {0x00000099, 0x10010000},
1726 {0x0000009a, 0x00006000},
1727 {0x0000009b, 0x00001000},
1728 {0x0000009f, 0x00b48000}
/*
 * Hawaii MC IO-debug {index, data} pairs; same role as
 * bonaire_io_mc_regs but for the Hawaii memory controller.
 */
1731 #define HAWAII_IO_MC_REGS_SIZE 22
1733 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1735 {0x0000007d, 0x40000000},
1736 {0x0000007e, 0x40180304},
1737 {0x0000007f, 0x0000ff00},
1738 {0x00000081, 0x00000000},
1739 {0x00000083, 0x00000800},
1740 {0x00000086, 0x00000000},
1741 {0x00000087, 0x00000100},
1742 {0x00000088, 0x00020100},
1743 {0x00000089, 0x00000000},
1744 {0x0000008b, 0x00040000},
1745 {0x0000008c, 0x00000100},
1746 {0x0000008e, 0xff010000},
1747 {0x00000090, 0xffffefff},
1748 {0x00000091, 0xfff3efff},
1749 {0x00000092, 0xfff3efbf},
1750 {0x00000093, 0xf7ffffff},
1751 {0x00000094, 0xffffff7f},
1752 {0x00000095, 0x00000fff},
1753 {0x00000096, 0x00116fff},
1754 {0x00000097, 0x60010000},
1755 {0x00000098, 0x10010000},
1756 {0x0000009f, 0x00c79000}
1761 * cik_srbm_select - select specific register instances
1763 * @rdev: radeon_device pointer
1764 * @me: selected ME (micro engine)
1769 * Switches the currently active registers instances. Some
1770 * registers are instanced per VMID, others are instanced per
1771 * me/pipe/queue combination.
1773 static void cik_srbm_select(struct radeon_device *rdev,
1774 u32 me, u32 pipe, u32 queue, u32 vmid)
/* Compose SRBM_GFX_CNTL; pipe is 2 bits, queue 3 bits. The me/vmid
 * field terms are not visible in this extract - presumably MEID()/
 * VMID() are OR'ed in as well; confirm against the full source. */
1776 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1779 QUEUEID(queue & 0x7));
1780 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1785 * ci_mc_load_microcode - load MC ucode into the hw
1787 * @rdev: radeon_device pointer
1789 * Load the GDDR MC ucode into the hw (CIK).
1790 * Returns 0 on success, error on failure.
1792 int ci_mc_load_microcode(struct radeon_device *rdev)
/* "new" pointers are used for header-validated (rdev->new_fw) firmware
 * images, the plain ones for legacy raw images */
1794 const __be32 *fw_data = NULL;
1795 const __le32 *new_fw_data = NULL;
1797 u32 *io_mc_regs = NULL;
1798 const __le32 *new_io_mc_regs = NULL;
1799 int i, regs_size, ucode_size;
/* New-style firmware: sizes and offsets come from the image header */
1805 const struct mc_firmware_header_v1_0 *hdr =
1806 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1808 radeon_ucode_print_mc_hdr(&hdr->header);
/* io-debug payload is {index, data} dword pairs, hence / (4 * 2) */
1810 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1811 new_io_mc_regs = (const __le32 *)
1812 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1813 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1814 new_fw_data = (const __le32 *)
1815 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/* Legacy firmware: whole blob is ucode; io regs come from the static
 * per-family tables above */
1817 ucode_size = rdev->mc_fw->size / 4;
1819 switch (rdev->family) {
1821 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1822 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1825 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1826 regs_size = HAWAII_IO_MC_REGS_SIZE;
1831 fw_data = (const __be32 *)rdev->mc_fw->data;
1834 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1837 /* reset the engine and set to writable */
1838 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1839 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1841 /* load mc io regs */
1842 for (i = 0; i < regs_size; i++) {
1844 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1845 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1847 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1848 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
/* Quirk: extra io-debug writes for device 0x6649 with this
 * MC_SEQ_MISC0 memory revision */
1852 tmp = RREG32(MC_SEQ_MISC0);
1853 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1854 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1855 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1856 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1857 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1860 /* load the MC ucode */
1861 for (i = 0; i < ucode_size; i++) {
/* new fw is little-endian, legacy fw is big-endian on the wire */
1863 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1865 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1868 /* put the engine back into the active state */
1869 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1870 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1871 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1873 /* wait for training to complete */
1874 for (i = 0; i < rdev->usec_timeout; i++) {
1875 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1879 for (i = 0; i < rdev->usec_timeout; i++) {
1880 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1890 * cik_init_microcode - load ucode images from disk
1892 * @rdev: radeon_device pointer
1894 * Use the firmware interface to load the ucode images into
1895 * the driver (not loaded into hw).
1896 * Returns 0 on success, error on failure.
1898 static int cik_init_microcode(struct radeon_device *rdev)
/* Legacy images use the uppercase chip_name; new header-validated
 * images use the lowercase new_chip_name. */
1900 const char *chip_name;
1901 const char *new_chip_name;
1902 size_t pfp_req_size, me_req_size, ce_req_size,
1903 mec_req_size, rlc_req_size, mc_req_size = 0,
1904 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1909 bool new_smc = false;
/* Per-family expected firmware sizes (in bytes). APU families
 * (Kaveri/Kabini/Mullins) have no MC/SMC firmware, so those sizes
 * keep their 0 defaults. Case labels are not visible in this extract. */
1913 switch (rdev->family) {
1915 chip_name = "BONAIRE";
/* some Bonaire revisions need a variant SMC image (new_smc) */
1916 if ((rdev->pdev->revision == 0x80) ||
1917 (rdev->pdev->revision == 0x81) ||
1918 (rdev->pdev->device == 0x665f))
1920 new_chip_name = "bonaire";
1921 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1922 me_req_size = CIK_ME_UCODE_SIZE * 4;
1923 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1924 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1925 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1926 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1927 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1928 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1929 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1933 chip_name = "HAWAII";
1934 if (rdev->pdev->revision == 0x80)
1936 new_chip_name = "hawaii";
1937 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1938 me_req_size = CIK_ME_UCODE_SIZE * 4;
1939 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1940 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1941 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1942 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1943 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1944 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1945 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1949 chip_name = "KAVERI";
1950 new_chip_name = "kaveri";
1951 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1952 me_req_size = CIK_ME_UCODE_SIZE * 4;
1953 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1954 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1955 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1956 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1960 chip_name = "KABINI";
1961 new_chip_name = "kabini";
1962 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1963 me_req_size = CIK_ME_UCODE_SIZE * 4;
1964 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1965 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1966 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1967 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1971 chip_name = "MULLINS";
1972 new_chip_name = "mullins";
1973 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1974 me_req_size = CIK_ME_UCODE_SIZE * 4;
1975 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1976 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1977 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1978 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1984 DRM_INFO("Loading %s Microcode\n", new_chip_name);
/* For each ucode: try the new (header-validated) image first, fall
 * back to the legacy image, then size-check and validate. The
 * firmware name format strings were removed by linux-libre
 * deblobbing, hence the "(DEBLOBBED)" placeholders. */
/* PFP (pre-fetch parser) */
1986 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1987 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1989 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1990 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1993 if (rdev->pfp_fw->size != pfp_req_size) {
1994 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
1995 rdev->pfp_fw->size, fw_name);
2000 err = radeon_ucode_validate(rdev->pfp_fw);
2002 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* ME (micro engine) */
2010 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2011 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2013 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2014 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2017 if (rdev->me_fw->size != me_req_size) {
2018 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2019 rdev->me_fw->size, fw_name);
2023 err = radeon_ucode_validate(rdev->me_fw);
2025 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* CE (constant engine) */
2033 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2034 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2036 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2037 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2040 if (rdev->ce_fw->size != ce_req_size) {
2041 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2042 rdev->ce_fw->size, fw_name);
2046 err = radeon_ucode_validate(rdev->ce_fw);
2048 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* MEC (compute micro engine) */
2056 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2057 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2059 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2060 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2063 if (rdev->mec_fw->size != mec_req_size) {
2064 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065 rdev->mec_fw->size, fw_name);
2069 err = radeon_ucode_validate(rdev->mec_fw);
2071 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* second MEC instance exists only on Kaveri */
2079 if (rdev->family == CHIP_KAVERI) {
2080 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2081 err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2085 err = radeon_ucode_validate(rdev->mec2_fw);
/* RLC */
2094 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2095 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2097 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2098 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2101 if (rdev->rlc_fw->size != rlc_req_size) {
2102 pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2103 rdev->rlc_fw->size, fw_name);
2107 err = radeon_ucode_validate(rdev->rlc_fw);
2109 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* SDMA */
2117 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2118 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2120 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2121 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2124 if (rdev->sdma_fw->size != sdma_req_size) {
2125 pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2126 rdev->sdma_fw->size, fw_name);
2130 err = radeon_ucode_validate(rdev->sdma_fw);
2132 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140 /* No SMC, MC ucode on APUs */
2141 if (!(rdev->flags & RADEON_IS_IGP)) {
2142 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2143 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2145 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2146 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2148 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2149 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
/* legacy MC firmware may be either the mc or mc2 sized image */
2153 if ((rdev->mc_fw->size != mc_req_size) &&
2154 (rdev->mc_fw->size != mc2_req_size)){
2155 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2156 rdev->mc_fw->size, fw_name);
2159 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2161 err = radeon_ucode_validate(rdev->mc_fw);
2163 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* SMC: failure here is non-fatal - dpm is simply disabled */
2172 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2174 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2175 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2177 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2178 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2180 pr_err("smc: error loading firmware \"%s\"\n",
2182 release_firmware(rdev->smc_fw);
2183 rdev->smc_fw = NULL;
2185 } else if (rdev->smc_fw->size != smc_req_size) {
2186 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2187 rdev->smc_fw->size, fw_name);
2191 err = radeon_ucode_validate(rdev->smc_fw);
2193 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* all-or-nothing: either every image is new-style or every image is
 * legacy; a mix is rejected */
2203 rdev->new_fw = false;
2204 } else if (new_fw < num_fw) {
2205 pr_err("ci_fw: mixing new and old firmware!\n");
2208 rdev->new_fw = true;
/* common error path: drop every firmware reference taken so far */
2214 pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2216 release_firmware(rdev->pfp_fw);
2217 rdev->pfp_fw = NULL;
2218 release_firmware(rdev->me_fw);
2220 release_firmware(rdev->ce_fw);
2222 release_firmware(rdev->mec_fw);
2223 rdev->mec_fw = NULL;
2224 release_firmware(rdev->mec2_fw);
2225 rdev->mec2_fw = NULL;
2226 release_firmware(rdev->rlc_fw);
2227 rdev->rlc_fw = NULL;
2228 release_firmware(rdev->sdma_fw);
2229 rdev->sdma_fw = NULL;
2230 release_firmware(rdev->mc_fw);
2232 release_firmware(rdev->smc_fw);
2233 rdev->smc_fw = NULL;
2242 * cik_tiling_mode_table_init - init the hw tiling table
2244 * @rdev: radeon_device pointer
2246 * Starting with SI, the tiling setup is done globally in a
2247 * set of 32 tiling modes. Rather than selecting each set of
2248 * parameters per surface as on older asics, we just select
2249 * which index in the tiling table we want to use, and the
2250 * surface uses those parameters (CIK).
2252 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2254 u32 *tile = rdev->config.cik.tile_mode_array;
2255 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2256 const u32 num_tile_mode_states =
2257 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2258 const u32 num_secondary_tile_mode_states =
2259 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2260 u32 reg_offset, split_equal_to_row_size;
2261 u32 num_pipe_configs;
2262 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2263 rdev->config.cik.max_shader_engines;
2265 switch (rdev->config.cik.mem_row_size_in_kb) {
2267 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2271 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2274 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2278 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2279 if (num_pipe_configs > 8)
2280 num_pipe_configs = 16;
2282 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2283 tile[reg_offset] = 0;
2284 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2285 macrotile[reg_offset] = 0;
2287 switch(num_pipe_configs) {
2289 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2293 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2297 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2301 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2305 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 TILE_SPLIT(split_equal_to_row_size));
2309 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2312 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2316 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 TILE_SPLIT(split_equal_to_row_size));
2320 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2321 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2322 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2325 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2340 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2355 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371 NUM_BANKS(ADDR_SURF_16_BANK));
2372 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375 NUM_BANKS(ADDR_SURF_16_BANK));
2376 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379 NUM_BANKS(ADDR_SURF_16_BANK));
2380 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383 NUM_BANKS(ADDR_SURF_16_BANK));
2384 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387 NUM_BANKS(ADDR_SURF_8_BANK));
2388 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391 NUM_BANKS(ADDR_SURF_4_BANK));
2392 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395 NUM_BANKS(ADDR_SURF_2_BANK));
2396 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399 NUM_BANKS(ADDR_SURF_16_BANK));
2400 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2403 NUM_BANKS(ADDR_SURF_16_BANK));
2404 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407 NUM_BANKS(ADDR_SURF_16_BANK));
2408 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2411 NUM_BANKS(ADDR_SURF_8_BANK));
2412 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415 NUM_BANKS(ADDR_SURF_4_BANK));
2416 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 NUM_BANKS(ADDR_SURF_2_BANK));
2420 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2423 NUM_BANKS(ADDR_SURF_2_BANK));
2425 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2426 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2427 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2428 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2432 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2434 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2435 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2436 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2438 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2439 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2440 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2443 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2444 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2447 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2448 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2450 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2451 TILE_SPLIT(split_equal_to_row_size));
2452 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2453 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2459 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462 TILE_SPLIT(split_equal_to_row_size));
2463 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2465 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2468 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2470 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2474 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2477 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2478 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2483 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2498 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2511 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 NUM_BANKS(ADDR_SURF_16_BANK));
2515 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_16_BANK));
2519 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_16_BANK));
2523 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 NUM_BANKS(ADDR_SURF_16_BANK));
2527 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 NUM_BANKS(ADDR_SURF_8_BANK));
2531 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2534 NUM_BANKS(ADDR_SURF_4_BANK));
2535 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2538 NUM_BANKS(ADDR_SURF_2_BANK));
2539 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 NUM_BANKS(ADDR_SURF_16_BANK));
2555 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558 NUM_BANKS(ADDR_SURF_8_BANK));
2559 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562 NUM_BANKS(ADDR_SURF_4_BANK));
2563 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 NUM_BANKS(ADDR_SURF_2_BANK));
2568 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2569 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2570 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2571 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2576 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2580 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2584 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2592 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2594 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595 TILE_SPLIT(split_equal_to_row_size));
2596 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606 TILE_SPLIT(split_equal_to_row_size));
2607 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2608 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2609 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2612 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2618 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2622 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2627 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2633 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2642 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2644 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 } else if (num_rbs < 4) {
2656 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 TILE_SPLIT(split_equal_to_row_size));
2676 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 TILE_SPLIT(split_equal_to_row_size));
2687 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2689 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739 NUM_BANKS(ADDR_SURF_16_BANK));
2740 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743 NUM_BANKS(ADDR_SURF_16_BANK));
2744 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 NUM_BANKS(ADDR_SURF_16_BANK));
2748 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2752 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2755 NUM_BANKS(ADDR_SURF_16_BANK));
2756 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 NUM_BANKS(ADDR_SURF_8_BANK));
2760 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2763 NUM_BANKS(ADDR_SURF_4_BANK));
2764 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2767 NUM_BANKS(ADDR_SURF_16_BANK));
2768 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2772 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775 NUM_BANKS(ADDR_SURF_16_BANK));
2776 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779 NUM_BANKS(ADDR_SURF_16_BANK));
2780 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 NUM_BANKS(ADDR_SURF_16_BANK));
2784 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2787 NUM_BANKS(ADDR_SURF_8_BANK));
2788 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2790 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2791 NUM_BANKS(ADDR_SURF_4_BANK));
2793 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2794 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2795 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2796 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2800 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2802 PIPE_CONFIG(ADDR_SURF_P2) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2804 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806 PIPE_CONFIG(ADDR_SURF_P2) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2808 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 PIPE_CONFIG(ADDR_SURF_P2) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2814 PIPE_CONFIG(ADDR_SURF_P2) |
2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2816 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2818 PIPE_CONFIG(ADDR_SURF_P2) |
2819 TILE_SPLIT(split_equal_to_row_size));
2820 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2821 PIPE_CONFIG(ADDR_SURF_P2) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825 PIPE_CONFIG(ADDR_SURF_P2) |
2826 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 PIPE_CONFIG(ADDR_SURF_P2) |
2830 TILE_SPLIT(split_equal_to_row_size));
2831 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2832 PIPE_CONFIG(ADDR_SURF_P2);
2833 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2835 PIPE_CONFIG(ADDR_SURF_P2));
2836 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 PIPE_CONFIG(ADDR_SURF_P2) |
2839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842 PIPE_CONFIG(ADDR_SURF_P2) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846 PIPE_CONFIG(ADDR_SURF_P2) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849 PIPE_CONFIG(ADDR_SURF_P2) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2851 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853 PIPE_CONFIG(ADDR_SURF_P2) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857 PIPE_CONFIG(ADDR_SURF_P2) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 PIPE_CONFIG(ADDR_SURF_P2) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 PIPE_CONFIG(ADDR_SURF_P2));
2866 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2868 PIPE_CONFIG(ADDR_SURF_P2) |
2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872 PIPE_CONFIG(ADDR_SURF_P2) |
2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2875 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2876 PIPE_CONFIG(ADDR_SURF_P2) |
2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2882 NUM_BANKS(ADDR_SURF_16_BANK));
2883 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2886 NUM_BANKS(ADDR_SURF_16_BANK));
2887 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2890 NUM_BANKS(ADDR_SURF_16_BANK));
2891 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894 NUM_BANKS(ADDR_SURF_16_BANK));
2895 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898 NUM_BANKS(ADDR_SURF_16_BANK));
2899 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2902 NUM_BANKS(ADDR_SURF_16_BANK));
2903 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2906 NUM_BANKS(ADDR_SURF_8_BANK));
2907 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2908 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2909 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2910 NUM_BANKS(ADDR_SURF_16_BANK));
2911 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914 NUM_BANKS(ADDR_SURF_16_BANK));
2915 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 NUM_BANKS(ADDR_SURF_16_BANK));
2919 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922 NUM_BANKS(ADDR_SURF_16_BANK));
2923 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 NUM_BANKS(ADDR_SURF_16_BANK));
2927 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2930 NUM_BANKS(ADDR_SURF_16_BANK));
2931 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934 NUM_BANKS(ADDR_SURF_8_BANK));
2936 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2937 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2938 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2939 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2943 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2948  * cik_select_se_sh - select which SE, SH to address
2950  * @rdev: radeon_device pointer
2951  * @se_num: shader engine to address (0xffffffff = broadcast to all SEs)
2952  * @sh_num: sh block to address (0xffffffff = broadcast to all SHs)
2954  * Select which SE, SH combinations to address. Certain
2955  * registers are instanced per SE or SH. 0xffffffff means
2956  * broadcast to all SEs or SHs (CIK).
2958 static void cik_select_se_sh(struct radeon_device *rdev,
2959 u32 se_num, u32 sh_num)
2961 u32 data = INSTANCE_BROADCAST_WRITES; /* always broadcast across instances */
2963 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2964 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; /* broadcast to every SE and SH */
2965 else if (se_num == 0xffffffff)
2966 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); /* one SH index across all SEs */
2967 else if (sh_num == 0xffffffff)
2968 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); /* all SHs on one specific SE */
2970 data |= SH_INDEX(sh_num) | SE_INDEX(se_num); /* final case: one specific SE/SH pair */
2971 WREG32(GRBM_GFX_INDEX, data); /* latch selection; subsequent instanced reg accesses hit the chosen SE/SH */
2975  * cik_create_bitmask - create a bitmask
2977  * @bit_width: length of the mask, in bits
2979  * create a variable length bit mask (CIK).
2980  * Returns the bitmask, i.e. a value with the low @bit_width bits set.
2982 static u32 cik_create_bitmask(u32 bit_width)
2986 for (i = 0; i < bit_width; i++) { /* one iteration per bit of the resulting mask */
2994  * cik_get_rb_disabled - computes the mask of disabled RBs
2996  * @rdev: radeon_device pointer
2997  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
2999  * @sh_per_se: number of SH blocks per SE for the asic
3001  * Calculates the bitmask of disabled RBs for the currently
3001+ * selected SE/SH (see cik_select_se_sh) (CIK).
3002  * Returns the disabled RB bitmask.
3004 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3005 u32 max_rb_num_per_se,
3010 data = RREG32(CC_RB_BACKEND_DISABLE); /* NOTE(review): presumably the hw/fuse-level RB disables - confirm against CIK register docs */
3012 data &= BACKEND_DISABLE_MASK; /* keep only the backend-disable field */
3015 data |= RREG32(GC_USER_RB_BACKEND_DISABLE); /* merge in the user/driver-requested RB disables */
3017 data >>= BACKEND_DISABLE_SHIFT; /* shift the combined field down to bit 0 */
3019 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se); /* limit result to the RBs that exist per SH */
3025  * cik_setup_rb - setup the RBs on the asic
3027  * @rdev: radeon_device pointer
3028  * @se_num: number of SEs (shader engines) for the asic
3029  * @sh_per_se: number of SH blocks per SE for the asic
3030  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3032  * Configures per-SE/SH RB registers (CIK): gathers the disabled-RB
3032+ * state from each SE/SH, records the enabled-RB mask in rdev->config,
3032+ * and programs PA_SC_RASTER_CONFIG per SE accordingly.
3034 static void cik_setup_rb(struct radeon_device *rdev,
3035 u32 se_num, u32 sh_per_se,
3036 u32 max_rb_num_per_se)
3040 u32 disabled_rbs = 0;
3041 u32 enabled_rbs = 0;
3043 mutex_lock(&rdev->grbm_idx_mutex); /* GRBM_GFX_INDEX is global state; serialize SE/SH selection */
3044 for (i = 0; i < se_num; i++) {
3045 for (j = 0; j < sh_per_se; j++) {
3046 cik_select_se_sh(rdev, i, j); /* target this SE/SH so the per-instance disable regs are visible */
3047 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3048 if (rdev->family == CHIP_HAWAII)
3049 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); /* Hawaii packs more RB bits per SH */
3051 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3054 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); /* restore broadcast mode */
3055 mutex_unlock(&rdev->grbm_idx_mutex);
3058 for (i = 0; i < max_rb_num_per_se * se_num; i++) { /* invert: build the enabled-RB mask bit by bit */
3059 if (!(disabled_rbs & mask))
3060 enabled_rbs |= mask;
3064 rdev->config.cik.backend_enable_mask = enabled_rbs; /* cached for userspace tiling queries */
3066 mutex_lock(&rdev->grbm_idx_mutex);
3067 for (i = 0; i < se_num; i++) {
3068 cik_select_se_sh(rdev, i, 0xffffffff); /* program raster config per SE, broadcast to its SHs */
3070 for (j = 0; j < sh_per_se; j++) {
3071 switch (enabled_rbs & 3) { /* map the two low enabled-RB bits of this SH to an RB mapping */
3074 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3); /* NOTE(review): case labels not visible here - verify which enabled_rbs patterns select each mapping */
3076 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3079 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3082 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3086 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3091 WREG32(PA_SC_RASTER_CONFIG, data);
3093 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); /* restore broadcast mode */
3094 mutex_unlock(&rdev->grbm_idx_mutex);
3098 * cik_gpu_init - setup the 3D engine
3100 * @rdev: radeon_device pointer
3102 * Configures the 3D engine and tiling configuration
3103 * registers so that the 3D engine is usable.
3105 static void cik_gpu_init(struct radeon_device *rdev)
3107 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3108 u32 mc_shared_chmap, mc_arb_ramcfg;
3109 u32 hdp_host_path_cntl;
3113 switch (rdev->family) {
3115 rdev->config.cik.max_shader_engines = 2;
3116 rdev->config.cik.max_tile_pipes = 4;
3117 rdev->config.cik.max_cu_per_sh = 7;
3118 rdev->config.cik.max_sh_per_se = 1;
3119 rdev->config.cik.max_backends_per_se = 2;
3120 rdev->config.cik.max_texture_channel_caches = 4;
3121 rdev->config.cik.max_gprs = 256;
3122 rdev->config.cik.max_gs_threads = 32;
3123 rdev->config.cik.max_hw_contexts = 8;
3125 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3126 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3127 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3128 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3129 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3132 rdev->config.cik.max_shader_engines = 4;
3133 rdev->config.cik.max_tile_pipes = 16;
3134 rdev->config.cik.max_cu_per_sh = 11;
3135 rdev->config.cik.max_sh_per_se = 1;
3136 rdev->config.cik.max_backends_per_se = 4;
3137 rdev->config.cik.max_texture_channel_caches = 16;
3138 rdev->config.cik.max_gprs = 256;
3139 rdev->config.cik.max_gs_threads = 32;
3140 rdev->config.cik.max_hw_contexts = 8;
3142 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3143 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3144 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3145 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3146 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3149 rdev->config.cik.max_shader_engines = 1;
3150 rdev->config.cik.max_tile_pipes = 4;
3151 rdev->config.cik.max_cu_per_sh = 8;
3152 rdev->config.cik.max_backends_per_se = 2;
3153 rdev->config.cik.max_sh_per_se = 1;
3154 rdev->config.cik.max_texture_channel_caches = 4;
3155 rdev->config.cik.max_gprs = 256;
3156 rdev->config.cik.max_gs_threads = 16;
3157 rdev->config.cik.max_hw_contexts = 8;
3159 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3160 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3161 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3162 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3163 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3168 rdev->config.cik.max_shader_engines = 1;
3169 rdev->config.cik.max_tile_pipes = 2;
3170 rdev->config.cik.max_cu_per_sh = 2;
3171 rdev->config.cik.max_sh_per_se = 1;
3172 rdev->config.cik.max_backends_per_se = 1;
3173 rdev->config.cik.max_texture_channel_caches = 2;
3174 rdev->config.cik.max_gprs = 256;
3175 rdev->config.cik.max_gs_threads = 16;
3176 rdev->config.cik.max_hw_contexts = 8;
3178 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3179 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3180 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3181 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3182 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3186 /* Initialize HDP */
3187 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3188 WREG32((0x2c14 + j), 0x00000000);
3189 WREG32((0x2c18 + j), 0x00000000);
3190 WREG32((0x2c1c + j), 0x00000000);
3191 WREG32((0x2c20 + j), 0x00000000);
3192 WREG32((0x2c24 + j), 0x00000000);
3195 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3196 WREG32(SRBM_INT_CNTL, 0x1);
3197 WREG32(SRBM_INT_ACK, 0x1);
3199 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3201 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3202 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3204 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3205 rdev->config.cik.mem_max_burst_length_bytes = 256;
3206 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3207 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3208 if (rdev->config.cik.mem_row_size_in_kb > 4)
3209 rdev->config.cik.mem_row_size_in_kb = 4;
3210 /* XXX use MC settings? */
3211 rdev->config.cik.shader_engine_tile_size = 32;
3212 rdev->config.cik.num_gpus = 1;
3213 rdev->config.cik.multi_gpu_tile_size = 64;
3215 /* fix up row size */
3216 gb_addr_config &= ~ROW_SIZE_MASK;
3217 switch (rdev->config.cik.mem_row_size_in_kb) {
3220 gb_addr_config |= ROW_SIZE(0);
3223 gb_addr_config |= ROW_SIZE(1);
3226 gb_addr_config |= ROW_SIZE(2);
3230 /* setup tiling info dword. gb_addr_config is not adequate since it does
3231 * not have bank info, so create a custom tiling dword.
3232 * bits 3:0 num_pipes
3233 * bits 7:4 num_banks
3234 * bits 11:8 group_size
3235 * bits 15:12 row_size
3237 rdev->config.cik.tile_config = 0;
3238 switch (rdev->config.cik.num_tile_pipes) {
3240 rdev->config.cik.tile_config |= (0 << 0);
3243 rdev->config.cik.tile_config |= (1 << 0);
3246 rdev->config.cik.tile_config |= (2 << 0);
3250 /* XXX what about 12? */
3251 rdev->config.cik.tile_config |= (3 << 0);
3254 rdev->config.cik.tile_config |=
3255 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3256 rdev->config.cik.tile_config |=
3257 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3258 rdev->config.cik.tile_config |=
3259 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3261 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3262 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3263 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3264 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3265 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3266 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3267 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3268 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3270 cik_tiling_mode_table_init(rdev);
3272 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3273 rdev->config.cik.max_sh_per_se,
3274 rdev->config.cik.max_backends_per_se);
3276 rdev->config.cik.active_cus = 0;
3277 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3278 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3279 rdev->config.cik.active_cus +=
3280 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3284 /* set HW defaults for 3D engine */
3285 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3287 mutex_lock(&rdev->grbm_idx_mutex);
3289 * making sure that the following register writes will be broadcasted
3290 * to all the shaders
3292 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3293 WREG32(SX_DEBUG_1, 0x20);
3295 WREG32(TA_CNTL_AUX, 0x00010000);
3297 tmp = RREG32(SPI_CONFIG_CNTL);
3299 WREG32(SPI_CONFIG_CNTL, tmp);
3301 WREG32(SQ_CONFIG, 1);
3303 WREG32(DB_DEBUG, 0);
3305 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3307 WREG32(DB_DEBUG2, tmp);
3309 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3311 WREG32(DB_DEBUG3, tmp);
3313 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3315 WREG32(CB_HW_CONTROL, tmp);
3317 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3319 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3320 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3321 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3322 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3324 WREG32(VGT_NUM_INSTANCES, 1);
3326 WREG32(CP_PERFMON_CNTL, 0);
3328 WREG32(SQ_CONFIG, 0);
3330 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3331 FORCE_EOV_MAX_REZ_CNT(255)));
3333 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3334 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3336 WREG32(VGT_GS_VERTEX_REUSE, 16);
3337 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3339 tmp = RREG32(HDP_MISC_CNTL);
3340 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3341 WREG32(HDP_MISC_CNTL, tmp);
3343 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3344 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3346 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3347 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3348 mutex_unlock(&rdev->grbm_idx_mutex);
3354 * GPU scratch registers helpers function.
3357 * cik_scratch_init - setup driver info for CP scratch regs
3359 * @rdev: radeon_device pointer
3361 * Set up the number and offset of the CP scratch registers.
3362 * NOTE: use of CP scratch registers is a legacy interface and
3363 * is not used by default on newer asics (r6xx+). On newer asics,
3364 * memory buffers are used for fences rather than scratch regs.
3366 static void cik_scratch_init(struct radeon_device *rdev)
/* Expose 7 CP scratch registers, contiguous from SCRATCH_REG0 (4 bytes apart). */
3370 rdev->scratch.num_reg = 7;
3371 rdev->scratch.reg_base = SCRATCH_REG0;
3372 for (i = 0; i < rdev->scratch.num_reg; i++) {
/* All registers start out unallocated. */
3373 rdev->scratch.free[i] = true;
3374 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3379 * cik_ring_test - basic gfx ring test
3381 * @rdev: radeon_device pointer
3382 * @ring: radeon_ring structure holding ring information
3384 * Allocate a scratch register and write to it using the gfx ring (CIK).
3385 * Provides a basic gfx ring test to verify that the ring is working.
3386 * Used by cik_cp_gfx_resume();
3387 * Returns 0 on success, error on failure.
3389 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3396 r = radeon_scratch_get(rdev, &scratch);
3398 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
/* Seed the scratch reg so we can tell whether the CP overwrote it. */
3401 WREG32(scratch, 0xCAFEDEAD);
/* 3 dwords: SET_UCONFIG_REG header + register offset + value. */
3402 r = radeon_ring_lock(rdev, ring, 3);
3404 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3405 radeon_scratch_free(rdev, scratch);
3408 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3409 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3410 radeon_ring_write(ring, 0xDEADBEEF);
3411 radeon_ring_unlock_commit(rdev, ring, false);
/* Poll until the CP has written the magic value, or time out (usecs). */
3413 for (i = 0; i < rdev->usec_timeout; i++) {
3414 tmp = RREG32(scratch);
3415 if (tmp == 0xDEADBEEF)
3419 if (i < rdev->usec_timeout) {
3420 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3422 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3423 ring->idx, scratch, tmp);
3426 radeon_scratch_free(rdev, scratch);
3431 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3433 * @rdev: radeon_device pointer
3434 * @ridx: radeon ring index
3436 * Emits an hdp flush on the cp.
3438 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3441 struct radeon_ring *ring = &rdev->ring[ridx];
/* Pick the GPU_HDP_FLUSH ref/mask bit that corresponds to this ring's
 * client (per-pipe CP bits for compute rings).
 */
3444 switch (ring->idx) {
3445 case CAYMAN_RING_TYPE_CP1_INDEX:
3446 case CAYMAN_RING_TYPE_CP2_INDEX:
3450 ref_and_mask = CP2 << ring->pipe;
3453 ref_and_mask = CP6 << ring->pipe;
3459 case RADEON_RING_TYPE_GFX_INDEX:
/* WAIT_REG_MEM in write/wait/write mode: request the HDP flush, then
 * busy-wait until GPU_HDP_FLUSH_DONE matches.
 */
3464 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3465 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3466 WAIT_REG_MEM_FUNCTION(3) | /* == */
3467 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3468 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3469 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3470 radeon_ring_write(ring, ref_and_mask);
3471 radeon_ring_write(ring, ref_and_mask);
3472 radeon_ring_write(ring, 0x20); /* poll interval */
3476 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3478 * @rdev: radeon_device pointer
3479 * @fence: radeon fence object
3481 * Emits a fence sequence number on the gfx ring and flushes
3484 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3485 struct radeon_fence *fence)
3487 struct radeon_ring *ring = &rdev->ring[fence->ring];
3488 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3490 /* Workaround for cache flush problems. First send a dummy EOP
3491 * event down the pipe with seq one below.
3493 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3494 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3496 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* Fence address must be 4-byte aligned; low bits are dropped. */
3498 radeon_ring_write(ring, addr & 0xfffffffc);
/* Dummy EOP writes seq-1 with no interrupt (INT_SEL(0)). */
3499 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3500 DATA_SEL(1) | INT_SEL(0));
3501 radeon_ring_write(ring, fence->seq - 1);
3502 radeon_ring_write(ring, 0);
3504 /* Then send the real EOP event down the pipe. */
3505 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3506 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3508 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3510 radeon_ring_write(ring, addr & 0xfffffffc);
/* Real EOP: write the fence seq and raise an interrupt (INT_SEL(2)). */
3511 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3512 radeon_ring_write(ring, fence->seq);
3513 radeon_ring_write(ring, 0);
3517 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3519 * @rdev: radeon_device pointer
3520 * @fence: radeon fence object
3522 * Emits a fence sequence number on the compute ring and flushes
3525 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3526 struct radeon_fence *fence)
3528 struct radeon_ring *ring = &rdev->ring[fence->ring];
3529 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3531 /* RELEASE_MEM - flush caches, send int */
3532 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3533 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3535 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
/* Write the fence seq value and raise an interrupt (INT_SEL(2)). */
3537 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3538 radeon_ring_write(ring, addr & 0xfffffffc);
3539 radeon_ring_write(ring, upper_32_bits(addr));
3540 radeon_ring_write(ring, fence->seq);
3541 radeon_ring_write(ring, 0);
3545 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3547 * @rdev: radeon_device pointer
3548 * @ring: radeon ring buffer object
3549 * @semaphore: radeon semaphore object
3550 * @emit_wait: Is this a semaphore wait?
3552 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3553 * from running ahead of semaphore waits.
3555 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3556 struct radeon_ring *ring,
3557 struct radeon_semaphore *semaphore,
3560 uint64_t addr = semaphore->gpu_addr;
/* sel encodes whether this packet is a wait or a signal. */
3561 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3563 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3564 radeon_ring_write(ring, lower_32_bits(addr));
3565 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
/* Only the gfx ring has a PFP that can run ahead; compute rings don't
 * need the sync packet.
 */
3567 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3568 /* Prevent the PFP from running ahead of the semaphore wait */
3569 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3570 radeon_ring_write(ring, 0x0);
3577 * cik_copy_cpdma - copy pages using the CP DMA engine
3579 * @rdev: radeon_device pointer
3580 * @src_offset: src GPU address
3581 * @dst_offset: dst GPU address
3582 * @num_gpu_pages: number of GPU pages to xfer
3583 * @resv: reservation object to sync to
3585 * Copy GPU paging using the CP DMA engine (CIK+).
3586 * Used by the radeon ttm implementation to move pages if
3587 * registered as the asic copy callback.
3589 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3590 uint64_t src_offset, uint64_t dst_offset,
3591 unsigned num_gpu_pages,
3592 struct reservation_object *resv)
3594 struct radeon_fence *fence;
3595 struct radeon_sync sync;
3596 int ring_index = rdev->asic->copy.blit_ring_index;
3597 struct radeon_ring *ring = &rdev->ring[ring_index];
3598 u32 size_in_bytes, cur_size_in_bytes, control;
3602 radeon_sync_create(&sync);
3604 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
/* A single DMA_DATA packet copies at most 0x1fffff bytes. */
3605 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
/* 7 dwords per copy packet plus fixed overhead for sync + fence. */
3606 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3608 DRM_ERROR("radeon: moving bo (%d).\n", r);
3609 radeon_sync_free(rdev, &sync, NULL);
/* Wait for any fences attached to the reservation object first. */
3613 radeon_sync_resv(rdev, &sync, resv, false);
3614 radeon_sync_rings(rdev, &sync, ring->idx);
3616 for (i = 0; i < num_loops; i++) {
3617 cur_size_in_bytes = size_in_bytes;
3618 if (cur_size_in_bytes > 0x1fffff)
3619 cur_size_in_bytes = 0x1fffff;
3620 size_in_bytes -= cur_size_in_bytes;
/* Only the last packet gets CP_SYNC so the CP stalls until it lands. */
3622 if (size_in_bytes == 0)
3623 control |= PACKET3_DMA_DATA_CP_SYNC;
3624 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3625 radeon_ring_write(ring, control);
3626 radeon_ring_write(ring, lower_32_bits(src_offset));
3627 radeon_ring_write(ring, upper_32_bits(src_offset));
3628 radeon_ring_write(ring, lower_32_bits(dst_offset));
3629 radeon_ring_write(ring, upper_32_bits(dst_offset));
3630 radeon_ring_write(ring, cur_size_in_bytes);
3631 src_offset += cur_size_in_bytes;
3632 dst_offset += cur_size_in_bytes;
3635 r = radeon_fence_emit(rdev, &fence, ring->idx);
/* Fence emit failed: undo the queued packets and drop the sync object. */
3637 radeon_ring_unlock_undo(rdev, ring);
3638 radeon_sync_free(rdev, &sync, NULL);
3642 radeon_ring_unlock_commit(rdev, ring, false);
3643 radeon_sync_free(rdev, &sync, fence);
3652 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3654 * @rdev: radeon_device pointer
3655 * @ib: radeon indirect buffer object
3657 * Emits a DE (drawing engine) or CE (constant engine) IB
3658 * on the gfx ring. IBs are usually generated by userspace
3659 * acceleration drivers and submitted to the kernel for
3660 * scheduling on the ring. This function schedules the IB
3661 * on the gfx ring for execution by the GPU.
3663 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3665 struct radeon_ring *ring = &rdev->ring[ib->ring];
/* VMID 0 is used when the IB has no VM attached. */
3666 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3667 u32 header, control = INDIRECT_BUFFER_VALID;
3669 if (ib->is_const_ib) {
3670 /* set switch buffer packet before const IB */
3671 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3672 radeon_ring_write(ring, 0);
3674 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3677 if (ring->rptr_save_reg) {
/* next_rptr = wptr + dwords about to be queued (3 here, 4 for the IB
 * packet itself) so the CP records where to resume after the IB.
 */
3678 next_rptr = ring->wptr + 3 + 4;
3679 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3680 radeon_ring_write(ring, ((ring->rptr_save_reg -
3681 PACKET3_SET_UCONFIG_REG_START) >> 2));
3682 radeon_ring_write(ring, next_rptr);
3683 } else if (rdev->wb.enabled) {
/* No save reg: write next_rptr through the writeback buffer instead. */
3684 next_rptr = ring->wptr + 5 + 4;
3685 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3686 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3687 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3688 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3689 radeon_ring_write(ring, next_rptr);
3692 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
/* control carries the IB size in dwords and the VMID in bits 31:24. */
3695 control |= ib->length_dw | (vm_id << 24);
3697 radeon_ring_write(ring, header);
3698 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3699 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3700 radeon_ring_write(ring, control);
3704 * cik_ib_test - basic gfx ring IB test
3706 * @rdev: radeon_device pointer
3707 * @ring: radeon_ring structure holding ring information
3709 * Allocate an IB and execute it on the gfx ring (CIK).
3710 * Provides a basic gfx ring test to verify that IBs are working.
3711 * Returns 0 on success, error on failure.
3713 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3715 struct radeon_ib ib;
3721 r = radeon_scratch_get(rdev, &scratch);
3723 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
/* Seed the scratch reg; the IB must overwrite it for the test to pass. */
3726 WREG32(scratch, 0xCAFEDEAD);
3727 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3729 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3730 radeon_scratch_free(rdev, scratch);
/* Same 3-dword write as the ring test, but packaged in an IB. */
3733 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3734 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3735 ib.ptr[2] = 0xDEADBEEF;
3737 r = radeon_ib_schedule(rdev, &ib, NULL, false);
3739 radeon_scratch_free(rdev, scratch);
3740 radeon_ib_free(rdev, &ib);
3741 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
/* Bounded wait on the IB's fence; 0 means the wait timed out. */
3744 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3745 RADEON_USEC_IB_TEST_TIMEOUT));
3747 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3748 radeon_scratch_free(rdev, scratch);
3749 radeon_ib_free(rdev, &ib);
3751 } else if (r == 0) {
3752 DRM_ERROR("radeon: fence wait timed out.\n");
3753 radeon_scratch_free(rdev, scratch);
3754 radeon_ib_free(rdev, &ib);
/* Fence signaled: poll the scratch reg for the magic value. */
3758 for (i = 0; i < rdev->usec_timeout; i++) {
3759 tmp = RREG32(scratch);
3760 if (tmp == 0xDEADBEEF)
3764 if (i < rdev->usec_timeout) {
3765 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3767 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3771 radeon_scratch_free(rdev, scratch);
3772 radeon_ib_free(rdev, &ib);
3778 * On CIK, gfx and compute now have independent command processors.
3781 * Gfx consists of a single ring and can process both gfx jobs and
3782 * compute jobs. The gfx CP consists of three microengines (ME):
3783 * PFP - Pre-Fetch Parser
3785 * CE - Constant Engine
3786 * The PFP and ME make up what is considered the Drawing Engine (DE).
3787 * The CE is an asynchronous engine used for updating buffer descriptors
3788 * used by the DE so that they can be loaded into cache in parallel
3789 * while the DE is processing state update packets.
3792 * The compute CP consists of two microengines (ME):
3793 * MEC1 - Compute MicroEngine 1
3794 * MEC2 - Compute MicroEngine 2
3795 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3796 * The queues are exposed to userspace and are programmed directly
3797 * by the compute runtime.
3800 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3802 * @rdev: radeon_device pointer
3803 * @enable: enable or disable the MEs
3805 * Halts or unhalts the gfx MEs.
3807 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
/* Clearing CP_ME_CNTL releases all three gfx micro engines. */
3810 WREG32(CP_ME_CNTL, 0);
3812 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3813 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
/* Halt PFP, ME and CE and mark the gfx ring unusable. */
3814 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3815 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3821 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3823 * @rdev: radeon_device pointer
3825 * Loads the gfx PFP, ME, and CE ucode.
3826 * Returns 0 for success, -EINVAL if the ucode is not available.
3828 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
/* All three gfx firmware images must be present. */
3832 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
/* Halt the gfx MEs before touching their ucode RAM. */
3835 cik_cp_gfx_enable(rdev, false);
/* New-style firmware carries a little-endian header describing the
 * ucode payload offset and size.
 */
3838 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3839 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3840 const struct gfx_firmware_header_v1_0 *ce_hdr =
3841 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3842 const struct gfx_firmware_header_v1_0 *me_hdr =
3843 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3844 const __le32 *fw_data;
3847 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3848 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3849 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3852 fw_data = (const __le32 *)
3853 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3854 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3855 WREG32(CP_PFP_UCODE_ADDR, 0);
3856 for (i = 0; i < fw_size; i++)
3857 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3858 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3861 fw_data = (const __le32 *)
3862 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3863 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3864 WREG32(CP_CE_UCODE_ADDR, 0);
3865 for (i = 0; i < fw_size; i++)
3866 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3867 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
/* Cast fixed to __le32: fw_data is declared const __le32 * and is read
 * with le32_to_cpup(), matching the PFP and CE sections above.  The
 * previous (const __be32 *) cast was an endian-annotation mismatch.
 */
3870 fw_data = (const __le32 *)
3871 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3872 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3873 WREG32(CP_ME_RAM_WADDR, 0);
3874 for (i = 0; i < fw_size; i++)
3875 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3876 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3877 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
/* Legacy headerless firmware is stored big-endian with fixed sizes. */
3879 const __be32 *fw_data;
3882 fw_data = (const __be32 *)rdev->pfp_fw->data;
3883 WREG32(CP_PFP_UCODE_ADDR, 0);
3884 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3885 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3886 WREG32(CP_PFP_UCODE_ADDR, 0);
3889 fw_data = (const __be32 *)rdev->ce_fw->data;
3890 WREG32(CP_CE_UCODE_ADDR, 0);
3891 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3892 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3893 WREG32(CP_CE_UCODE_ADDR, 0);
3896 fw_data = (const __be32 *)rdev->me_fw->data;
3897 WREG32(CP_ME_RAM_WADDR, 0);
3898 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3899 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3900 WREG32(CP_ME_RAM_WADDR, 0);
3907 * cik_cp_gfx_start - start the gfx ring
3909 * @rdev: radeon_device pointer
3911 * Enables the ring and loads the clear state context and other
3912 * packets required to init the ring.
3913 * Returns 0 for success, error for failure.
3915 static int cik_cp_gfx_start(struct radeon_device *rdev)
3917 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3921 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3922 WREG32(CP_ENDIAN_SWAP, 0);
3923 WREG32(CP_DEVICE_ID, 1);
3925 cik_cp_gfx_enable(rdev, true);
/* Reserve room for the whole clear-state table plus the fixed packets. */
3927 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3929 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3933 /* init the CE partitions. CE only used for gfx on CIK */
3934 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3935 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3936 radeon_ring_write(ring, 0x8000);
3937 radeon_ring_write(ring, 0x8000);
3939 /* setup clear context state */
3940 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3941 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3943 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3944 radeon_ring_write(ring, 0x80000000);
3945 radeon_ring_write(ring, 0x80000000);
/* Stream the golden clear-state table from clearstate_ci.h. */
3947 for (i = 0; i < cik_default_size; i++)
3948 radeon_ring_write(ring, cik_default_state[i]);
3950 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3951 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3953 /* set clear context state */
3954 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3955 radeon_ring_write(ring, 0);
3957 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3958 radeon_ring_write(ring, 0x00000316);
3959 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3960 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3962 radeon_ring_unlock_commit(rdev, ring, false);
3968 * cik_cp_gfx_fini - stop the gfx ring
3970 * @rdev: radeon_device pointer
3972 * Stop the gfx ring and tear down the driver ring
3975 static void cik_cp_gfx_fini(struct radeon_device *rdev)
/* Halt the gfx MEs, then release the ring buffer. */
3977 cik_cp_gfx_enable(rdev, false);
3978 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3982 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3984 * @rdev: radeon_device pointer
3986 * Program the location and size of the gfx ring buffer
3987 * and test it to make sure it's working.
3988 * Returns 0 for success, error for failure.
3990 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3992 struct radeon_ring *ring;
3998 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3999 if (rdev->family != CHIP_HAWAII)
4000 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4002 /* Set the write pointer delay */
4003 WREG32(CP_RB_WPTR_DELAY, 0);
4005 /* set the RB to use vmid 0 */
4006 WREG32(CP_RB_VMID, 0);
4008 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4010 /* ring 0 - compute and gfx */
4011 /* Set ring buffer size */
4012 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* CP_RB0_CNTL wants log2 sizes: ring size in 8-byte units. */
4013 rb_bufsz = order_base_2(ring->ring_size / 8);
4014 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4016 tmp |= BUF_SWAP_32BIT;
4018 WREG32(CP_RB0_CNTL, tmp);
4020 /* Initialize the ring buffer's read and write pointers */
4021 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4023 WREG32(CP_RB0_WPTR, ring->wptr);
4025 /* set the wb address whether it's enabled or not */
4026 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4027 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4029 /* scratch register shadowing is no longer supported */
4030 WREG32(SCRATCH_UMSK, 0);
4032 if (!rdev->wb.enabled)
4033 tmp |= RB_NO_UPDATE;
4036 WREG32(CP_RB0_CNTL, tmp);
/* Ring base address is programmed in 256-byte units. */
4038 rb_addr = ring->gpu_addr >> 8;
4039 WREG32(CP_RB0_BASE, rb_addr);
4040 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4042 /* start the ring */
4043 cik_cp_gfx_start(rdev);
4044 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4045 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4047 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4051 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4052 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4057 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4058 struct radeon_ring *ring)
/* Prefer the writeback copy of the read pointer; fall back to MMIO. */
4062 if (rdev->wb.enabled)
4063 rptr = rdev->wb.wb[ring->rptr_offs/4];
4065 rptr = RREG32(CP_RB0_RPTR);
4070 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4071 struct radeon_ring *ring)
/* gfx write pointer always comes straight from the register. */
4073 return RREG32(CP_RB0_WPTR);
4076 void cik_gfx_set_wptr(struct radeon_device *rdev,
4077 struct radeon_ring *ring)
4079 WREG32(CP_RB0_WPTR, ring->wptr);
/* Read back to flush the write through the posted-write buffer. */
4080 (void)RREG32(CP_RB0_WPTR);
4083 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4084 struct radeon_ring *ring)
4088 if (rdev->wb.enabled) {
4089 rptr = rdev->wb.wb[ring->rptr_offs/4];
/* No writeback: select this queue's HQD via SRBM and read the
 * register directly; srbm_mutex serializes the banked access.
 */
4091 mutex_lock(&rdev->srbm_mutex);
4092 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4093 rptr = RREG32(CP_HQD_PQ_RPTR);
4094 cik_srbm_select(rdev, 0, 0, 0, 0);
4095 mutex_unlock(&rdev->srbm_mutex);
4101 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4102 struct radeon_ring *ring)
4106 if (rdev->wb.enabled) {
4107 /* XXX check if swapping is necessary on BE */
4108 wptr = rdev->wb.wb[ring->wptr_offs/4];
/* No writeback: read the queue's HQD write pointer under srbm_mutex. */
4110 mutex_lock(&rdev->srbm_mutex);
4111 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4112 wptr = RREG32(CP_HQD_PQ_WPTR);
4113 cik_srbm_select(rdev, 0, 0, 0, 0);
4114 mutex_unlock(&rdev->srbm_mutex);
4120 void cik_compute_set_wptr(struct radeon_device *rdev,
4121 struct radeon_ring *ring)
4123 /* XXX check if swapping is necessary on BE */
/* Update the writeback copy, then ring the queue's doorbell. */
4124 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4125 WDOORBELL32(ring->doorbell_index, ring->wptr);
4128 static void cik_compute_stop(struct radeon_device *rdev,
4129 struct radeon_ring *ring)
/* Caller holds srbm_mutex; select this queue's HQD registers. */
4133 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4134 /* Disable wptr polling. */
4135 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4136 tmp &= ~WPTR_POLL_EN;
4137 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
/* If the hardware queue is still active, request a dequeue and wait
 * (bounded by usec_timeout) for it to drain, then reset the pointers.
 */
4139 if (RREG32(CP_HQD_ACTIVE) & 1) {
4140 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4141 for (j = 0; j < rdev->usec_timeout; j++) {
4142 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4146 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4147 WREG32(CP_HQD_PQ_RPTR, 0);
4148 WREG32(CP_HQD_PQ_WPTR, 0);
4150 cik_srbm_select(rdev, 0, 0, 0, 0);
4154 * cik_cp_compute_enable - enable/disable the compute CP MEs
4156 * @rdev: radeon_device pointer
4157 * @enable: enable or disable the MEs
4159 * Halts or unhalts the compute MEs.
4161 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
/* Clearing CP_MEC_CNTL releases both compute micro engines. */
4164 WREG32(CP_MEC_CNTL, 0);
4167 * To make hibernation reliable we need to clear compute ring
4168 * configuration before halting the compute ring.
4170 mutex_lock(&rdev->srbm_mutex);
4171 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4172 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4173 mutex_unlock(&rdev->srbm_mutex);
/* Halt MEC1/MEC2 and mark both compute rings unusable. */
4175 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4176 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4177 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4183 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4185 * @rdev: radeon_device pointer
4187 * Loads the compute MEC1&2 ucode.
4188 * Returns 0 for success, -EINVAL if the ucode is not available.
4190 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
/* Halt the compute MEs before rewriting their ucode. */
4197 cik_cp_compute_enable(rdev, false);
/* New-style firmware: little-endian payload described by the header. */
4200 const struct gfx_firmware_header_v1_0 *mec_hdr =
4201 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4202 const __le32 *fw_data;
4205 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4208 fw_data = (const __le32 *)
4209 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4210 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4211 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4212 for (i = 0; i < fw_size; i++)
4213 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4214 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
/* Only Kaveri has a second MEC with its own firmware image. */
4217 if (rdev->family == CHIP_KAVERI) {
4218 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4219 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4221 fw_data = (const __le32 *)
4222 (rdev->mec2_fw->data +
4223 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4224 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4225 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4226 for (i = 0; i < fw_size; i++)
4227 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4228 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
/* Legacy headerless firmware is big-endian with a fixed size. */
4231 const __be32 *fw_data;
4234 fw_data = (const __be32 *)rdev->mec_fw->data;
4235 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4236 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4237 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4238 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4240 if (rdev->family == CHIP_KAVERI) {
/* NOTE(review): old-style path loads MEC2 from mec_fw, not mec2_fw —
 * presumably legacy firmware has no separate MEC2 image; confirm.
 */
4242 fw_data = (const __be32 *)rdev->mec_fw->data;
4243 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4244 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4245 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4246 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4254 * cik_cp_compute_start - start the compute queues
4256 * @rdev: radeon_device pointer
4258 * Enable the compute queues.
4259 * Returns 0 for success, error for failure.
4261 static int cik_cp_compute_start(struct radeon_device *rdev)
/* Just release the compute MEs; queue setup happens elsewhere. */
4263 cik_cp_compute_enable(rdev, true);
4269 * cik_cp_compute_fini - stop the compute queues
4271 * @rdev: radeon_device pointer
4273 * Stop the compute queues and tear down the driver queue
4276 static void cik_cp_compute_fini(struct radeon_device *rdev)
4280 cik_cp_compute_enable(rdev, false);
/* Tear down the MQD buffer object for both compute rings. */
4282 for (i = 0; i < 2; i++) {
4284 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4286 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4288 if (rdev->ring[idx].mqd_obj) {
4289 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
/* Best-effort: warn but continue tearing down on reserve failure. */
4290 if (unlikely(r != 0))
4291 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4293 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4294 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4296 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4297 rdev->ring[idx].mqd_obj = NULL;
/* Free the HPD EOP buffer object allocated by cik_mec_init(). */
4302 static void cik_mec_fini(struct radeon_device *rdev)
4306 if (rdev->mec.hpd_eop_obj) {
4307 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
/* Best-effort: warn but continue tearing down on reserve failure. */
4308 if (unlikely(r != 0))
4309 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4310 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4311 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4313 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4314 rdev->mec.hpd_eop_obj = NULL;
4318 #define MEC_HPD_SIZE 2048
/* cik_mec_init - allocate, pin and clear the HPD EOP buffer used by the
 * MEC (compute microengine) pipes.  Returns 0 on success, negative
 * error code on failure. */
static int cik_mec_init(struct radeon_device *rdev)
	/*
	 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled elsewhere (e.g. by amdkfd).
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* MEC_HPD_SIZE * 2 bytes of EOP space per pipe */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
			/* NOTE(review): message says "HDP" but this is the
			 * HPD EOP bo — pre-existing typo in the log text. */
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);

	/* clear memory. Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* CPU-side shadow of the per-queue HQD (hardware queue descriptor)
 * registers; embedded in the MQD so queue state can be saved/restored.
 * Field names mirror the CP_HQD_* / CP_MQD_* register names.
 * NOTE(review): some fields of this struct fall outside this view. */
struct hqd_registers
	u32 cp_mqd_base_addr;		/* MQD base address, low bits */
	u32 cp_mqd_base_addr_hi;	/* MQD base address, high bits */
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr_report_addr;		/* rptr writeback address */
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;		/* wptr poll address */
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	/* NOTE(review): the fields below belong to struct bonaire_mqd (the
	 * memory queue descriptor written to the MQD bo); the struct's
	 * opening declaration is not visible in this view. */
	u32 dispatch_initiator;
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 resource_limits;
	/* set to 0xffffffff in cik_cp_compute_resume() */
	u32 static_thread_mgmt01[2];
	u32 static_thread_mgmt23[2];
	u32 thread_trace_enable;
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow */
	u32 interrupt_queue[64];
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 *
 * NOTE(review): several guard/branch lines of this function are not
 * visible in this view (error checks after radeon_bo_* calls, the
 * if/else pairs selecting ring index and writeback offsets, endian and
 * doorbell conditionals); the statements below are kept verbatim.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
	bool use_doorbell = true;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < rdev->mec.num_pipe; ++i) {
		cik_srbm_select(rdev, 0, i, 0, 0);

		/* each pipe gets MEC_HPD_SIZE * 2 bytes of EOP space */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues. Just two for now. */
	for (i = 0; i < 2; i++) {
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* poll until the queue reports inactive */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

			/* else branch: doorbell disabled */
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
			rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute
 * command processors in one call. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load the gfx and compute CP microcode.
 * Returns 0 for success, error for failure.
 * NOTE(review): the error-check lines between the two calls are not
 * visible in this view. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
	r = cik_cp_gfx_load_microcode(rdev);
	r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
/* cik_cp_resume - load CP microcode and bring up the gfx and compute
 * rings.  GUI idle interrupts are masked while the CP is reprogrammed.
 * Returns 0 for success, error for failure. */
static int cik_cp_resume(struct radeon_device *rdev)
	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);

	r = cik_cp_gfx_resume(rdev);
	r = cik_cp_compute_resume(rdev);

	cik_enable_gui_idle_interrupt(rdev, true);
/* cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status
 * registers to the kernel log; used when diagnosing hangs/resets. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 *
 * NOTE(review): several of the `if` guards testing status bits are not
 * visible in this view (e.g. the conditions before the IH/SEM/VMC
 * mask updates); statements are kept verbatim.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

		reset_mask |= RADEON_RESET_IH;

		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if (reset_mask == 0)

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0: halt the engine before resetting it */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset only on discrete parts (dGPU); IGPs share the MC */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert, then deassert the GRBM reset bits; reads flush
		 * the writes */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

	if (srbm_soft_reset) {
		/* same assert/deassert dance for the SRBM blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

	/* Wait a little for things to settle down */

	evergreen_mc_resume(rdev, &save);

	cik_print_gpu_status_regs(rdev);
/* GMCON register state saved across a KV (Kaveri) pci config reset.
 * NOTE(review): the remaining fields (gmcon_misc, gmcon_misc3 — see
 * kv_save_regs_for_reset()) are outside this view. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
/* kv_save_regs_for_reset - save the GMCON registers that a pci config
 * reset clobbers on Kaveri, then disable the restore-engine triggers
 * so it doesn't run while the reset is in flight. */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
/* kv_restore_regs_for_reset - replay the GMCON PGFSM programming
 * sequence after a pci config reset on Kaveri and restore the saved
 * GMCON registers.  The config/write value pairs are a fixed
 * hardware-defined sequence; do not reorder. */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore the registers saved by kv_save_regs_for_reset() */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
/* cik_gpu_pci_config_reset - full-asic reset via the PCI config space.
 * Halts all engines, stops memory access, saves IGP-only GMCON state,
 * triggers the reset and waits for the asic to come back. */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* halt sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");

	/* IGPs need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads as all-ones while still in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force hard reset
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
	/* hard reset path: go straight to a pci config reset */
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* reset_mask == 0 here means the GPU came back clean */
		r600_set_bios_scratch_engine_hung(rdev, false);
/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		/* engine not busy/hung: refresh the lockup tracker */
		radeon_ring_lockup_update(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
	struct evergreen_mc_save save;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION: top 16 bits = vram end, low 16 bits = vram start,
	 * both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP aperture (bot > top) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 *
 * NOTE(review): the switch cases assigning numchan and the chansize
 * branches are not visible in this view.
 */
static int cik_mc_init(struct radeon_device *rdev)
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);
5327 * VMID 0 is the physical GPU addresses as used by the kernel.
5328 * VMIDs 1-15 are used for userspace clients and are handled
5329 * by the radeon vm/hsa code.
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
/* cik_pcie_init_compute_vmid - program SH_MEM_* for VMIDs 8-15, which
 * are reserved for amdkfd compute clients (see cik_vm_init). */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
	uint32_t sh_mem_bases, sh_mem_config;

	/* same private/shared apertures (0x6000) for all compute VMIDs */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		/* APE1 disabled (base > limit) */
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
	r = radeon_gart_table_vram_pin(rdev);

	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 (kernel/physical GPU addresses) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 1-7 use the CONTEXT0-relative bank, 8-15 the
	 * CONTEXT8-relative bank.  NOTE(review): the i<8 branch line is
	 * not visible in this view. */
	for (i = 1; i < 16; i++) {
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		WREG32(CHUB_CONTROL, tmp);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		/* APE1 disabled (base > limit) */
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
	/* save the per-context page table base addrs so
	 * cik_pcie_gart_enable() can restore them after resume.
	 * NOTE(review): the i<8 branch line is not visible here. */
	for (i = 1; i < 16; ++i) {
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
5532 * cik_pcie_gart_fini - vm fini callback
5534 * @rdev: radeon_device pointer
5536 * Tears down the driver GART/VM setup (CIK).
5538 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* Full GART teardown: disable the hardware, free the VRAM page table
 * BO, then release the common gart bookkeeping. Order matters: the
 * table must be unused (disabled) before it is freed. */
5540 cik_pcie_gart_disable(rdev);
5541 radeon_gart_table_vram_free(rdev);
5542 radeon_gart_fini(rdev);
5547 * cik_ib_parse - vm ib_parse callback
5549 * @rdev: radeon_device pointer
5550 * @ib: indirect buffer pointer
5552 * CIK uses hw IB checking so this is a nop (CIK).
5554 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5561 * VMID 0 is the physical GPU addresses as used by the kernel.
5562 * VMIDs 1-15 are used for userspace clients and are handled
5563 * by the radeon vm/hsa code.
5566 * cik_vm_init - cik vm init callback
5568 * @rdev: radeon_device pointer
5570 * Inits cik specific vm parameters (number of VMs, base of vram for
5571 * VMIDs 1-15) (CIK).
5572 * Returns 0 for success.
5574 int cik_vm_init(struct radeon_device *rdev)
/* Program the VM manager: total VMID count and the VRAM base offset
 * used for VMIDs 1-15. On IGPs the framebuffer offset comes from
 * MC_VM_FB_OFFSET; on discrete parts it is 0.
 * NOTE(review): lines are missing from this excerpt (e.g. the shift
 * applied to MC_VM_FB_OFFSET and the else keyword). */
5578 * VMID 0 is reserved for System
5579 * radeon graphics/compute will use VMIDs 1-7
5580 * amdkfd will use VMIDs 8-15
5582 rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5583 /* base offset of vram pages */
5584 if (rdev->flags & RADEON_IS_IGP) {
5585 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5587 rdev->vm_manager.vram_base_offset = tmp;
5589 rdev->vm_manager.vram_base_offset = 0;
5595 * cik_vm_fini - cik vm fini callback
5597 * @rdev: radeon_device pointer
5599 * Tear down any asic specific VM setup (CIK).
5601 void cik_vm_fini(struct radeon_device *rdev)
5606 * cik_vm_decode_fault - print human readable fault info
5608 * @rdev: radeon_device pointer
5609 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5610 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5612 * Print human readable fault information (CIK).
5614 static void cik_vm_decode_fault(struct radeon_device *rdev,
5615 u32 status, u32 addr, u32 mc_client)
/* Decode a VM protection fault into a human-readable log line.
 * mc_client is a 4-character FourCC packed big-endian into a u32, so
 * it is unpacked byte-by-byte into a NUL-terminated string. Hawaii
 * uses a wider memory-client-id field than the other CIK parts. */
5618 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5619 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5620 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5621 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5623 if (rdev->family == CHIP_HAWAII)
5624 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5626 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5628 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5629 protections, vmid, addr,
5630 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5631 block, mc_client, mc_id);
5635 * cik_vm_flush - cik vm flush using the CP
5637 * @rdev: radeon_device pointer
5639 * Update the page table base and flush the VM TLB
5640 * using the CP (CIK).
5642 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5643 unsigned vm_id, uint64_t pd_addr)
/* Emit CP packets that (1) write the new page-directory base for
 * vm_id, (2) reprogram the SH_MEM_* aperture registers for that VMID
 * via SRBM_GFX_CNTL, (3) flush HDP, and (4) invalidate the VM TLB and
 * wait for the invalidate to complete. usepfp selects whether the PFP
 * or ME engine performs the writes (compute rings have no PFP).
 * NOTE(review): this excerpt is missing lines (original-number gaps),
 * e.g. the if/else choosing the CONTEXT0 vs CONTEXT8 register bank
 * for vm_id < 8 vs >= 8, and the usepfp guard around PFP_SYNC_ME. */
5645 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5647 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5648 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5649 WRITE_DATA_DST_SEL(0)));
5651 radeon_ring_write(ring,
5652 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5654 radeon_ring_write(ring,
5655 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5657 radeon_ring_write(ring, 0);
5658 radeon_ring_write(ring, pd_addr >> 12);
5660 /* update SH_MEM_* regs */
5661 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5663 WRITE_DATA_DST_SEL(0)));
/* Select the VMID's SRBM bank before touching SH_MEM_* registers. */
5664 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5665 radeon_ring_write(ring, 0);
5666 radeon_ring_write(ring, VMID(vm_id));
5668 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5669 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5670 WRITE_DATA_DST_SEL(0)));
5671 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5672 radeon_ring_write(ring, 0);
5674 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5675 radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5676 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5677 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5679 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5680 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5681 WRITE_DATA_DST_SEL(0)));
/* Restore SRBM bank to VMID 0 after programming the apertures. */
5682 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5683 radeon_ring_write(ring, 0);
5684 radeon_ring_write(ring, VMID(0));
5687 cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5689 /* bits 0-15 are the VM contexts0-15 */
5690 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5691 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5692 WRITE_DATA_DST_SEL(0)));
5693 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5694 radeon_ring_write(ring, 0);
5695 radeon_ring_write(ring, 1 << vm_id);
5697 /* wait for the invalidate to complete */
5698 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5699 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5700 WAIT_REG_MEM_FUNCTION(0) | /* always */
5701 WAIT_REG_MEM_ENGINE(0))); /* me */
5702 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5703 radeon_ring_write(ring, 0);
5704 radeon_ring_write(ring, 0); /* ref */
5705 radeon_ring_write(ring, 0); /* mask */
5706 radeon_ring_write(ring, 0x20); /* poll interval */
5708 /* compute doesn't have PFP */
5710 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5711 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5712 radeon_ring_write(ring, 0x0);
5718 * The RLC is a multi-purpose microengine that handles a
5719 * variety of functions, the most important of which is
5720 * the interrupt controller.
5722 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
/* Toggle the context busy/empty ("GUI idle") interrupt sources in
 * CP_INT_CNTL_RING0 via read-modify-write. The bool parameter name
 * is not visible in this excerpt (presumably "enable"). */
5725 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5728 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5730 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5731 WREG32(CP_INT_CNTL_RING0, tmp);
5734 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
/* Enable/disable RLC load-balancing (LBPW) by flipping
 * LOAD_BALANCE_ENABLE in RLC_LB_CNTL (read-modify-write). */
5738 tmp = RREG32(RLC_LB_CNTL);
5740 tmp |= LOAD_BALANCE_ENABLE;
5742 tmp &= ~LOAD_BALANCE_ENABLE;
5743 WREG32(RLC_LB_CNTL, tmp);
5746 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
/* Poll until the RLC serdes units report idle: first the per-CU
 * masters for every SE/SH (selected under grbm_idx_mutex), then the
 * non-CU masters (GC/TC). Each poll loop is bounded by usec_timeout;
 * the udelay between polls is in lines not visible in this excerpt. */
5751 mutex_lock(&rdev->grbm_idx_mutex);
5752 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5753 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5754 cik_select_se_sh(rdev, i, j);
5755 for (k = 0; k < rdev->usec_timeout; k++) {
5756 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* Restore broadcast mode (all SEs/SHs) before dropping the lock. */
5762 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5763 mutex_unlock(&rdev->grbm_idx_mutex);
5765 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5766 for (k = 0; k < rdev->usec_timeout; k++) {
5767 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5773 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
/* Write the given RLC_CNTL value only if it differs from the current
 * register contents (the comparison line is not visible here). */
5777 tmp = RREG32(RLC_CNTL);
5779 WREG32(RLC_CNTL, rlc);
5782 static u32 cik_halt_rlc(struct radeon_device *rdev)
/* Halt the RLC if it is running: clear RLC_ENABLE, poll RLC_GPM_STAT
 * until not busy (bounded by usec_timeout), then wait for the serdes
 * to drain. Returns the original RLC_CNTL value so the caller can
 * restore it with cik_update_rlc(). */
5786 orig = data = RREG32(RLC_CNTL);
5788 if (data & RLC_ENABLE) {
5791 data &= ~RLC_ENABLE;
5792 WREG32(RLC_CNTL, data);
5794 for (i = 0; i < rdev->usec_timeout; i++) {
5795 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5800 cik_wait_for_rlc_serdes(rdev);
5806 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
/* Request RLC safe mode via the GPR_REG2 mailbox, then wait (bounded
 * by usec_timeout) for GFX power/clock status to assert and for the
 * REQ bit to be acknowledged (cleared) by the RLC. */
5810 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5811 WREG32(RLC_GPR_REG2, tmp);
5813 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5814 for (i = 0; i < rdev->usec_timeout; i++) {
5815 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5820 for (i = 0; i < rdev->usec_timeout; i++) {
5821 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5827 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
/* Post the exit-safe-mode message; unlike enter, no ack is awaited
 * in the lines visible here. */
5831 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5832 WREG32(RLC_GPR_REG2, tmp);
5836 * cik_rlc_stop - stop the RLC ME
5838 * @rdev: radeon_device pointer
5840 * Halt the RLC ME (MicroEngine) (CIK).
5842 static void cik_rlc_stop(struct radeon_device *rdev)
/* Hard-stop the RLC: clear RLC_CNTL, mask the GUI-idle interrupts,
 * and wait for the serdes units to go idle. */
5844 WREG32(RLC_CNTL, 0);
5846 cik_enable_gui_idle_interrupt(rdev, false);
5848 cik_wait_for_rlc_serdes(rdev);
5852 * cik_rlc_start - start the RLC ME
5854 * @rdev: radeon_device pointer
5856 * Unhalt the RLC ME (MicroEngine) (CIK).
5858 static void cik_rlc_start(struct radeon_device *rdev)
/* Unhalt the RLC and re-enable the GUI-idle interrupt sources. */
5860 WREG32(RLC_CNTL, RLC_ENABLE);
5862 cik_enable_gui_idle_interrupt(rdev, true);
5868 * cik_rlc_resume - setup the RLC hw
5870 * @rdev: radeon_device pointer
5872 * Initialize the RLC registers, load the ucode,
5873 * and start the RLC (CIK).
5874 * Returns 0 for success, -EINVAL if the ucode is not available.
5876 static int cik_rlc_resume(struct radeon_device *rdev)
/* Bring up the RLC: reset CGCG/CGLS control, program load-balancing
 * parameters, upload the RLC microcode (new-style headered firmware
 * via rlc_firmware_header_v1_0, or legacy big-endian blobs with
 * per-family sizes), then start the RLC.
 * NOTE(review): this excerpt is missing lines (original-number gaps),
 * e.g. the rlc_fw NULL check / -EINVAL return, the cik_rlc_stop()
 * call, the if/else selecting new vs legacy firmware, and the family
 * case labels in the switch. */
5886 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5887 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5895 WREG32(RLC_LB_CNTR_INIT, 0);
5896 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5898 mutex_lock(&rdev->grbm_idx_mutex);
5899 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5900 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5901 WREG32(RLC_LB_PARAMS, 0x00600408);
5902 WREG32(RLC_LB_CNTL, 0x80000004);
5903 mutex_unlock(&rdev->grbm_idx_mutex);
5905 WREG32(RLC_MC_CNTL, 0);
5906 WREG32(RLC_UCODE_CNTL, 0);
/* New firmware layout: little-endian words after the ucode header. */
5909 const struct rlc_firmware_header_v1_0 *hdr =
5910 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5911 const __le32 *fw_data = (const __le32 *)
5912 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5914 radeon_ucode_print_rlc_hdr(&hdr->header);
5916 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5917 WREG32(RLC_GPM_UCODE_ADDR, 0);
5918 for (i = 0; i < size; i++)
5919 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5920 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
/* Legacy firmware layout: raw big-endian words, fixed per-family size. */
5922 const __be32 *fw_data;
5924 switch (rdev->family) {
5928 size = BONAIRE_RLC_UCODE_SIZE;
5931 size = KV_RLC_UCODE_SIZE;
5934 size = KB_RLC_UCODE_SIZE;
5937 size = ML_RLC_UCODE_SIZE;
5941 fw_data = (const __be32 *)rdev->rlc_fw->data;
5942 WREG32(RLC_GPM_UCODE_ADDR, 0);
5943 for (i = 0; i < size; i++)
5944 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5945 WREG32(RLC_GPM_UCODE_ADDR, 0);
5948 /* XXX - find out what chips support lbpw */
5949 cik_enable_lbpw(rdev, false);
5951 if (rdev->family == CHIP_BONAIRE)
5952 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5954 cik_rlc_start(rdev);
5959 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS). Enabling halts the RLC, programs the serdes
 * override under grbm_idx_mutex, restarts the RLC, and sets the
 * CGCG_EN|CGLS_EN bits; disabling masks GUI-idle interrupts and does
 * dummy CB_CGTT_SCLK_CTRL reads (flush ordering) before clearing the
 * bits. The final write happens only if data changed from orig
 * (comparison line not visible in this excerpt). */
5961 u32 data, orig, tmp, tmp2;
5963 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5965 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5966 cik_enable_gui_idle_interrupt(rdev, true);
5968 tmp = cik_halt_rlc(rdev);
5970 mutex_lock(&rdev->grbm_idx_mutex);
5971 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5972 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5973 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5974 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5975 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5976 mutex_unlock(&rdev->grbm_idx_mutex);
5978 cik_update_rlc(rdev, tmp);
5980 data |= CGCG_EN | CGLS_EN;
5982 cik_enable_gui_idle_interrupt(rdev, false);
5984 RREG32(CB_CGTT_SCLK_CTRL);
5985 RREG32(CB_CGTT_SCLK_CTRL);
5986 RREG32(CB_CGTT_SCLK_CTRL);
5987 RREG32(CB_CGTT_SCLK_CTRL);
5989 data &= ~(CGCG_EN | CGLS_EN);
5993 WREG32(RLC_CGCG_CGLS_CTRL, data);
5997 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep features (CP/RLC MGLS, CGTS). Both paths halt
 * the RLC, push a serdes override (MGCG_OVERRIDE_0 on enable,
 * MGCG_OVERRIDE_1 on disable) under grbm_idx_mutex, then restore the
 * RLC. Conditional WREG32s guarded by "if (orig != data)" are in
 * lines not visible in this excerpt. */
5999 u32 data, orig, tmp = 0;
6001 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6002 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6003 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6004 orig = data = RREG32(CP_MEM_SLP_CNTL);
6005 data |= CP_MEM_LS_EN;
6007 WREG32(CP_MEM_SLP_CNTL, data);
6011 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6015 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6017 tmp = cik_halt_rlc(rdev);
6019 mutex_lock(&rdev->grbm_idx_mutex);
6020 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6021 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6022 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6023 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6024 WREG32(RLC_SERDES_WR_CTRL, data);
6025 mutex_unlock(&rdev->grbm_idx_mutex);
6027 cik_update_rlc(rdev, tmp);
6029 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6030 orig = data = RREG32(CGTS_SM_CTRL_REG);
6031 data &= ~SM_MODE_MASK;
6032 data |= SM_MODE(0x2);
6033 data |= SM_MODE_ENABLE;
6034 data &= ~CGTS_OVERRIDE;
6035 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6036 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6037 data &= ~CGTS_LS_OVERRIDE;
6038 data &= ~ON_MONITOR_ADD_MASK;
6039 data |= ON_MONITOR_ADD_EN;
6040 data |= ON_MONITOR_ADD(0x96);
6042 WREG32(CGTS_SM_CTRL_REG, data);
/* Disable path: re-assert overrides and turn off memory light sleep. */
6045 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6048 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6050 data = RREG32(RLC_MEM_SLP_CNTL);
6051 if (data & RLC_MEM_LS_EN) {
6052 data &= ~RLC_MEM_LS_EN;
6053 WREG32(RLC_MEM_SLP_CNTL, data);
6056 data = RREG32(CP_MEM_SLP_CNTL);
6057 if (data & CP_MEM_LS_EN) {
6058 data &= ~CP_MEM_LS_EN;
6059 WREG32(CP_MEM_SLP_CNTL, data);
6062 orig = data = RREG32(CGTS_SM_CTRL_REG);
6063 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6065 WREG32(CGTS_SM_CTRL_REG, data);
6067 tmp = cik_halt_rlc(rdev);
6069 mutex_lock(&rdev->grbm_idx_mutex);
6070 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6071 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6072 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6073 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6074 WREG32(RLC_SERDES_WR_CTRL, data);
6075 mutex_unlock(&rdev->grbm_idx_mutex);
6077 cik_update_rlc(rdev, tmp);
6081 static const u32 mc_cg_registers[] =
6094 static void cik_enable_mc_ls(struct radeon_device *rdev,
/* Toggle MC_LS_ENABLE in every register of the mc_cg_registers table
 * (memory-controller light sleep), writing back only on change (the
 * "if (data != orig)" guard is in a line not visible here). */
6100 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6101 orig = data = RREG32(mc_cg_registers[i]);
6102 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6103 data |= MC_LS_ENABLE;
6105 data &= ~MC_LS_ENABLE;
6107 WREG32(mc_cg_registers[i], data);
6111 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
/* Same pattern as cik_enable_mc_ls, but for MC medium-grain clock
 * gating (MC_CG_ENABLE bit). */
6117 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6118 orig = data = RREG32(mc_cg_registers[i]);
6119 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6120 data |= MC_CG_ENABLE;
6122 data &= ~MC_CG_ENABLE;
6124 WREG32(mc_cg_registers[i], data);
6128 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
/* SDMA medium-grain clock gating for both SDMA engines. Enable path
 * writes 0x100 directly; disable path read-modify-writes each
 * engine's CLK_CTRL (the OR-mask applied to data on the disable path
 * is in lines not visible in this excerpt). */
6133 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6134 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6135 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6137 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6140 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6142 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6145 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6149 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
/* SDMA memory light sleep for both engines via SDMA0_POWER_CNTL
 * (engine 1 uses the SDMA1 register offset off the same base). The
 * bit set/cleared on data and the changed-only write guards are in
 * lines not visible in this excerpt. */
6154 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6155 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6158 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6160 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6163 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6165 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6168 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6170 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6173 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6177 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
/* UVD medium-grain clock gating: adjusts UVD_CGC_MEM_CTRL (via the
 * UVD context register aperture) and UVD_CGC_CTRL. The specific bits
 * set/cleared on data are in lines not visible in this excerpt. */
6182 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6183 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6185 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6187 orig = data = RREG32(UVD_CGC_CTRL);
6190 WREG32(UVD_CGC_CTRL, data);
6192 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6194 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6196 orig = data = RREG32(UVD_CGC_CTRL);
6199 WREG32(UVD_CGC_CTRL, data);
6203 static void cik_enable_bif_mgls(struct radeon_device *rdev,
/* BIF (bus interface) memory light sleep: toggles the four *_MEM_LS
 * enable bits in PCIE_CNTL2 through the PCIE port register aperture,
 * writing back only when the value changed. */
6208 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6210 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6211 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6212 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6214 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6215 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6218 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6221 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
/* HDP medium-grain clock gating: note the inverted sense — enabling
 * gating means CLEARING the CLOCK_GATING_DIS bit. */
6226 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6228 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6229 data &= ~CLOCK_GATING_DIS;
6231 data |= CLOCK_GATING_DIS;
6234 WREG32(HDP_HOST_PATH_CNTL, data);
6237 static void cik_enable_hdp_ls(struct radeon_device *rdev,
/* HDP memory light sleep: straightforward set/clear of HDP_LS_ENABLE
 * in HDP_MEM_POWER_LS, written back only on change. */
6242 orig = data = RREG32(HDP_MEM_POWER_LS);
6244 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6245 data |= HDP_LS_ENABLE;
6247 data &= ~HDP_LS_ENABLE;
6250 WREG32(HDP_MEM_POWER_LS, data);
6253 void cik_update_cg(struct radeon_device *rdev,
6254 u32 block, bool enable)
/* Dispatch clock-gating enable/disable to each IP block selected by
 * the RADEON_CG_BLOCK_* bitmask. For GFX, MGCG must be enabled before
 * CGCG and disabled after it ("order matters"); GUI-idle interrupts
 * are masked for the duration. MC gating is skipped on IGPs (no
 * discrete memory controller path). */
6257 if (block & RADEON_CG_BLOCK_GFX) {
6258 cik_enable_gui_idle_interrupt(rdev, false);
6259 /* order matters! */
6261 cik_enable_mgcg(rdev, true);
6262 cik_enable_cgcg(rdev, true);
6264 cik_enable_cgcg(rdev, false);
6265 cik_enable_mgcg(rdev, false);
6267 cik_enable_gui_idle_interrupt(rdev, true);
6270 if (block & RADEON_CG_BLOCK_MC) {
6271 if (!(rdev->flags & RADEON_IS_IGP)) {
6272 cik_enable_mc_mgcg(rdev, enable);
6273 cik_enable_mc_ls(rdev, enable);
6277 if (block & RADEON_CG_BLOCK_SDMA) {
6278 cik_enable_sdma_mgcg(rdev, enable);
6279 cik_enable_sdma_mgls(rdev, enable);
6282 if (block & RADEON_CG_BLOCK_BIF) {
6283 cik_enable_bif_mgls(rdev, enable);
6286 if (block & RADEON_CG_BLOCK_UVD) {
6288 cik_enable_uvd_mgcg(rdev, enable);
6291 if (block & RADEON_CG_BLOCK_HDP) {
6292 cik_enable_hdp_mgcg(rdev, enable);
6293 cik_enable_hdp_ls(rdev, enable);
6296 if (block & RADEON_CG_BLOCK_VCE) {
6297 vce_v2_0_enable_mgcg(rdev, enable);
6301 static void cik_init_cg(struct radeon_device *rdev)
/* Enable clock gating at init: GFX first, then (after UVD internal
 * CG setup) the remaining blocks in one call. */
6304 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6307 si_init_uvd_internal_cg(rdev);
6309 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6310 RADEON_CG_BLOCK_SDMA |
6311 RADEON_CG_BLOCK_BIF |
6312 RADEON_CG_BLOCK_UVD |
6313 RADEON_CG_BLOCK_HDP), true);
6316 static void cik_fini_cg(struct radeon_device *rdev)
/* Disable clock gating in the reverse order of cik_init_cg:
 * non-GFX blocks first, GFX last. */
6318 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6319 RADEON_CG_BLOCK_SDMA |
6320 RADEON_CG_BLOCK_BIF |
6321 RADEON_CG_BLOCK_UVD |
6322 RADEON_CG_BLOCK_HDP), false);
6324 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6327 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL
 * (written back only on change). */
6332 orig = data = RREG32(RLC_PG_CNTL);
6333 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6334 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6336 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6338 WREG32(RLC_PG_CNTL, data);
6341 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
/* Same as the power-up variant, but for the power-down slowdown bit. */
6346 orig = data = RREG32(RLC_PG_CNTL);
6347 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6348 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6350 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6352 WREG32(RLC_PG_CNTL, data);
6355 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
/* CP power gating: inverted sense — enabling PG clears DISABLE_CP_PG. */
6359 orig = data = RREG32(RLC_PG_CNTL);
6360 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6361 data &= ~DISABLE_CP_PG;
6363 data |= DISABLE_CP_PG;
6365 WREG32(RLC_PG_CNTL, data);
6368 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
/* GDS power gating: inverted sense — enabling PG clears DISABLE_GDS_PG. */
6372 orig = data = RREG32(RLC_PG_CNTL);
6373 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6374 data &= ~DISABLE_GDS_PG;
6376 data |= DISABLE_GDS_PG;
6378 WREG32(RLC_PG_CNTL, data);
6381 #define CP_ME_TABLE_SIZE 96
6382 #define CP_ME_TABLE_OFFSET 2048
6383 #define CP_MEC_TABLE_OFFSET 4096
6385 void cik_init_cp_pg_table(struct radeon_device *rdev)
/* Copy the CP jump tables out of the CE/PFP/ME/MEC(2) microcode blobs
 * into the RLC's cp_table buffer, which the RLC uses to restore CP
 * state after power gating. New-style headered firmware supplies the
 * jump-table offset/size in its header (little-endian); legacy blobs
 * use the fixed CP_ME_TABLE_* / CP_MEC_TABLE_OFFSET constants
 * (big-endian). me indexes the engine: 0=CE, 1=PFP, 2=ME, 3=MEC,
 * 4=MEC2 (Kaveri bumps max_me to 5 — the assignment line is missing
 * from this excerpt, as are the bo_offset declaration, the early
 * return, and the if selecting new vs legacy firmware). */
6387 volatile u32 *dst_ptr;
6388 int me, i, max_me = 4;
6390 u32 table_offset, table_size;
6392 if (rdev->family == CHIP_KAVERI)
6395 if (rdev->rlc.cp_table_ptr == NULL)
6398 /* write the cp table buffer */
6399 dst_ptr = rdev->rlc.cp_table_ptr;
6400 for (me = 0; me < max_me; me++) {
6402 const __le32 *fw_data;
6403 const struct gfx_firmware_header_v1_0 *hdr;
6406 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6407 fw_data = (const __le32 *)
6408 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6409 table_offset = le32_to_cpu(hdr->jt_offset);
6410 table_size = le32_to_cpu(hdr->jt_size);
6411 } else if (me == 1) {
6412 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6413 fw_data = (const __le32 *)
6414 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6415 table_offset = le32_to_cpu(hdr->jt_offset);
6416 table_size = le32_to_cpu(hdr->jt_size);
6417 } else if (me == 2) {
6418 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6419 fw_data = (const __le32 *)
6420 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6421 table_offset = le32_to_cpu(hdr->jt_offset);
6422 table_size = le32_to_cpu(hdr->jt_size);
6423 } else if (me == 3) {
6424 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6425 fw_data = (const __le32 *)
6426 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6427 table_offset = le32_to_cpu(hdr->jt_offset);
6428 table_size = le32_to_cpu(hdr->jt_size);
6430 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6431 fw_data = (const __le32 *)
6432 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6433 table_offset = le32_to_cpu(hdr->jt_offset);
6434 table_size = le32_to_cpu(hdr->jt_size);
6437 for (i = 0; i < table_size; i ++) {
6438 dst_ptr[bo_offset + i] =
6439 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6441 bo_offset += table_size;
/* Legacy (headerless) firmware path: fixed offsets, big-endian data. */
6443 const __be32 *fw_data;
6444 table_size = CP_ME_TABLE_SIZE;
6447 fw_data = (const __be32 *)rdev->ce_fw->data;
6448 table_offset = CP_ME_TABLE_OFFSET;
6449 } else if (me == 1) {
6450 fw_data = (const __be32 *)rdev->pfp_fw->data;
6451 table_offset = CP_ME_TABLE_OFFSET;
6452 } else if (me == 2) {
6453 fw_data = (const __be32 *)rdev->me_fw->data;
6454 table_offset = CP_ME_TABLE_OFFSET;
6456 fw_data = (const __be32 *)rdev->mec_fw->data;
6457 table_offset = CP_MEC_TABLE_OFFSET;
6460 for (i = 0; i < table_size; i ++) {
6461 dst_ptr[bo_offset + i] =
6462 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6464 bo_offset += table_size;
6469 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
/* GFX coarse-grain power gating: set/clear GFX_PG_ENABLE in
 * RLC_PG_CNTL and the auto-PG enable in RLC_AUTO_PG_CTRL. The final
 * DB_RENDER_CONTROL read on the disable path appears to be a posting/
 * flush read (subsequent use not visible in this excerpt). */
6474 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6475 orig = data = RREG32(RLC_PG_CNTL);
6476 data |= GFX_PG_ENABLE;
6478 WREG32(RLC_PG_CNTL, data);
6480 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6483 WREG32(RLC_AUTO_PG_CTRL, data);
6485 orig = data = RREG32(RLC_PG_CNTL);
6486 data &= ~GFX_PG_ENABLE;
6488 WREG32(RLC_PG_CNTL, data);
6490 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6491 data &= ~AUTO_PG_EN;
6493 WREG32(RLC_AUTO_PG_CTRL, data);
6495 data = RREG32(DB_RENDER_CONTROL);
6499 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
/* Return a bitmap of active compute units for the given SE/SH:
 * reads the fused-off (CC) and user-disabled (GC_USER) CU configs
 * under grbm_idx_mutex, builds a mask of max_cu_per_sh bits, and
 * returns the CUs NOT disabled. The combination of tmp/tmp1 and the
 * mask-building loop body are in lines not visible in this excerpt. */
6501 u32 mask = 0, tmp, tmp1;
6504 mutex_lock(&rdev->grbm_idx_mutex);
6505 cik_select_se_sh(rdev, se, sh);
6506 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6507 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6508 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6509 mutex_unlock(&rdev->grbm_idx_mutex);
6516 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6521 return (~tmp) & mask;
6524 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
/* Build the "always on" CU mask for power gating: for each SE/SH,
 * count active CUs and pack a per-SH bitmap into RLC_PG_AO_CU_MASK
 * (8 bits per SH, 16 per SE), then program the active-CU count into
 * RLC_MAX_PG_CU. Counter/bitmap update lines are missing from this
 * excerpt (original-number gaps). */
6526 u32 i, j, k, active_cu_number = 0;
6527 u32 mask, counter, cu_bitmap;
6530 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6531 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6535 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6536 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6544 active_cu_number += counter;
6545 tmp |= (cu_bitmap << (i * 16 + j * 8));
6549 WREG32(RLC_PG_AO_CU_MASK, tmp);
6551 tmp = RREG32(RLC_MAX_PG_CU);
6552 tmp &= ~MAX_PU_CU_MASK;
6553 tmp |= MAX_PU_CU(active_cu_number);
6554 WREG32(RLC_MAX_PG_CU, tmp);
6557 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
/* Static per-CU medium-grain power gating toggle in RLC_PG_CNTL. */
6562 orig = data = RREG32(RLC_PG_CNTL);
6563 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6564 data |= STATIC_PER_CU_PG_ENABLE;
6566 data &= ~STATIC_PER_CU_PG_ENABLE;
6568 WREG32(RLC_PG_CNTL, data);
6571 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
/* Dynamic per-CU medium-grain power gating toggle in RLC_PG_CNTL. */
6576 orig = data = RREG32(RLC_PG_CNTL);
6577 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6578 data |= DYN_PER_CU_PG_ENABLE;
6580 data &= ~DYN_PER_CU_PG_ENABLE;
6582 WREG32(RLC_PG_CNTL, data);
6585 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6586 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6588 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
/* One-time GFX power-gating setup: publish the clear-state buffer
 * descriptor (GPU address hi/lo + size) and the save/restore register
 * list into RLC GPM scratch space, program the save/restore and CP
 * table base addresses, and tune the idle-poll and PG delay timers.
 * Several bitfield values OR'd into data (RLC_PG_CNTL, RLC_PG_DELAY,
 * RLC_PG_DELAY_2) are in lines not visible in this excerpt. */
6593 if (rdev->rlc.cs_data) {
6594 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6595 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6596 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6597 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6599 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6600 for (i = 0; i < 3; i++)
6601 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6603 if (rdev->rlc.reg_list) {
6604 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6605 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6606 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6609 orig = data = RREG32(RLC_PG_CNTL);
6612 WREG32(RLC_PG_CNTL, data);
6614 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6615 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6617 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6618 data &= ~IDLE_POLL_COUNT_MASK;
6619 data |= IDLE_POLL_COUNT(0x60);
6620 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6623 WREG32(RLC_PG_DELAY, data);
6625 data = RREG32(RLC_PG_DELAY_2);
6628 WREG32(RLC_PG_DELAY_2, data);
6630 data = RREG32(RLC_AUTO_PG_CTRL);
6631 data &= ~GRBM_REG_SGIT_MASK;
6632 data |= GRBM_REG_SGIT(0x700);
6633 WREG32(RLC_AUTO_PG_CTRL, data);
6637 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
/* Toggle all three GFX power-gating flavors together:
 * coarse-grain, static per-CU, and dynamic per-CU. */
6639 cik_enable_gfx_cgpg(rdev, enable);
6640 cik_enable_gfx_static_mgpg(rdev, enable);
6641 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6644 u32 cik_get_csb_size(struct radeon_device *rdev)
/* Compute the dword size of the clear-state buffer that
 * cik_get_csb_buffer() will emit: fixed preamble/context-control/
 * raster-config/end packets plus 2 + reg_count dwords per
 * SECT_CONTEXT extent. The count increments for the fixed packets
 * and the return statement are in lines not visible in this excerpt
 * — keep this function in sync with cik_get_csb_buffer(). */
6647 const struct cs_section_def *sect = NULL;
6648 const struct cs_extent_def *ext = NULL;
6650 if (rdev->rlc.cs_data == NULL)
6653 /* begin clear state */
6655 /* context control state */
6658 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6659 for (ext = sect->section; ext->extent != NULL; ++ext) {
6660 if (sect->id == SECT_CONTEXT)
6661 count += 2 + ext->reg_count;
6666 /* pa_sc_raster_config/pa_sc_raster_config1 */
6668 /* end clear state */
6676 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
/* Serialize the clear-state buffer into "buffer" (little-endian):
 * preamble begin, context control, all SECT_CONTEXT register extents,
 * the per-family PA_SC_RASTER_CONFIG pair, preamble end, and a final
 * CLEAR_STATE packet. Buffer must be at least cik_get_csb_size()
 * dwords. The family case labels in the switch (Bonaire/Kaveri/
 * Kabini/Hawaii) are in lines not visible in this excerpt. */
6679 const struct cs_section_def *sect = NULL;
6680 const struct cs_extent_def *ext = NULL;
6682 if (rdev->rlc.cs_data == NULL)
6687 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6688 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6690 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6691 buffer[count++] = cpu_to_le32(0x80000000);
6692 buffer[count++] = cpu_to_le32(0x80000000);
6694 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6695 for (ext = sect->section; ext->extent != NULL; ++ext) {
6696 if (sect->id == SECT_CONTEXT) {
6698 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6699 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6700 for (i = 0; i < ext->reg_count; i++)
6701 buffer[count++] = cpu_to_le32(ext->extent[i]);
6708 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6709 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6710 switch (rdev->family) {
6712 buffer[count++] = cpu_to_le32(0x16000012);
6713 buffer[count++] = cpu_to_le32(0x00000000);
6716 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6717 buffer[count++] = cpu_to_le32(0x00000000);
6721 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6722 buffer[count++] = cpu_to_le32(0x00000000);
6725 buffer[count++] = cpu_to_le32(0x3a00161a);
6726 buffer[count++] = cpu_to_le32(0x0000002e);
6729 buffer[count++] = cpu_to_le32(0x00000000);
6730 buffer[count++] = cpu_to_le32(0x00000000);
6734 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6735 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6737 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6738 buffer[count++] = cpu_to_le32(0);
6741 static void cik_init_pg(struct radeon_device *rdev)
/* Enable power gating if any pg_flags are set: SMU clock slowdown on
 * power transitions, then (for GFX PG parts) the CGPG setup plus
 * CP/GDS gating, the always-on CU mask, and finally the GFX PG
 * toggles. */
6743 if (rdev->pg_flags) {
6744 cik_enable_sck_slowdown_on_pu(rdev, true);
6745 cik_enable_sck_slowdown_on_pd(rdev, true);
6746 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6747 cik_init_gfx_cgpg(rdev);
6748 cik_enable_cp_pg(rdev, true);
6749 cik_enable_gds_pg(rdev, true);
6751 cik_init_ao_cu_mask(rdev);
6752 cik_update_gfx_pg(rdev, true);
6756 static void cik_fini_pg(struct radeon_device *rdev)
/* Disable power gating in the reverse order of cik_init_pg. */
6758 if (rdev->pg_flags) {
6759 cik_update_gfx_pg(rdev, false);
6760 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6761 cik_enable_cp_pg(rdev, false);
6762 cik_enable_gds_pg(rdev, false);
6769 * Starting with r6xx, interrupts are handled via a ring buffer.
6770 * Ring buffers are areas of GPU accessible memory that the GPU
6771 * writes interrupt vectors into and the host reads vectors out of.
6772 * There is a rptr (read pointer) that determines where the
6773 * host is currently reading, and a wptr (write pointer)
6774 * which determines where the GPU has written. When the
6775 * pointers are equal, the ring is idle. When the GPU
6776 * writes vectors to the ring buffer, it increments the
6777 * wptr. When there is an interrupt, the host then starts
6778 * fetching commands and processing them until the pointers are
6779 * equal again at which point it updates the rptr.
6783 * cik_enable_interrupts - Enable the interrupt ring buffer
6785 * @rdev: radeon_device pointer
6787 * Enable the interrupt ring buffer (CIK).
6789 static void cik_enable_interrupts(struct radeon_device *rdev)
6791 u32 ih_cntl = RREG32(IH_CNTL);
6792 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6794 ih_cntl |= ENABLE_INTR;
6795 ih_rb_cntl |= IH_RB_ENABLE;
6796 WREG32(IH_CNTL, ih_cntl);
6797 WREG32(IH_RB_CNTL, ih_rb_cntl);
6798 rdev->ih.enabled = true;
6802 * cik_disable_interrupts - Disable the interrupt ring buffer
6804 * @rdev: radeon_device pointer
6806 * Disable the interrupt ring buffer (CIK).
6808 static void cik_disable_interrupts(struct radeon_device *rdev)
6810 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6811 u32 ih_cntl = RREG32(IH_CNTL);
6813 ih_rb_cntl &= ~IH_RB_ENABLE;
6814 ih_cntl &= ~ENABLE_INTR;
6815 WREG32(IH_RB_CNTL, ih_rb_cntl);
6816 WREG32(IH_CNTL, ih_cntl);
6817 /* set rptr, wptr to 0 */
6818 WREG32(IH_RB_RPTR, 0);
6819 WREG32(IH_RB_WPTR, 0);
6820 rdev->ih.enabled = false;
6825 * cik_disable_interrupt_state - Disable all interrupt sources
6827 * @rdev: radeon_device pointer
6829 * Clear all interrupt enable bits used by the driver (CIK).
6831 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6836 tmp = RREG32(CP_INT_CNTL_RING0) &
6837 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6838 WREG32(CP_INT_CNTL_RING0, tmp);
6840 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6841 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6842 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6843 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6844 /* compute queues */
6845 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6846 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6847 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6848 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6849 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6850 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6851 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6852 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6854 WREG32(GRBM_INT_CNTL, 0);
6856 WREG32(SRBM_INT_CNTL, 0);
6857 /* vline/vblank, etc. */
6858 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6859 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6860 if (rdev->num_crtc >= 4) {
6861 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6862 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6864 if (rdev->num_crtc >= 6) {
6865 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6866 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6869 if (rdev->num_crtc >= 2) {
6870 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6871 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6873 if (rdev->num_crtc >= 4) {
6874 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6875 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6877 if (rdev->num_crtc >= 6) {
6878 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6879 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6883 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6885 /* digital hotplug */
6886 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6887 WREG32(DC_HPD1_INT_CONTROL, tmp);
6888 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6889 WREG32(DC_HPD2_INT_CONTROL, tmp);
6890 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6891 WREG32(DC_HPD3_INT_CONTROL, tmp);
6892 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6893 WREG32(DC_HPD4_INT_CONTROL, tmp);
6894 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6895 WREG32(DC_HPD5_INT_CONTROL, tmp);
6896 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6897 WREG32(DC_HPD6_INT_CONTROL, tmp);
6902 * cik_irq_init - init and enable the interrupt ring
6904 * @rdev: radeon_device pointer
6906 * Allocate a ring buffer for the interrupt controller,
6907 * enable the RLC, disable interrupts, enable the IH
6908 * ring buffer and enable it (CIK).
6909 * Called at device load and resume.
6910 * Returns 0 for success, errors for failure.
6912 static int cik_irq_init(struct radeon_device *rdev)
6916 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the IH ring before touching any hardware state */
6919 ret = r600_ih_ring_alloc(rdev);
6924 cik_disable_interrupts(rdev);
/* bring up the RLC; on failure the IH ring is torn down again */
6927 ret = cik_rlc_resume(rdev);
6929 r600_ih_ring_fini(rdev);
6933 /* setup interrupt control */
6934 /* set dummy read address to dummy page address */
6935 WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6936 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6937 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6938 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6940 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6941 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6942 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6943 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6945 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size is in dwords for the size field, hence /4 */
6946 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6948 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6949 IH_WPTR_OVERFLOW_CLEAR |
6952 if (rdev->wb.enabled)
6953 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6955 /* set the writeback address whether it's enabled or not */
6956 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6957 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6959 WREG32(IH_RB_CNTL, ih_rb_cntl);
6961 /* set rptr, wptr to 0 */
6962 WREG32(IH_RB_RPTR, 0);
6963 WREG32(IH_RB_WPTR, 0);
6965 /* Default settings for IH_CNTL (disabled at first) */
6966 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6967 /* RPTR_REARM only works if msi's are enabled */
6968 if (rdev->msi_enabled)
6969 ih_cntl |= RPTR_REARM;
6970 WREG32(IH_CNTL, ih_cntl);
6972 /* force the active interrupt state to all disabled */
6973 cik_disable_interrupt_state(rdev);
6975 pci_set_master(rdev->pdev);
/* enable irqs last, once everything above is programmed */
6978 cik_enable_interrupts(rdev);
6984 * cik_irq_set - enable/disable interrupt sources
6986 * @rdev: radeon_device pointer
6988 * Enable interrupt sources on the GPU (vblanks, hpd,
6990 * Returns 0 for success, errors for failure.
/* cik_irq_set - program interrupt-enable registers from rdev->irq state.
 * Builds per-source enable masks (CP rings, SDMA, vblank per crtc, HPD)
 * and writes them out in one pass; ends with a posting read of SRBM_STATUS.
 */
6992 int cik_irq_set(struct radeon_device *rdev)
6996 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6997 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6998 u32 grbm_int_cntl = 0;
6999 u32 dma_cntl, dma_cntl1;
7001 if (!rdev->irq.installed) {
7002 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7005 /* don't enable anything if the ih is disabled */
7006 if (!rdev->ih.enabled) {
7007 cik_disable_interrupts(rdev);
7008 /* force the active interrupt state to all disabled */
7009 cik_disable_interrupt_state(rdev);
/* start from current hw state with driver-controlled bits cleared */
7013 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7014 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7015 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7017 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7018 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7019 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7020 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7021 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7022 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7024 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7025 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7027 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7029 /* enable CP interrupts on all rings */
7030 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7031 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7032 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7034 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7035 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7036 DRM_DEBUG("si_irq_set: sw int cp1\n");
7037 if (ring->me == 1) {
7038 switch (ring->pipe) {
7040 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7043 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7047 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7050 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7051 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7052 DRM_DEBUG("si_irq_set: sw int cp2\n");
7053 if (ring->me == 1) {
7054 switch (ring->pipe) {
7056 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7059 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7063 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7067 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7068 DRM_DEBUG("cik_irq_set: sw int dma\n");
7069 dma_cntl |= TRAP_ENABLE;
7072 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7073 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7074 dma_cntl1 |= TRAP_ENABLE;
/* vblank enable per crtc: set when either drm vblank or pflip wants it */
7077 if (rdev->irq.crtc_vblank_int[0] ||
7078 atomic_read(&rdev->irq.pflip[0])) {
7079 DRM_DEBUG("cik_irq_set: vblank 0\n");
7080 crtc1 |= VBLANK_INTERRUPT_MASK;
7082 if (rdev->irq.crtc_vblank_int[1] ||
7083 atomic_read(&rdev->irq.pflip[1])) {
7084 DRM_DEBUG("cik_irq_set: vblank 1\n");
7085 crtc2 |= VBLANK_INTERRUPT_MASK;
7087 if (rdev->irq.crtc_vblank_int[2] ||
7088 atomic_read(&rdev->irq.pflip[2])) {
7089 DRM_DEBUG("cik_irq_set: vblank 2\n");
7090 crtc3 |= VBLANK_INTERRUPT_MASK;
7092 if (rdev->irq.crtc_vblank_int[3] ||
7093 atomic_read(&rdev->irq.pflip[3])) {
7094 DRM_DEBUG("cik_irq_set: vblank 3\n");
7095 crtc4 |= VBLANK_INTERRUPT_MASK;
7097 if (rdev->irq.crtc_vblank_int[4] ||
7098 atomic_read(&rdev->irq.pflip[4])) {
7099 DRM_DEBUG("cik_irq_set: vblank 4\n");
7100 crtc5 |= VBLANK_INTERRUPT_MASK;
7102 if (rdev->irq.crtc_vblank_int[5] ||
7103 atomic_read(&rdev->irq.pflip[5])) {
7104 DRM_DEBUG("cik_irq_set: vblank 5\n");
7105 crtc6 |= VBLANK_INTERRUPT_MASK;
7107 if (rdev->irq.hpd[0]) {
7108 DRM_DEBUG("cik_irq_set: hpd 1\n");
7109 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7111 if (rdev->irq.hpd[1]) {
7112 DRM_DEBUG("cik_irq_set: hpd 2\n");
7113 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7115 if (rdev->irq.hpd[2]) {
7116 DRM_DEBUG("cik_irq_set: hpd 3\n");
7117 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7119 if (rdev->irq.hpd[3]) {
7120 DRM_DEBUG("cik_irq_set: hpd 4\n");
7121 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7123 if (rdev->irq.hpd[4]) {
7124 DRM_DEBUG("cik_irq_set: hpd 5\n");
7125 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7127 if (rdev->irq.hpd[5]) {
7128 DRM_DEBUG("cik_irq_set: hpd 6\n");
7129 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
/* write everything out in one batch */
7132 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7134 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7135 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7137 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7139 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7141 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7142 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7143 if (rdev->num_crtc >= 4) {
7144 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7145 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7147 if (rdev->num_crtc >= 6) {
7148 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7149 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
/* page-flip interrupt is always unmasked on present crtcs */
7152 if (rdev->num_crtc >= 2) {
7153 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7154 GRPH_PFLIP_INT_MASK);
7155 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7156 GRPH_PFLIP_INT_MASK);
7158 if (rdev->num_crtc >= 4) {
7159 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7160 GRPH_PFLIP_INT_MASK);
7161 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7162 GRPH_PFLIP_INT_MASK);
7164 if (rdev->num_crtc >= 6) {
7165 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7166 GRPH_PFLIP_INT_MASK);
7167 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7168 GRPH_PFLIP_INT_MASK);
7171 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7172 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7173 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7174 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7175 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7176 WREG32(DC_HPD6_INT_CONTROL, hpd6);
/* posting read to flush the register writes */
7179 RREG32(SRBM_STATUS);
7185 * cik_irq_ack - ack interrupt sources
7187 * @rdev: radeon_device pointer
7189 * Ack interrupt sources on the GPU (vblanks, hpd,
7190 * etc.) (CIK). Certain interrupt sources are sw
7191 * generated and do not require an explicit ack.
7193 static inline void cik_irq_ack(struct radeon_device *rdev)
/* latch all display interrupt status registers into stat_regs.cik */
7197 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7198 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7199 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7200 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7201 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7202 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7203 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7205 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7206 EVERGREEN_CRTC0_REGISTER_OFFSET);
7207 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7208 EVERGREEN_CRTC1_REGISTER_OFFSET);
7209 if (rdev->num_crtc >= 4) {
7210 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7211 EVERGREEN_CRTC2_REGISTER_OFFSET);
7212 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7213 EVERGREEN_CRTC3_REGISTER_OFFSET);
7215 if (rdev->num_crtc >= 6) {
7216 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7217 EVERGREEN_CRTC4_REGISTER_OFFSET);
7218 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7219 EVERGREEN_CRTC5_REGISTER_OFFSET);
/* ack pending pflip / vblank / vline events for crtc 0-1 */
7222 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7223 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7224 GRPH_PFLIP_INT_CLEAR);
7225 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7226 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227 GRPH_PFLIP_INT_CLEAR);
7228 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7229 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7230 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7231 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7232 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7233 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7234 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7235 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7237 if (rdev->num_crtc >= 4) {
7238 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7239 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7240 GRPH_PFLIP_INT_CLEAR);
7241 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7242 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7243 GRPH_PFLIP_INT_CLEAR);
7244 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7245 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7246 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7247 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7248 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7249 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7250 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7251 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7254 if (rdev->num_crtc >= 6) {
7255 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7256 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7257 GRPH_PFLIP_INT_CLEAR);
7258 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7259 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260 GRPH_PFLIP_INT_CLEAR);
7261 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7262 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7263 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7264 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7265 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7266 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7267 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7268 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* ack HPD connect/disconnect interrupts */
7271 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7272 tmp = RREG32(DC_HPD1_INT_CONTROL);
7273 tmp |= DC_HPDx_INT_ACK;
7274 WREG32(DC_HPD1_INT_CONTROL, tmp);
7276 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7277 tmp = RREG32(DC_HPD2_INT_CONTROL);
7278 tmp |= DC_HPDx_INT_ACK;
7279 WREG32(DC_HPD2_INT_CONTROL, tmp);
7281 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7282 tmp = RREG32(DC_HPD3_INT_CONTROL);
7283 tmp |= DC_HPDx_INT_ACK;
7284 WREG32(DC_HPD3_INT_CONTROL, tmp);
7286 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7287 tmp = RREG32(DC_HPD4_INT_CONTROL);
7288 tmp |= DC_HPDx_INT_ACK;
7289 WREG32(DC_HPD4_INT_CONTROL, tmp);
7291 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7292 tmp = RREG32(DC_HPD5_INT_CONTROL);
7293 tmp |= DC_HPDx_INT_ACK;
7294 WREG32(DC_HPD5_INT_CONTROL, tmp);
7296 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7297 tmp = RREG32(DC_HPD6_INT_CONTROL);
7298 tmp |= DC_HPDx_INT_ACK;
7299 WREG32(DC_HPD6_INT_CONTROL, tmp);
/* ack HPD RX (DP short-pulse) interrupts */
7301 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7302 tmp = RREG32(DC_HPD1_INT_CONTROL);
7303 tmp |= DC_HPDx_RX_INT_ACK;
7304 WREG32(DC_HPD1_INT_CONTROL, tmp);
7306 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7307 tmp = RREG32(DC_HPD2_INT_CONTROL);
7308 tmp |= DC_HPDx_RX_INT_ACK;
7309 WREG32(DC_HPD2_INT_CONTROL, tmp);
7311 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7312 tmp = RREG32(DC_HPD3_INT_CONTROL);
7313 tmp |= DC_HPDx_RX_INT_ACK;
7314 WREG32(DC_HPD3_INT_CONTROL, tmp);
7316 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7317 tmp = RREG32(DC_HPD4_INT_CONTROL);
7318 tmp |= DC_HPDx_RX_INT_ACK;
7319 WREG32(DC_HPD4_INT_CONTROL, tmp);
7321 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7322 tmp = RREG32(DC_HPD5_INT_CONTROL);
7323 tmp |= DC_HPDx_RX_INT_ACK;
7324 WREG32(DC_HPD5_INT_CONTROL, tmp);
7326 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7327 tmp = RREG32(DC_HPD6_INT_CONTROL);
7328 tmp |= DC_HPDx_RX_INT_ACK;
7329 WREG32(DC_HPD6_INT_CONTROL, tmp);
7334 * cik_irq_disable - disable interrupts
7336 * @rdev: radeon_device pointer
7338 * Disable interrupts on the hw (CIK).
7340 static void cik_irq_disable(struct radeon_device *rdev)
7342 cik_disable_interrupts(rdev);
7343 /* Wait and acknowledge irq */
/* then clear every per-source enable bit */
7346 cik_disable_interrupt_state(rdev);
7350 * cik_irq_suspend - disable interrupts for suspend
7352 * @rdev: radeon_device pointer
7354 * Disable interrupts and stop the RLC (CIK).
7357 static void cik_irq_suspend(struct radeon_device *rdev)
/* full irq disable; RLC teardown follows (lines not visible in this chunk) */
7359 cik_irq_disable(rdev);
7364 * cik_irq_fini - tear down interrupt support
7366 * @rdev: radeon_device pointer
7368 * Disable interrupts on the hw and free the IH ring
7370 * Used for driver unload.
7372 static void cik_irq_fini(struct radeon_device *rdev)
/* suspend first so hw no longer writes the ring, then free it */
7374 cik_irq_suspend(rdev);
7375 r600_ih_ring_fini(rdev);
7379 * cik_get_ih_wptr - get the IH ring buffer wptr
7381 * @rdev: radeon_device pointer
7383 * Get the IH ring buffer wptr from either the register
7384 * or the writeback memory buffer (CIK). Also check for
7385 * ring buffer overflow and deal with it.
7386 * Used by cik_irq_process().
7387 * Returns the value of the wptr.
7389 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* prefer the writeback copy of wptr; fall back to an MMIO read */
7393 if (rdev->wb.enabled)
7394 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7396 wptr = RREG32(IH_RB_WPTR);
7398 if (wptr & RB_OVERFLOW) {
7399 wptr &= ~RB_OVERFLOW;
7400 /* When a ring buffer overflow happen start parsing interrupt
7401 * from the last not overwritten vector (wptr + 16). Hopefully
7402 * this should allow us to catchup.
7404 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7405 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7406 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7407 tmp = RREG32(IH_RB_CNTL);
7408 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7409 WREG32(IH_RB_CNTL, tmp);
/* mask to the ring size; wptr is a byte offset into the IH ring */
7411 return (wptr & rdev->ih.ptr_mask);
7415 * Each IV ring entry is 128 bits:
7416 * [7:0] - interrupt source id
7418 * [59:32] - interrupt source data
7419 * [63:60] - reserved
7422 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7423 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7424 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7425 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7426 * PIPE_ID - ME0 0=3D
7427 * - ME1&2 compute dispatcher (4 pipes each)
7429 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7430 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7431 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7434 * [127:96] - reserved
7437 * cik_irq_process - interrupt handler
7439 * @rdev: radeon_device pointer
7441 * Interrupt handler (CIK). Walk the IH ring,
7442 * ack interrupts and schedule work to handle
7444 * Returns irq process return code.
7446 int cik_irq_process(struct radeon_device *rdev)
7448 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7449 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7452 u32 src_id, src_data, ring_id;
7453 u8 me_id, pipe_id, queue_id;
7455 bool queue_hotplug = false;
7456 bool queue_dp = false;
7457 bool queue_reset = false;
7458 u32 addr, status, mc_client;
7459 bool queue_thermal = false;
7461 if (!rdev->ih.enabled || rdev->shutdown)
7464 wptr = cik_get_ih_wptr(rdev);
7467 /* is somebody else already processing irqs? */
7468 if (atomic_xchg(&rdev->ih.lock, 1))
7471 rptr = rdev->ih.rptr;
7472 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7474 /* Order reading of wptr vs. reading of IH ring data */
7477 /* display interrupts */
7480 while (rptr != wptr) {
7481 /* wptr/rptr are in bytes! */
7482 ring_index = rptr / 4;
/* give KFD first crack at every vector before decoding it here */
7484 radeon_kfd_interrupt(rdev,
7485 (const void *) &rdev->ih.ring[ring_index]);
7487 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7488 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7489 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7492 case 1: /* D1 vblank/vline */
7494 case 0: /* D1 vblank */
7495 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7496 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7498 if (rdev->irq.crtc_vblank_int[0]) {
7499 drm_handle_vblank(rdev->ddev, 0);
7500 rdev->pm.vblank_sync = true;
7501 wake_up(&rdev->irq.vblank_queue);
7503 if (atomic_read(&rdev->irq.pflip[0]))
7504 radeon_crtc_handle_vblank(rdev, 0);
7505 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7506 DRM_DEBUG("IH: D1 vblank\n");
7509 case 1: /* D1 vline */
7510 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7511 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7513 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7514 DRM_DEBUG("IH: D1 vline\n");
7518 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7522 case 2: /* D2 vblank/vline */
7524 case 0: /* D2 vblank */
7525 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7526 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7528 if (rdev->irq.crtc_vblank_int[1]) {
7529 drm_handle_vblank(rdev->ddev, 1);
7530 rdev->pm.vblank_sync = true;
7531 wake_up(&rdev->irq.vblank_queue);
7533 if (atomic_read(&rdev->irq.pflip[1]))
7534 radeon_crtc_handle_vblank(rdev, 1);
7535 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7536 DRM_DEBUG("IH: D2 vblank\n");
7539 case 1: /* D2 vline */
7540 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7541 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7543 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7544 DRM_DEBUG("IH: D2 vline\n");
7548 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7552 case 3: /* D3 vblank/vline */
7554 case 0: /* D3 vblank */
7555 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7556 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7558 if (rdev->irq.crtc_vblank_int[2]) {
7559 drm_handle_vblank(rdev->ddev, 2);
7560 rdev->pm.vblank_sync = true;
7561 wake_up(&rdev->irq.vblank_queue);
7563 if (atomic_read(&rdev->irq.pflip[2]))
7564 radeon_crtc_handle_vblank(rdev, 2);
7565 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7566 DRM_DEBUG("IH: D3 vblank\n");
7569 case 1: /* D3 vline */
7570 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7571 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7573 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7574 DRM_DEBUG("IH: D3 vline\n");
7578 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7582 case 4: /* D4 vblank/vline */
7584 case 0: /* D4 vblank */
7585 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7586 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7588 if (rdev->irq.crtc_vblank_int[3]) {
7589 drm_handle_vblank(rdev->ddev, 3);
7590 rdev->pm.vblank_sync = true;
7591 wake_up(&rdev->irq.vblank_queue);
7593 if (atomic_read(&rdev->irq.pflip[3]))
7594 radeon_crtc_handle_vblank(rdev, 3);
7595 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7596 DRM_DEBUG("IH: D4 vblank\n");
7599 case 1: /* D4 vline */
7600 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7601 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7603 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7604 DRM_DEBUG("IH: D4 vline\n");
7608 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7612 case 5: /* D5 vblank/vline */
7614 case 0: /* D5 vblank */
7615 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7616 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7618 if (rdev->irq.crtc_vblank_int[4]) {
7619 drm_handle_vblank(rdev->ddev, 4);
7620 rdev->pm.vblank_sync = true;
7621 wake_up(&rdev->irq.vblank_queue);
7623 if (atomic_read(&rdev->irq.pflip[4]))
7624 radeon_crtc_handle_vblank(rdev, 4);
7625 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7626 DRM_DEBUG("IH: D5 vblank\n");
7629 case 1: /* D5 vline */
7630 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7631 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7633 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7634 DRM_DEBUG("IH: D5 vline\n");
7638 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7642 case 6: /* D6 vblank/vline */
7644 case 0: /* D6 vblank */
7645 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7646 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7648 if (rdev->irq.crtc_vblank_int[5]) {
7649 drm_handle_vblank(rdev->ddev, 5);
7650 rdev->pm.vblank_sync = true;
7651 wake_up(&rdev->irq.vblank_queue);
7653 if (atomic_read(&rdev->irq.pflip[5]))
7654 radeon_crtc_handle_vblank(rdev, 5);
7655 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7656 DRM_DEBUG("IH: D6 vblank\n");
7659 case 1: /* D6 vline */
7660 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7661 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7663 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7664 DRM_DEBUG("IH: D6 vline\n");
7668 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7672 case 8: /* D1 page flip */
7673 case 10: /* D2 page flip */
7674 case 12: /* D3 page flip */
7675 case 14: /* D4 page flip */
7676 case 16: /* D5 page flip */
7677 case 18: /* D6 page flip */
/* flip src_ids are even, starting at 8; map back to crtc index */
7678 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7679 if (radeon_use_pflipirq > 0)
7680 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7682 case 42: /* HPD hotplug */
7685 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7686 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7688 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7689 queue_hotplug = true;
7690 DRM_DEBUG("IH: HPD1\n");
7694 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7695 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7697 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7698 queue_hotplug = true;
7699 DRM_DEBUG("IH: HPD2\n");
7703 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7704 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7707 queue_hotplug = true;
7708 DRM_DEBUG("IH: HPD3\n");
7712 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7713 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7715 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7716 queue_hotplug = true;
7717 DRM_DEBUG("IH: HPD4\n");
7721 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7722 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7725 queue_hotplug = true;
7726 DRM_DEBUG("IH: HPD5\n");
7730 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7731 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7733 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7734 queue_hotplug = true;
7735 DRM_DEBUG("IH: HPD6\n");
7739 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7740 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7742 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7744 DRM_DEBUG("IH: HPD_RX 1\n");
7748 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7749 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7753 DRM_DEBUG("IH: HPD_RX 2\n");
7757 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7758 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7760 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7762 DRM_DEBUG("IH: HPD_RX 3\n");
7766 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7767 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7771 DRM_DEBUG("IH: HPD_RX 4\n");
7775 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7776 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7778 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7780 DRM_DEBUG("IH: HPD_RX 5\n");
7784 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7785 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7787 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7789 DRM_DEBUG("IH: HPD_RX 6\n");
7793 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7798 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7799 WREG32(SRBM_INT_ACK, 0x1);
7802 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7803 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* VM protection fault: dump fault info, then reset addr/status */
7807 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7808 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7809 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7810 /* reset addr and status */
7811 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7812 if (addr == 0x0 && status == 0x0)
7814 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7815 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7817 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7819 cik_vm_decode_fault(rdev, status, addr, mc_client);
7822 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7825 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7828 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7831 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7835 case 176: /* GFX RB CP_INT */
7836 case 177: /* GFX IB CP_INT */
7837 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7839 case 181: /* CP EOP event */
7840 DRM_DEBUG("IH: CP EOP\n");
7841 /* XXX check the bitfield order! */
7842 me_id = (ring_id & 0x60) >> 5;
7843 pipe_id = (ring_id & 0x18) >> 3;
7844 queue_id = (ring_id & 0x7) >> 0;
7847 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise & below where logical && looks intended;
 * result is the same here since == yields 0/1, but confirm upstream. */
7851 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7852 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7853 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7854 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7858 case 184: /* CP Privileged reg access */
7859 DRM_ERROR("Illegal register access in command stream\n");
7860 /* XXX check the bitfield order! */
7861 me_id = (ring_id & 0x60) >> 5;
7862 pipe_id = (ring_id & 0x18) >> 3;
7863 queue_id = (ring_id & 0x7) >> 0;
7866 /* This results in a full GPU reset, but all we need to do is soft
7867 * reset the CP for gfx
7881 case 185: /* CP Privileged inst */
7882 DRM_ERROR("Illegal instruction in command stream\n");
7883 /* XXX check the bitfield order! */
7884 me_id = (ring_id & 0x60) >> 5;
7885 pipe_id = (ring_id & 0x18) >> 3;
7886 queue_id = (ring_id & 0x7) >> 0;
7889 /* This results in a full GPU reset, but all we need to do is soft
7890 * reset the CP for gfx
7904 case 224: /* SDMA trap event */
7905 /* XXX check the bitfield order! */
7906 me_id = (ring_id & 0x3) >> 0;
7907 queue_id = (ring_id & 0xc) >> 2;
7908 DRM_DEBUG("IH: SDMA trap\n");
7913 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7926 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7938 case 230: /* thermal low to high */
7939 DRM_DEBUG("IH: thermal low to high\n");
7940 rdev->pm.dpm.thermal.high_to_low = false;
7941 queue_thermal = true;
7943 case 231: /* thermal high to low */
7944 DRM_DEBUG("IH: thermal high to low\n");
7945 rdev->pm.dpm.thermal.high_to_low = true;
7946 queue_thermal = true;
7948 case 233: /* GUI IDLE */
7949 DRM_DEBUG("IH: GUI idle\n");
7951 case 241: /* SDMA Privileged inst */
7952 case 247: /* SDMA Privileged inst */
7953 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7954 /* XXX check the bitfield order! */
7955 me_id = (ring_id & 0x3) >> 0;
7956 queue_id = (ring_id & 0xc) >> 2;
7991 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7995 /* wptr/rptr are in bytes! */
7997 rptr &= rdev->ih.ptr_mask;
7998 WREG32(IH_RB_RPTR, rptr);
/* kick off deferred work gathered while walking the ring */
8001 schedule_work(&rdev->dp_work);
8003 schedule_delayed_work(&rdev->hotplug_work, 0);
8005 rdev->needs_reset = true;
8006 wake_up_all(&rdev->fence_queue);
8009 schedule_work(&rdev->pm.dpm.thermal.work);
8010 rdev->ih.rptr = rptr;
8011 atomic_set(&rdev->ih.lock, 0);
8013 /* make sure wptr hasn't changed while processing */
8014 wptr = cik_get_ih_wptr(rdev);
8022 * startup/shutdown callbacks
/*
 * cik_uvd_init - one-time UVD setup for CIK parts.
 *
 * Calls radeon_uvd_init(); on failure it logs the error and (per the
 * original comment kept below) UVD is disabled.  Otherwise the UVD ring
 * object pointer is cleared and the ring is initialized with a 4096 size.
 *
 * NOTE(review): this extract is missing interleaved lines (braces,
 * error-path statements); the code lines below are kept verbatim.
 */
8024 static void cik_uvd_init(struct radeon_device *rdev)
8031 r = radeon_uvd_init(rdev);
8033 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8035 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8036 * to early fails cik_uvd_start() and thus nothing happens
8037 * there. So it is pointless to try to go through that code
8038 * hence why we disable uvd here.
8043 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8044 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
/*
 * cik_uvd_start - bring UVD up at (re)start time.
 *
 * Resumes the UVD block (radeon_uvd_resume(), then the uvd_v4_2 hw
 * resume) and starts the fence driver on the UVD ring.  On any failure
 * the UVD ring_size is zeroed so cik_uvd_resume() skips ring init.
 *
 * NOTE(review): error-check lines between the calls are missing from
 * this extract; code lines below are verbatim.
 */
8047 static void cik_uvd_start(struct radeon_device *rdev)
8054 r = radeon_uvd_resume(rdev);
8056 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8059 r = uvd_v4_2_resume(rdev);
8061 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8064 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8066 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8072 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/*
 * cik_uvd_resume - initialize and start the UVD ring.
 *
 * Skipped when UVD is absent or cik_uvd_start() zeroed the ring size.
 * Initializes the UVD ring with a PACKET0(UVD_NO_OP) nop and then runs
 * uvd_v1_0_init().
 */
8075 static void cik_uvd_resume(struct radeon_device *rdev)
8077 struct radeon_ring *ring;
8080 if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8083 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8084 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8086 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8089 r = uvd_v1_0_init(rdev);
8091 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
/*
 * cik_vce_init - one-time VCE setup for CIK parts.
 *
 * Mirror of cik_uvd_init() for the VCE block: radeon_vce_init(), then
 * both VCE rings (VCE1/VCE2) get their ring object cleared and are
 * initialized with a 4096 size.
 */
8096 static void cik_vce_init(struct radeon_device *rdev)
8103 r = radeon_vce_init(rdev);
8105 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8107 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8108 * to early fails cik_vce_start() and thus nothing happens
8109 * there. So it is pointless to try to go through that code
8110 * hence why we disable vce here.
8115 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8116 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8117 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8118 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
/*
 * cik_vce_start - bring VCE up at (re)start time.
 *
 * Resumes VCE (radeon_vce_resume(), vce_v2_0_resume()) and starts the
 * fence driver on both VCE rings.  On any failure both VCE ring sizes
 * are zeroed so cik_vce_resume() skips ring init.
 */
8121 static void cik_vce_start(struct radeon_device *rdev)
8128 r = radeon_vce_resume(rdev);
8130 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8133 r = vce_v2_0_resume(rdev);
8135 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8138 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8140 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8143 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8145 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8151 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8152 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
/*
 * cik_vce_resume - initialize and start both VCE rings.
 *
 * Skipped when VCE is absent or cik_vce_start() zeroed the ring size.
 * Initializes VCE1 and VCE2 rings with VCE_CMD_NO_OP, then runs
 * vce_v1_0_init().
 */
8155 static void cik_vce_resume(struct radeon_device *rdev)
8157 struct radeon_ring *ring;
8160 if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8163 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8164 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8166 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8169 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8170 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
/* NOTE(review): this message says "VCE1" but the ring initialized here
 * is VCE2 — looks like a copy/paste slip in the error string.  Left
 * unchanged because it is a runtime string. */
8172 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8175 r = vce_v1_0_init(rdev);
8177 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8183 * cik_startup - program the asic to a functional state
8185 * @rdev: radeon_device pointer
8187 * Programs the asic to a functional state (CIK).
8188 * Called by cik_init() and cik_resume().
8189 * Returns 0 for success, error for failure.
/*
 * NOTE(review): this extract is missing interleaved lines (braces,
 * error checks, returns); all code lines are kept verbatim.  The order
 * visible here: PCIe link / ASPM, VRAM scratch, MC program + (dGPU) MC
 * ucode load, GART enable, RLC buffers, writeback, MEC buffers, fence
 * driver start on all rings, UVD/VCE start, IRQ init, ring inits
 * (gfx, two compute queues, two SDMA queues), CP/SDMA resume, UVD/VCE
 * resume, IB pool, VM manager, audio, KFD resume.
 */
8191 static int cik_startup(struct radeon_device *rdev)
8193 struct radeon_ring *ring;
8197 /* enable pcie gen2/3 link */
8198 cik_pcie_gen3_enable(rdev);
8200 cik_program_aspm(rdev);
8202 /* scratch needs to be initialized before MC */
8203 r = r600_vram_scratch_init(rdev);
8207 cik_mc_program(rdev);
/* MC microcode is only loaded directly on dGPUs when DPM isn't managing it */
8209 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8210 r = ci_mc_load_microcode(rdev);
8212 DRM_ERROR("Failed to load MC firmware!\n");
8217 r = cik_pcie_gart_enable(rdev);
8222 /* allocate rlc buffers */
8223 if (rdev->flags & RADEON_IS_IGP) {
8224 if (rdev->family == CHIP_KAVERI) {
8225 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8226 rdev->rlc.reg_list_size =
8227 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8229 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8230 rdev->rlc.reg_list_size =
8231 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8234 rdev->rlc.cs_data = ci_cs_data;
8235 rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8236 rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8237 r = sumo_rlc_init(rdev);
8239 DRM_ERROR("Failed to init rlc BOs!\n");
8243 /* allocate wb buffer */
8244 r = radeon_wb_init(rdev);
8248 /* allocate mec buffers */
8249 r = cik_mec_init(rdev);
8251 DRM_ERROR("Failed to init MEC BOs!\n");
/* start the fence driver on every ring before the rings are used */
8255 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8257 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8261 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8263 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8267 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8269 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8273 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8275 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8279 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8281 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8285 cik_uvd_start(rdev);
8286 cik_vce_start(rdev);
8289 if (!rdev->irq.installed) {
8290 r = radeon_irq_kms_init(rdev);
8295 r = cik_irq_init(rdev);
8297 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8298 radeon_irq_kms_fini(rdev);
/* Hawaii uses a PACKET3 NOP for the gfx ring nop as well */
8303 if (rdev->family == CHIP_HAWAII) {
8305 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8307 nop = RADEON_CP_PACKET2;
8309 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8312 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8313 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8318 /* set up the compute queues */
8319 /* type-2 packets are deprecated on MEC, use type-3 instead */
8320 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8321 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8325 ring->me = 1; /* first MEC */
8326 ring->pipe = 0; /* first pipe */
8327 ring->queue = 0; /* first queue */
8328 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8330 /* type-2 packets are deprecated on MEC, use type-3 instead */
8331 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8332 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8336 /* dGPU only have 1 MEC */
8337 ring->me = 1; /* first MEC */
8338 ring->pipe = 0; /* first pipe */
8339 ring->queue = 1; /* second queue */
8340 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8342 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8343 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8344 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8348 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8349 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8350 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8354 r = cik_cp_resume(rdev);
8358 r = cik_sdma_resume(rdev);
8362 cik_uvd_resume(rdev);
8363 cik_vce_resume(rdev);
8365 r = radeon_ib_pool_init(rdev);
8367 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8371 r = radeon_vm_manager_init(rdev);
8373 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8377 r = radeon_audio_init(rdev);
8381 r = radeon_kfd_resume(rdev);
8389 * cik_resume - resume the asic to a functional state
8391 * @rdev: radeon_device pointer
8393 * Programs the asic to a functional state (CIK).
8395 * Returns 0 for success, error for failure.
/*
 * Re-posts the card via atombios, restores the golden register set,
 * resumes PM if DPM is in use, then runs the common cik_startup() path;
 * accel_working is cleared again if startup fails.
 */
8397 int cik_resume(struct radeon_device *rdev)
8402 atom_asic_init(rdev->mode_info.atom_context);
8404 /* init golden registers */
8405 cik_init_golden_registers(rdev);
8407 if (rdev->pm.pm_method == PM_METHOD_DPM)
8408 radeon_pm_resume(rdev);
8410 rdev->accel_working = true;
8411 r = cik_startup(rdev);
8413 DRM_ERROR("cik startup failed on resume\n");
8414 rdev->accel_working = false;
8423 * cik_suspend - suspend the asic
8425 * @rdev: radeon_device pointer
8427 * Bring the chip into a state suitable for suspend (CIK).
8428 * Called at suspend.
8429 * Returns 0 for success.
/*
 * Tears down in roughly the reverse order of cik_startup(): KFD, PM,
 * audio, VM manager, then disables CP and SDMA, suspends UVD (when
 * present) and VCE, and finally IRQs, writeback and the GART.
 */
8431 int cik_suspend(struct radeon_device *rdev)
8433 radeon_kfd_suspend(rdev);
8434 radeon_pm_suspend(rdev);
8435 radeon_audio_fini(rdev);
8436 radeon_vm_manager_fini(rdev);
8437 cik_cp_enable(rdev, false);
8438 cik_sdma_enable(rdev, false);
8439 if (rdev->has_uvd) {
8440 uvd_v1_0_fini(rdev);
8441 radeon_uvd_suspend(rdev);
8444 radeon_vce_suspend(rdev);
8447 cik_irq_suspend(rdev);
8448 radeon_wb_disable(rdev);
8449 cik_pcie_gart_disable(rdev);
8453 /* Plan is to move initialization in that function and use
8454 * helper function so that radeon_device_init pretty much
8455 * do nothing more than calling asic specific function. This
8456 * should also allow to remove a bunch of callback function
8460 * cik_init - asic specific driver and hw init
8462 * @rdev: radeon_device pointer
8464 * Setup asic specific driver variables and program the hw
8465 * to a functional state (CIK).
8466 * Called at driver startup.
8467 * Returns 0 for success, errors for failure.
/*
 * NOTE(review): this extract is missing interleaved lines (braces,
 * error checks, returns); code lines below are verbatim.
 */
8469 int cik_init(struct radeon_device *rdev)
8471 struct radeon_ring *ring;
8475 if (!radeon_get_bios(rdev)) {
8476 if (ASIC_IS_AVIVO(rdev))
8479 /* Must be an ATOMBIOS */
8480 if (!rdev->is_atom_bios) {
/* NOTE(review): message says "cayman" but this is the CIK init path —
 * presumably copied from the cayman driver; runtime string left as-is. */
8481 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8484 r = radeon_atombios_init(rdev);
8488 /* Post card if necessary */
8489 if (!radeon_card_posted(rdev)) {
8491 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8494 DRM_INFO("GPU not posted. posting now...\n");
8495 atom_asic_init(rdev->mode_info.atom_context);
8497 /* init golden registers */
8498 cik_init_golden_registers(rdev);
8499 /* Initialize scratch registers */
8500 cik_scratch_init(rdev);
8501 /* Initialize surface registers */
8502 radeon_surface_init(rdev);
8503 /* Initialize clocks */
8504 radeon_get_clock_info(rdev->ddev);
8507 r = radeon_fence_driver_init(rdev);
8511 /* initialize memory controller */
8512 r = cik_mc_init(rdev);
8515 /* Memory manager */
8516 r = radeon_bo_init(rdev);
/* IGPs need fewer firmware images than dGPUs (no MC/SMC ucode visible here) */
8520 if (rdev->flags & RADEON_IS_IGP) {
8521 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8522 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8523 r = cik_init_microcode(rdev);
8525 DRM_ERROR("Failed to load firmware!\n");
8530 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8531 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8533 r = cik_init_microcode(rdev);
8535 DRM_ERROR("Failed to load firmware!\n");
8541 /* Initialize power management */
8542 radeon_pm_init(rdev);
8544 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8545 ring->ring_obj = NULL;
8546 r600_ring_init(rdev, ring, 1024 * 1024);
8548 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8549 ring->ring_obj = NULL;
8550 r600_ring_init(rdev, ring, 1024 * 1024);
8551 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8555 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8556 ring->ring_obj = NULL;
8557 r600_ring_init(rdev, ring, 1024 * 1024);
8558 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8562 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8563 ring->ring_obj = NULL;
8564 r600_ring_init(rdev, ring, 256 * 1024);
8566 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8567 ring->ring_obj = NULL;
8568 r600_ring_init(rdev, ring, 256 * 1024);
8573 rdev->ih.ring_obj = NULL;
8574 r600_ih_ring_init(rdev, 64 * 1024);
8576 r = r600_pcie_gart_init(rdev);
8580 rdev->accel_working = true;
8581 r = cik_startup(rdev);
8583 dev_err(rdev->dev, "disabling GPU acceleration\n");
8585 cik_sdma_fini(rdev);
8587 sumo_rlc_fini(rdev);
8589 radeon_wb_fini(rdev);
8590 radeon_ib_pool_fini(rdev);
8591 radeon_vm_manager_fini(rdev);
8592 radeon_irq_kms_fini(rdev);
8593 cik_pcie_gart_fini(rdev);
8594 rdev->accel_working = false;
8597 /* Don't start up if the MC ucode is missing.
8598 * The default clocks and voltages before the MC ucode
8599 * is loaded are not sufficient for advanced operations.
8601 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
/* NOTE(review): "NI+" in this message predates CIK — likely copied from
 * the cayman/NI code; runtime string left as-is. */
8602 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8610 * cik_fini - asic specific driver and hw fini
8612 * @rdev: radeon_device pointer
8614 * Tear down the asic specific driver variables and program the hw
8615 * to an idle state (CIK).
8616 * Called at driver unload.
/*
 * Frees everything cik_init()/cik_startup() created, in reverse order:
 * PM, SDMA, RLC, writeback, VM manager, IB pool, IRQs, UVD/VCE, GART,
 * VRAM scratch, GEM, fence driver, BO manager and atombios state.
 */
8618 void cik_fini(struct radeon_device *rdev)
8620 radeon_pm_fini(rdev);
8622 cik_sdma_fini(rdev);
8626 sumo_rlc_fini(rdev);
8628 radeon_wb_fini(rdev);
8629 radeon_vm_manager_fini(rdev);
8630 radeon_ib_pool_fini(rdev);
8631 radeon_irq_kms_fini(rdev);
8632 uvd_v1_0_fini(rdev);
8633 radeon_uvd_fini(rdev);
8634 radeon_vce_fini(rdev);
8635 cik_pcie_gart_fini(rdev);
8636 r600_vram_scratch_fini(rdev);
8637 radeon_gem_fini(rdev);
8638 radeon_fence_driver_fini(rdev);
8639 radeon_bo_fini(rdev);
8640 radeon_atombios_fini(rdev);
/*
 * dce8_program_fmt - program the FMT (bit-depth/dither) block for an encoder.
 *
 * Picks the connector's bpc and dither preference, skips LVDS/eDP
 * (handled by atombios) and analog DACs, then builds the
 * FMT_BIT_DEPTH_CONTROL value: spatial/random dithering when dithering
 * is enabled, plain truncation otherwise, at depth 0/1/2.
 *
 * NOTE(review): the bpc switch-arm lines are missing from this extract;
 * presumably depth 0/1/2 correspond to 6/8/10 bpc — confirm against the
 * full file.  Code lines below are verbatim.
 */
8645 void dce8_program_fmt(struct drm_encoder *encoder)
8647 struct drm_device *dev = encoder->dev;
8648 struct radeon_device *rdev = dev->dev_private;
8649 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8650 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8651 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8654 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8657 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8658 bpc = radeon_get_monitor_bpc(connector);
8659 dither = radeon_connector->dither;
8662 /* LVDS/eDP FMT is set up by atom */
8663 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8666 /* not needed for analog */
8667 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8668 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8676 if (dither == RADEON_FMT_DITHER_ENABLE)
8677 /* XXX sort out optimal dither settings */
8678 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8679 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8681 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8684 if (dither == RADEON_FMT_DITHER_ENABLE)
8685 /* XXX sort out optimal dither settings */
8686 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8687 FMT_RGB_RANDOM_ENABLE |
8688 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8690 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8693 if (dither == RADEON_FMT_DITHER_ENABLE)
8694 /* XXX sort out optimal dither settings */
8695 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8696 FMT_RGB_RANDOM_ENABLE |
8697 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8699 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8706 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8709 /* display watermark setup */
8711 * dce8_line_buffer_adjust - Set up the line buffer
8713 * @rdev: radeon_device pointer
8714 * @radeon_crtc: the selected display controller
8715 * @mode: the current display mode on the selected display
8718 * Set up the line buffer allocation for
8719 * the selected display controller (CIK).
8720 * Returns the line buffer size in pixels.
8722 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8723 struct radeon_crtc *radeon_crtc,
8724 struct drm_display_mode *mode)
8726 u32 tmp, buffer_alloc, i;
8727 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8730 * There are 6 line buffers, one for each display controllers.
8731 * There are 3 partitions per LB. Select the number of partitions
8732 * to enable based on the display width. For display widths larger
8733 * than 4096, you need to use 2 display controllers and combine
8734 * them using the stereo blender.
8736 if (radeon_crtc->base.enabled && mode) {
8737 if (mode->crtc_hdisplay < 1920) {
8740 } else if (mode->crtc_hdisplay < 2560) {
8743 } else if (mode->crtc_hdisplay < 4096) {
8745 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8747 DRM_DEBUG_KMS("Mode too big for LB!\n");
8749 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
/* program the LB partition config and DMIF buffer allocation, then
 * poll (bounded by usec_timeout) until the allocation completes */
8756 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8757 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8759 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8760 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8761 for (i = 0; i < rdev->usec_timeout; i++) {
8762 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8763 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8768 if (radeon_crtc->base.enabled && mode) {
8780 /* controller not enabled, so no lb used */
8785 * cik_get_number_of_dram_channels - get the number of dram channels
8787 * @rdev: radeon_device pointer
8789 * Look up the number of video ram channels (CIK).
8790 * Used for display watermark bandwidth calculations
8791 * Returns the number of dram channels
/*
 * Decodes the NOOFCHAN field of MC_SHARED_CHMAP.
 * NOTE(review): the switch arms mapping field values to channel counts
 * are missing from this extract.
 */
8793 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8795 u32 tmp = RREG32(MC_SHARED_CHMAP);
8797 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* dce8_wm_params - inputs for the DCE8 display watermark calculations,
 * filled in by dce8_program_watermarks() for one head/clock level. */
8820 struct dce8_wm_params {
8821 u32 dram_channels; /* number of dram channels */
8822 u32 yclk; /* bandwidth per dram data pin in kHz */
8823 u32 sclk; /* engine clock in kHz */
8824 u32 disp_clk; /* display clock in kHz */
8825 u32 src_width; /* viewport width */
8826 u32 active_time; /* active display time in ns */
8827 u32 blank_time; /* blank time in ns */
8828 bool interlaced; /* mode is interlaced */
8829 fixed20_12 vsc; /* vertical scale ratio */
8830 u32 num_heads; /* number of active crtcs */
8831 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8832 u32 lb_size; /* line buffer allocated to pipe */
8833 u32 vtaps; /* vertical scaler taps */
8837 * dce8_dram_bandwidth - get the dram bandwidth
8839 * @wm: watermark calculation data
8841 * Calculate the raw dram bandwidth (CIK).
8842 * Used for display watermark bandwidth calculations
8843 * Returns the dram bandwidth in MBytes/s
/*
 * bandwidth = (yclk/1000) * channels * 4 bytes * 0.7 efficiency,
 * computed in 20.12 fixed point (fixed20_12).
 */
8845 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8847 /* Calculate raw DRAM Bandwidth */
8848 fixed20_12 dram_efficiency; /* 0.7 */
8849 fixed20_12 yclk, dram_channels, bandwidth;
8852 a.full = dfixed_const(1000);
8853 yclk.full = dfixed_const(wm->yclk);
8854 yclk.full = dfixed_div(yclk, a);
8855 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8856 a.full = dfixed_const(10);
8857 dram_efficiency.full = dfixed_const(7);
8858 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8859 bandwidth.full = dfixed_mul(dram_channels, yclk);
8860 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8862 return dfixed_trunc(bandwidth);
8866 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8868 * @wm: watermark calculation data
8870 * Calculate the dram bandwidth used for display (CIK).
8871 * Used for display watermark bandwidth calculations
8872 * Returns the dram bandwidth for display in MBytes/s
/*
 * Same as dce8_dram_bandwidth() but with the display's worst-case 0.3
 * share of DRAM bandwidth instead of the 0.7 raw efficiency.
 */
8874 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8876 /* Calculate DRAM Bandwidth and the part allocated to display. */
8877 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8878 fixed20_12 yclk, dram_channels, bandwidth;
8881 a.full = dfixed_const(1000);
8882 yclk.full = dfixed_const(wm->yclk);
8883 yclk.full = dfixed_div(yclk, a);
8884 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8885 a.full = dfixed_const(10);
8886 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8887 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8888 bandwidth.full = dfixed_mul(dram_channels, yclk);
8889 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8891 return dfixed_trunc(bandwidth);
8895 * dce8_data_return_bandwidth - get the data return bandwidth
8897 * @wm: watermark calculation data
8899 * Calculate the data return bandwidth used for display (CIK).
8900 * Used for display watermark bandwidth calculations
8901 * Returns the data return bandwidth in MBytes/s
/*
 * bandwidth = (sclk/1000) * 32 bytes * 0.8 efficiency (20.12 fixed point).
 */
8903 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8905 /* Calculate the display Data return Bandwidth */
8906 fixed20_12 return_efficiency; /* 0.8 */
8907 fixed20_12 sclk, bandwidth;
8910 a.full = dfixed_const(1000);
8911 sclk.full = dfixed_const(wm->sclk);
8912 sclk.full = dfixed_div(sclk, a);
8913 a.full = dfixed_const(10);
8914 return_efficiency.full = dfixed_const(8);
8915 return_efficiency.full = dfixed_div(return_efficiency, a);
8916 a.full = dfixed_const(32);
8917 bandwidth.full = dfixed_mul(a, sclk);
8918 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8920 return dfixed_trunc(bandwidth);
8924 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8926 * @wm: watermark calculation data
8928 * Calculate the dmif bandwidth used for display (CIK).
8929 * Used for display watermark bandwidth calculations
8930 * Returns the dmif bandwidth in MBytes/s
/*
 * bandwidth = (disp_clk/1000) * 32 bytes * 0.8 efficiency (20.12 fixed point).
 */
8932 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8934 /* Calculate the DMIF Request Bandwidth */
8935 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8936 fixed20_12 disp_clk, bandwidth;
8939 a.full = dfixed_const(1000);
8940 disp_clk.full = dfixed_const(wm->disp_clk);
8941 disp_clk.full = dfixed_div(disp_clk, a);
8942 a.full = dfixed_const(32);
8943 b.full = dfixed_mul(a, disp_clk);
8945 a.full = dfixed_const(10);
8946 disp_clk_request_efficiency.full = dfixed_const(8);
8947 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8949 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8951 return dfixed_trunc(bandwidth);
8955 * dce8_available_bandwidth - get the min available bandwidth
8957 * @wm: watermark calculation data
8959 * Calculate the min available bandwidth used for display (CIK).
8960 * Used for display watermark bandwidth calculations
8961 * Returns the min available bandwidth in MBytes/s
/*
 * The display is limited by the tightest of the three paths:
 * DRAM, engine data return, and DMIF request bandwidth.
 */
8963 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8965 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8966 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8967 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8968 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8970 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8974 * dce8_average_bandwidth - get the average available bandwidth
8976 * @wm: watermark calculation data
8978 * Calculate the average available bandwidth used for display (CIK).
8979 * Used for display watermark bandwidth calculations
8980 * Returns the average available bandwidth in MBytes/s
/*
 * bandwidth = src_width * bytes_per_pixel * vsc / line_time,
 * where line_time = (active_time + blank_time) / 1000 (20.12 fixed point).
 */
8982 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8984 /* Calculate the display mode Average Bandwidth
8985 * DisplayMode should contain the source and destination dimensions,
8989 fixed20_12 line_time;
8990 fixed20_12 src_width;
8991 fixed20_12 bandwidth;
8994 a.full = dfixed_const(1000);
8995 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8996 line_time.full = dfixed_div(line_time, a);
8997 bpp.full = dfixed_const(wm->bytes_per_pixel);
8998 src_width.full = dfixed_const(wm->src_width);
8999 bandwidth.full = dfixed_mul(src_width, bpp);
9000 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9001 bandwidth.full = dfixed_div(bandwidth, line_time);
9003 return dfixed_trunc(bandwidth);
9007 * dce8_latency_watermark - get the latency watermark
9009 * @wm: watermark calculation data
9011 * Calculate the latency watermark (CIK).
9012 * Used for display watermark bandwidth calculations
9013 * Returns the latency watermark in ns
/*
 * latency = mc_latency + data-return time for the other heads + dc pipe
 * latency; if filling a line buffer line takes longer than the active
 * display time, the shortfall is added on top.
 *
 * NOTE(review): some lines (declarations of a/b/c, early return for
 * num_heads == 0, else branches) are missing from this extract.
 */
9015 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9017 /* First calculate the latency in ns */
9018 u32 mc_latency = 2000; /* 2000 ns. */
9019 u32 available_bandwidth = dce8_available_bandwidth(wm);
9020 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9021 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9022 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9023 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9024 (wm->num_heads * cursor_line_pair_return_time);
9025 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9026 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9027 u32 tmp, dmif_size = 12288;
9030 if (wm->num_heads == 0)
/* downscaling (vsc > 1..2) or interlace needs up to 4 source lines per
 * destination line, otherwise 2 */
9033 a.full = dfixed_const(2);
9034 b.full = dfixed_const(1);
9035 if ((wm->vsc.full > a.full) ||
9036 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9038 ((wm->vsc.full >= a.full) && wm->interlaced))
9039 max_src_lines_per_dst_line = 4;
9041 max_src_lines_per_dst_line = 2;
/* lb_fill_bw = min(per-head share of available bw,
 *                  dmif-limited bw, pixel-rate bw) */
9043 a.full = dfixed_const(available_bandwidth);
9044 b.full = dfixed_const(wm->num_heads);
9045 a.full = dfixed_div(a, b);
9046 tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9047 tmp = min(dfixed_trunc(a), tmp);
9049 lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9051 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9052 b.full = dfixed_const(1000);
9053 c.full = dfixed_const(lb_fill_bw);
9054 b.full = dfixed_div(c, b);
9055 a.full = dfixed_div(a, b);
9056 line_fill_time = dfixed_trunc(a);
9058 if (line_fill_time < wm->active_time)
9061 return latency + (line_fill_time - wm->active_time);
9066 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9067 * average and available dram bandwidth
9069 * @wm: watermark calculation data
9071 * Check if the display average bandwidth fits in the display
9072 * dram bandwidth (CIK).
9073 * Used for display watermark bandwidth calculations
9074 * Returns true if the display fits, false if not.
/* The display DRAM bandwidth is shared equally between num_heads heads. */
9076 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9078 if (dce8_average_bandwidth(wm) <=
9079 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9086 * dce8_average_bandwidth_vs_available_bandwidth - check
9087 * average and available bandwidth
9089 * @wm: watermark calculation data
9091 * Check if the display average bandwidth fits in the display
9092 * available bandwidth (CIK).
9093 * Used for display watermark bandwidth calculations
9094 * Returns true if the display fits, false if not.
/* The minimum available bandwidth is shared equally between the heads. */
9096 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9098 if (dce8_average_bandwidth(wm) <=
9099 (dce8_available_bandwidth(wm) / wm->num_heads))
9106 * dce8_check_latency_hiding - check latency hiding
9108 * @wm: watermark calculation data
9110 * Check latency hiding (CIK).
9111 * Used for display watermark bandwidth calculations
9112 * Returns true if the display fits, false if not.
/*
 * The line buffer can tolerate 1 line of latency when scaling (or when
 * few LB partitions fit), 2 lines otherwise; the display fits if the
 * latency watermark is hidden within those lines plus the blank time.
 */
9114 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9116 u32 lb_partitions = wm->lb_size / wm->src_width;
9117 u32 line_time = wm->active_time + wm->blank_time;
9118 u32 latency_tolerant_lines;
9122 a.full = dfixed_const(1);
9123 if (wm->vsc.full > a.full)
9124 latency_tolerant_lines = 1;
9126 if (lb_partitions <= (wm->vtaps + 1))
9127 latency_tolerant_lines = 1;
9129 latency_tolerant_lines = 2;
9132 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9134 if (dce8_latency_watermark(wm) <= latency_hiding)
9141 * dce8_program_watermarks - program display watermarks
9143 * @rdev: radeon_device pointer
9144 * @radeon_crtc: the selected display controller
9145 * @lb_size: line buffer size
9146 * @num_heads: number of display controllers in use
9148 * Calculate and program the display watermarks for the
9149 * selected display controller (CIK).
/*
 * Builds two dce8_wm_params sets — wm_high for the high (or current)
 * clocks and wm_low for the low clocks — computes a latency watermark
 * for each, and programs them into the DPG watermark registers via the
 * DPG_WATERMARK_MASK_CONTROL select (sets 1 and 2), restoring the
 * original mask selection afterwards.  The computed values are also
 * stashed on the crtc for DPM.
 *
 * NOTE(review): interleaved lines (declarations, else branches, closing
 * braces) are missing from this extract; code lines are verbatim.
 */
9151 static void dce8_program_watermarks(struct radeon_device *rdev,
9152 struct radeon_crtc *radeon_crtc,
9153 u32 lb_size, u32 num_heads)
9155 struct drm_display_mode *mode = &radeon_crtc->base.mode;
9156 struct dce8_wm_params wm_low, wm_high;
9159 u32 latency_watermark_a = 0, latency_watermark_b = 0;
9162 if (radeon_crtc->base.enabled && num_heads && mode) {
9163 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9165 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9167 line_time = min(line_time, (u32)65535);
9169 /* watermark for high clocks */
9170 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9171 rdev->pm.dpm_enabled) {
9173 radeon_dpm_get_mclk(rdev, false) * 10;
9175 radeon_dpm_get_sclk(rdev, false) * 10;
9177 wm_high.yclk = rdev->pm.current_mclk * 10;
9178 wm_high.sclk = rdev->pm.current_sclk * 10;
9181 wm_high.disp_clk = mode->clock;
9182 wm_high.src_width = mode->crtc_hdisplay;
9183 wm_high.active_time = active_time;
9184 wm_high.blank_time = line_time - wm_high.active_time;
9185 wm_high.interlaced = false;
9186 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9187 wm_high.interlaced = true;
9188 wm_high.vsc = radeon_crtc->vsc;
9190 if (radeon_crtc->rmx_type != RMX_OFF)
9192 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9193 wm_high.lb_size = lb_size;
9194 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9195 wm_high.num_heads = num_heads;
9197 /* set for high clocks */
9198 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9200 /* possibly force display priority to high */
9201 /* should really do this at mode validation time... */
9202 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9203 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9204 !dce8_check_latency_hiding(&wm_high) ||
9205 (rdev->disp_priority == 2)) {
9206 DRM_DEBUG_KMS("force priority to high\n");
9209 /* watermark for low clocks */
9210 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9211 rdev->pm.dpm_enabled) {
9213 radeon_dpm_get_mclk(rdev, true) * 10;
9215 radeon_dpm_get_sclk(rdev, true) * 10;
9217 wm_low.yclk = rdev->pm.current_mclk * 10;
9218 wm_low.sclk = rdev->pm.current_sclk * 10;
9221 wm_low.disp_clk = mode->clock;
9222 wm_low.src_width = mode->crtc_hdisplay;
9223 wm_low.active_time = active_time;
9224 wm_low.blank_time = line_time - wm_low.active_time;
9225 wm_low.interlaced = false;
9226 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9227 wm_low.interlaced = true;
9228 wm_low.vsc = radeon_crtc->vsc;
9230 if (radeon_crtc->rmx_type != RMX_OFF)
9232 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9233 wm_low.lb_size = lb_size;
9234 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9235 wm_low.num_heads = num_heads;
9237 /* set for low clocks */
9238 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9240 /* possibly force display priority to high */
9241 /* should really do this at mode validation time... */
9242 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9243 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9244 !dce8_check_latency_hiding(&wm_low) ||
9245 (rdev->disp_priority == 2)) {
9246 DRM_DEBUG_KMS("force priority to high\n");
9249 /* Save number of lines the linebuffer leads before the scanout */
9250 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
/* select watermark set 1, program watermark a */
9254 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9256 tmp &= ~LATENCY_WATERMARK_MASK(3);
9257 tmp |= LATENCY_WATERMARK_MASK(1);
9258 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9259 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9260 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9261 LATENCY_HIGH_WATERMARK(line_time)));
/* select watermark set 2, program watermark b */
9263 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9264 tmp &= ~LATENCY_WATERMARK_MASK(3);
9265 tmp |= LATENCY_WATERMARK_MASK(2);
9266 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9267 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9268 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9269 LATENCY_HIGH_WATERMARK(line_time)));
9270 /* restore original selection */
9271 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9273 /* save values for DPM */
9274 radeon_crtc->line_time = line_time;
9275 radeon_crtc->wm_high = latency_watermark_a;
9276 radeon_crtc->wm_low = latency_watermark_b;
9280 * dce8_bandwidth_update - program display watermarks
9282 * @rdev: radeon_device pointer
9284 * Calculate and program the display watermarks and line
9285 * buffer allocation (CIK).
9287 void dce8_bandwidth_update(struct radeon_device *rdev)
9289 struct drm_display_mode *mode = NULL;
9290 u32 num_heads = 0, lb_size;
9293 if (!rdev->mode_info.mode_config_initialized)
9296 radeon_update_display_priority(rdev);
9298 for (i = 0; i < rdev->num_crtc; i++) {
9299 if (rdev->mode_info.crtcs[i]->base.enabled)
9302 for (i = 0; i < rdev->num_crtc; i++) {
9303 mode = &rdev->mode_info.crtcs[i]->base.mode;
9304 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9305 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9310 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9312 * @rdev: radeon_device pointer
9314 * Fetches a GPU clock counter snapshot (SI).
9315 * Returns the 64 bit clock counter snapshot.
9317 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9321 mutex_lock(&rdev->gpu_clock_mutex);
9322 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9323 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9324 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9325 mutex_unlock(&rdev->gpu_clock_mutex);
9329 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9330 u32 cntl_reg, u32 status_reg)
9333 struct atom_clock_dividers dividers;
9336 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9337 clock, false, ÷rs);
9341 tmp = RREG32_SMC(cntl_reg);
9342 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9343 tmp |= dividers.post_divider;
9344 WREG32_SMC(cntl_reg, tmp);
9346 for (i = 0; i < 100; i++) {
9347 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9357 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9361 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9365 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9369 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9372 struct atom_clock_dividers dividers;
9375 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9376 ecclk, false, ÷rs);
9380 for (i = 0; i < 100; i++) {
9381 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9388 tmp = RREG32_SMC(CG_ECLK_CNTL);
9389 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9390 tmp |= dividers.post_divider;
9391 WREG32_SMC(CG_ECLK_CNTL, tmp);
9393 for (i = 0; i < 100; i++) {
9394 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9404 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9406 struct pci_dev *root = rdev->pdev->bus->self;
9407 int bridge_pos, gpu_pos;
9408 u32 speed_cntl, mask, current_data_rate;
9412 if (pci_is_root_bus(rdev->pdev->bus))
9415 if (radeon_pcie_gen2 == 0)
9418 if (rdev->flags & RADEON_IS_IGP)
9421 if (!(rdev->flags & RADEON_IS_PCIE))
9424 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9428 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9431 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9432 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9433 LC_CURRENT_DATA_RATE_SHIFT;
9434 if (mask & DRM_PCIE_SPEED_80) {
9435 if (current_data_rate == 2) {
9436 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9439 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9440 } else if (mask & DRM_PCIE_SPEED_50) {
9441 if (current_data_rate == 1) {
9442 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9445 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9448 bridge_pos = pci_pcie_cap(root);
9452 gpu_pos = pci_pcie_cap(rdev->pdev);
9456 if (mask & DRM_PCIE_SPEED_80) {
9457 /* re-try equalization if gen3 is not already enabled */
9458 if (current_data_rate != 2) {
9459 u16 bridge_cfg, gpu_cfg;
9460 u16 bridge_cfg2, gpu_cfg2;
9461 u32 max_lw, current_lw, tmp;
9463 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9464 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9466 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9467 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9469 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9470 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9472 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9473 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9474 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9476 if (current_lw < max_lw) {
9477 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9478 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9479 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9480 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9481 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9482 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9486 for (i = 0; i < 10; i++) {
9488 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9489 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9492 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9493 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9495 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9496 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9498 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9499 tmp |= LC_SET_QUIESCE;
9500 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9502 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9504 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9509 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9510 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9511 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9512 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9514 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9515 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9516 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9517 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9520 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9521 tmp16 &= ~((1 << 4) | (7 << 9));
9522 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9523 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9525 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9526 tmp16 &= ~((1 << 4) | (7 << 9));
9527 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9528 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9530 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9531 tmp &= ~LC_SET_QUIESCE;
9532 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9537 /* set the link speed */
9538 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9539 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9540 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9542 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9544 if (mask & DRM_PCIE_SPEED_80)
9545 tmp16 |= 3; /* gen3 */
9546 else if (mask & DRM_PCIE_SPEED_50)
9547 tmp16 |= 2; /* gen2 */
9549 tmp16 |= 1; /* gen1 */
9550 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9552 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9553 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9554 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9556 for (i = 0; i < rdev->usec_timeout; i++) {
9557 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9558 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9564 static void cik_program_aspm(struct radeon_device *rdev)
9567 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9568 bool disable_clkreq = false;
9570 if (radeon_aspm == 0)
9573 /* XXX double check IGPs */
9574 if (rdev->flags & RADEON_IS_IGP)
9577 if (!(rdev->flags & RADEON_IS_PCIE))
9580 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9581 data &= ~LC_XMIT_N_FTS_MASK;
9582 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9584 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9586 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9587 data |= LC_GO_TO_RECOVERY;
9589 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9591 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9592 data |= P_IGNORE_EDB_ERR;
9594 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9596 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9597 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9598 data |= LC_PMI_TO_L1_DIS;
9600 data |= LC_L0S_INACTIVITY(7);
9603 data |= LC_L1_INACTIVITY(7);
9604 data &= ~LC_PMI_TO_L1_DIS;
9606 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9608 if (!disable_plloff_in_l1) {
9609 bool clk_req_support;
9611 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9612 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9613 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9615 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9617 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9618 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9619 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9621 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9623 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9624 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9625 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9627 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9629 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9630 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9631 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9633 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9635 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9636 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9637 data |= LC_DYN_LANES_PWR_STATE(3);
9639 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9641 if (!disable_clkreq &&
9642 !pci_is_root_bus(rdev->pdev->bus)) {
9643 struct pci_dev *root = rdev->pdev->bus->self;
9646 clk_req_support = false;
9647 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9648 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9649 clk_req_support = true;
9651 clk_req_support = false;
9654 if (clk_req_support) {
9655 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9656 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9658 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9660 orig = data = RREG32_SMC(THM_CLK_CNTL);
9661 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9662 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9664 WREG32_SMC(THM_CLK_CNTL, data);
9666 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9667 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9668 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9670 WREG32_SMC(MISC_CLK_CTRL, data);
9672 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9673 data &= ~BCLK_AS_XCLK;
9675 WREG32_SMC(CG_CLKPIN_CNTL, data);
9677 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9678 data &= ~FORCE_BIF_REFCLK_EN;
9680 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9682 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9683 data &= ~MPLL_CLKOUT_SEL_MASK;
9684 data |= MPLL_CLKOUT_SEL(4);
9686 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9691 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9694 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9695 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9697 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9700 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9701 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9702 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9703 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9704 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9705 data &= ~LC_L0S_INACTIVITY_MASK;
9707 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);