2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
/* Default SH_MEM_CONFIG value for gfx: unaligned-access mode for the
 * shared-memory apertures. */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
42 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
43 extern void r600_ih_ring_fini(struct radeon_device *rdev);
44 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
45 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
46 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
47 extern void sumo_rlc_fini(struct radeon_device *rdev);
48 extern int sumo_rlc_init(struct radeon_device *rdev);
49 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
50 extern void si_rlc_reset(struct radeon_device *rdev);
51 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
52 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
53 extern int cik_sdma_resume(struct radeon_device *rdev);
54 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
55 extern void cik_sdma_fini(struct radeon_device *rdev);
56 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
57 static void cik_rlc_stop(struct radeon_device *rdev);
58 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
59 static void cik_program_aspm(struct radeon_device *rdev);
60 static void cik_init_pg(struct radeon_device *rdev);
61 static void cik_init_cg(struct radeon_device *rdev);
62 static void cik_fini_pg(struct radeon_device *rdev);
63 static void cik_fini_cg(struct radeon_device *rdev);
64 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
68 * cik_get_allowed_info_register - fetch the register for the info ioctl
70 * @rdev: radeon_device pointer
71 * @reg: register offset in bytes
72 * @val: register value
74 * Returns 0 for success or -EINVAL for an invalid register
77 int cik_get_allowed_info_register(struct radeon_device *rdev,
89 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
90 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
101 * Indirect registers accessor
103 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
108 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
109 WREG32(CIK_DIDT_IND_INDEX, (reg));
110 r = RREG32(CIK_DIDT_IND_DATA);
111 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
115 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
119 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
120 WREG32(CIK_DIDT_IND_INDEX, (reg));
121 WREG32(CIK_DIDT_IND_DATA, (v));
122 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
125 /* get temperature in millidegrees */
126 int ci_get_temp(struct radeon_device *rdev)
131 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
137 actual_temp = temp & 0x1ff;
139 actual_temp = actual_temp * 1000;
144 /* get temperature in millidegrees */
145 int kv_get_temp(struct radeon_device *rdev)
150 temp = RREG32_SMC(0xC0300E0C);
153 actual_temp = (temp / 8) - 49;
157 actual_temp = actual_temp * 1000;
163 * Indirect registers accessor
165 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
170 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
171 WREG32(PCIE_INDEX, reg);
172 (void)RREG32(PCIE_INDEX);
173 r = RREG32(PCIE_DATA);
174 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
178 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
182 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
183 WREG32(PCIE_INDEX, reg);
184 (void)RREG32(PCIE_INDEX);
185 WREG32(PCIE_DATA, v);
186 (void)RREG32(PCIE_DATA);
187 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
190 static const u32 spectre_rlc_save_restore_register_list[] =
192 (0x0e00 << 16) | (0xc12c >> 2),
194 (0x0e00 << 16) | (0xc140 >> 2),
196 (0x0e00 << 16) | (0xc150 >> 2),
198 (0x0e00 << 16) | (0xc15c >> 2),
200 (0x0e00 << 16) | (0xc168 >> 2),
202 (0x0e00 << 16) | (0xc170 >> 2),
204 (0x0e00 << 16) | (0xc178 >> 2),
206 (0x0e00 << 16) | (0xc204 >> 2),
208 (0x0e00 << 16) | (0xc2b4 >> 2),
210 (0x0e00 << 16) | (0xc2b8 >> 2),
212 (0x0e00 << 16) | (0xc2bc >> 2),
214 (0x0e00 << 16) | (0xc2c0 >> 2),
216 (0x0e00 << 16) | (0x8228 >> 2),
218 (0x0e00 << 16) | (0x829c >> 2),
220 (0x0e00 << 16) | (0x869c >> 2),
222 (0x0600 << 16) | (0x98f4 >> 2),
224 (0x0e00 << 16) | (0x98f8 >> 2),
226 (0x0e00 << 16) | (0x9900 >> 2),
228 (0x0e00 << 16) | (0xc260 >> 2),
230 (0x0e00 << 16) | (0x90e8 >> 2),
232 (0x0e00 << 16) | (0x3c000 >> 2),
234 (0x0e00 << 16) | (0x3c00c >> 2),
236 (0x0e00 << 16) | (0x8c1c >> 2),
238 (0x0e00 << 16) | (0x9700 >> 2),
240 (0x0e00 << 16) | (0xcd20 >> 2),
242 (0x4e00 << 16) | (0xcd20 >> 2),
244 (0x5e00 << 16) | (0xcd20 >> 2),
246 (0x6e00 << 16) | (0xcd20 >> 2),
248 (0x7e00 << 16) | (0xcd20 >> 2),
250 (0x8e00 << 16) | (0xcd20 >> 2),
252 (0x9e00 << 16) | (0xcd20 >> 2),
254 (0xae00 << 16) | (0xcd20 >> 2),
256 (0xbe00 << 16) | (0xcd20 >> 2),
258 (0x0e00 << 16) | (0x89bc >> 2),
260 (0x0e00 << 16) | (0x8900 >> 2),
263 (0x0e00 << 16) | (0xc130 >> 2),
265 (0x0e00 << 16) | (0xc134 >> 2),
267 (0x0e00 << 16) | (0xc1fc >> 2),
269 (0x0e00 << 16) | (0xc208 >> 2),
271 (0x0e00 << 16) | (0xc264 >> 2),
273 (0x0e00 << 16) | (0xc268 >> 2),
275 (0x0e00 << 16) | (0xc26c >> 2),
277 (0x0e00 << 16) | (0xc270 >> 2),
279 (0x0e00 << 16) | (0xc274 >> 2),
281 (0x0e00 << 16) | (0xc278 >> 2),
283 (0x0e00 << 16) | (0xc27c >> 2),
285 (0x0e00 << 16) | (0xc280 >> 2),
287 (0x0e00 << 16) | (0xc284 >> 2),
289 (0x0e00 << 16) | (0xc288 >> 2),
291 (0x0e00 << 16) | (0xc28c >> 2),
293 (0x0e00 << 16) | (0xc290 >> 2),
295 (0x0e00 << 16) | (0xc294 >> 2),
297 (0x0e00 << 16) | (0xc298 >> 2),
299 (0x0e00 << 16) | (0xc29c >> 2),
301 (0x0e00 << 16) | (0xc2a0 >> 2),
303 (0x0e00 << 16) | (0xc2a4 >> 2),
305 (0x0e00 << 16) | (0xc2a8 >> 2),
307 (0x0e00 << 16) | (0xc2ac >> 2),
309 (0x0e00 << 16) | (0xc2b0 >> 2),
311 (0x0e00 << 16) | (0x301d0 >> 2),
313 (0x0e00 << 16) | (0x30238 >> 2),
315 (0x0e00 << 16) | (0x30250 >> 2),
317 (0x0e00 << 16) | (0x30254 >> 2),
319 (0x0e00 << 16) | (0x30258 >> 2),
321 (0x0e00 << 16) | (0x3025c >> 2),
323 (0x4e00 << 16) | (0xc900 >> 2),
325 (0x5e00 << 16) | (0xc900 >> 2),
327 (0x6e00 << 16) | (0xc900 >> 2),
329 (0x7e00 << 16) | (0xc900 >> 2),
331 (0x8e00 << 16) | (0xc900 >> 2),
333 (0x9e00 << 16) | (0xc900 >> 2),
335 (0xae00 << 16) | (0xc900 >> 2),
337 (0xbe00 << 16) | (0xc900 >> 2),
339 (0x4e00 << 16) | (0xc904 >> 2),
341 (0x5e00 << 16) | (0xc904 >> 2),
343 (0x6e00 << 16) | (0xc904 >> 2),
345 (0x7e00 << 16) | (0xc904 >> 2),
347 (0x8e00 << 16) | (0xc904 >> 2),
349 (0x9e00 << 16) | (0xc904 >> 2),
351 (0xae00 << 16) | (0xc904 >> 2),
353 (0xbe00 << 16) | (0xc904 >> 2),
355 (0x4e00 << 16) | (0xc908 >> 2),
357 (0x5e00 << 16) | (0xc908 >> 2),
359 (0x6e00 << 16) | (0xc908 >> 2),
361 (0x7e00 << 16) | (0xc908 >> 2),
363 (0x8e00 << 16) | (0xc908 >> 2),
365 (0x9e00 << 16) | (0xc908 >> 2),
367 (0xae00 << 16) | (0xc908 >> 2),
369 (0xbe00 << 16) | (0xc908 >> 2),
371 (0x4e00 << 16) | (0xc90c >> 2),
373 (0x5e00 << 16) | (0xc90c >> 2),
375 (0x6e00 << 16) | (0xc90c >> 2),
377 (0x7e00 << 16) | (0xc90c >> 2),
379 (0x8e00 << 16) | (0xc90c >> 2),
381 (0x9e00 << 16) | (0xc90c >> 2),
383 (0xae00 << 16) | (0xc90c >> 2),
385 (0xbe00 << 16) | (0xc90c >> 2),
387 (0x4e00 << 16) | (0xc910 >> 2),
389 (0x5e00 << 16) | (0xc910 >> 2),
391 (0x6e00 << 16) | (0xc910 >> 2),
393 (0x7e00 << 16) | (0xc910 >> 2),
395 (0x8e00 << 16) | (0xc910 >> 2),
397 (0x9e00 << 16) | (0xc910 >> 2),
399 (0xae00 << 16) | (0xc910 >> 2),
401 (0xbe00 << 16) | (0xc910 >> 2),
403 (0x0e00 << 16) | (0xc99c >> 2),
405 (0x0e00 << 16) | (0x9834 >> 2),
407 (0x0000 << 16) | (0x30f00 >> 2),
409 (0x0001 << 16) | (0x30f00 >> 2),
411 (0x0000 << 16) | (0x30f04 >> 2),
413 (0x0001 << 16) | (0x30f04 >> 2),
415 (0x0000 << 16) | (0x30f08 >> 2),
417 (0x0001 << 16) | (0x30f08 >> 2),
419 (0x0000 << 16) | (0x30f0c >> 2),
421 (0x0001 << 16) | (0x30f0c >> 2),
423 (0x0600 << 16) | (0x9b7c >> 2),
425 (0x0e00 << 16) | (0x8a14 >> 2),
427 (0x0e00 << 16) | (0x8a18 >> 2),
429 (0x0600 << 16) | (0x30a00 >> 2),
431 (0x0e00 << 16) | (0x8bf0 >> 2),
433 (0x0e00 << 16) | (0x8bcc >> 2),
435 (0x0e00 << 16) | (0x8b24 >> 2),
437 (0x0e00 << 16) | (0x30a04 >> 2),
439 (0x0600 << 16) | (0x30a10 >> 2),
441 (0x0600 << 16) | (0x30a14 >> 2),
443 (0x0600 << 16) | (0x30a18 >> 2),
445 (0x0600 << 16) | (0x30a2c >> 2),
447 (0x0e00 << 16) | (0xc700 >> 2),
449 (0x0e00 << 16) | (0xc704 >> 2),
451 (0x0e00 << 16) | (0xc708 >> 2),
453 (0x0e00 << 16) | (0xc768 >> 2),
455 (0x0400 << 16) | (0xc770 >> 2),
457 (0x0400 << 16) | (0xc774 >> 2),
459 (0x0400 << 16) | (0xc778 >> 2),
461 (0x0400 << 16) | (0xc77c >> 2),
463 (0x0400 << 16) | (0xc780 >> 2),
465 (0x0400 << 16) | (0xc784 >> 2),
467 (0x0400 << 16) | (0xc788 >> 2),
469 (0x0400 << 16) | (0xc78c >> 2),
471 (0x0400 << 16) | (0xc798 >> 2),
473 (0x0400 << 16) | (0xc79c >> 2),
475 (0x0400 << 16) | (0xc7a0 >> 2),
477 (0x0400 << 16) | (0xc7a4 >> 2),
479 (0x0400 << 16) | (0xc7a8 >> 2),
481 (0x0400 << 16) | (0xc7ac >> 2),
483 (0x0400 << 16) | (0xc7b0 >> 2),
485 (0x0400 << 16) | (0xc7b4 >> 2),
487 (0x0e00 << 16) | (0x9100 >> 2),
489 (0x0e00 << 16) | (0x3c010 >> 2),
491 (0x0e00 << 16) | (0x92a8 >> 2),
493 (0x0e00 << 16) | (0x92ac >> 2),
495 (0x0e00 << 16) | (0x92b4 >> 2),
497 (0x0e00 << 16) | (0x92b8 >> 2),
499 (0x0e00 << 16) | (0x92bc >> 2),
501 (0x0e00 << 16) | (0x92c0 >> 2),
503 (0x0e00 << 16) | (0x92c4 >> 2),
505 (0x0e00 << 16) | (0x92c8 >> 2),
507 (0x0e00 << 16) | (0x92cc >> 2),
509 (0x0e00 << 16) | (0x92d0 >> 2),
511 (0x0e00 << 16) | (0x8c00 >> 2),
513 (0x0e00 << 16) | (0x8c04 >> 2),
515 (0x0e00 << 16) | (0x8c20 >> 2),
517 (0x0e00 << 16) | (0x8c38 >> 2),
519 (0x0e00 << 16) | (0x8c3c >> 2),
521 (0x0e00 << 16) | (0xae00 >> 2),
523 (0x0e00 << 16) | (0x9604 >> 2),
525 (0x0e00 << 16) | (0xac08 >> 2),
527 (0x0e00 << 16) | (0xac0c >> 2),
529 (0x0e00 << 16) | (0xac10 >> 2),
531 (0x0e00 << 16) | (0xac14 >> 2),
533 (0x0e00 << 16) | (0xac58 >> 2),
535 (0x0e00 << 16) | (0xac68 >> 2),
537 (0x0e00 << 16) | (0xac6c >> 2),
539 (0x0e00 << 16) | (0xac70 >> 2),
541 (0x0e00 << 16) | (0xac74 >> 2),
543 (0x0e00 << 16) | (0xac78 >> 2),
545 (0x0e00 << 16) | (0xac7c >> 2),
547 (0x0e00 << 16) | (0xac80 >> 2),
549 (0x0e00 << 16) | (0xac84 >> 2),
551 (0x0e00 << 16) | (0xac88 >> 2),
553 (0x0e00 << 16) | (0xac8c >> 2),
555 (0x0e00 << 16) | (0x970c >> 2),
557 (0x0e00 << 16) | (0x9714 >> 2),
559 (0x0e00 << 16) | (0x9718 >> 2),
561 (0x0e00 << 16) | (0x971c >> 2),
563 (0x0e00 << 16) | (0x31068 >> 2),
565 (0x4e00 << 16) | (0x31068 >> 2),
567 (0x5e00 << 16) | (0x31068 >> 2),
569 (0x6e00 << 16) | (0x31068 >> 2),
571 (0x7e00 << 16) | (0x31068 >> 2),
573 (0x8e00 << 16) | (0x31068 >> 2),
575 (0x9e00 << 16) | (0x31068 >> 2),
577 (0xae00 << 16) | (0x31068 >> 2),
579 (0xbe00 << 16) | (0x31068 >> 2),
581 (0x0e00 << 16) | (0xcd10 >> 2),
583 (0x0e00 << 16) | (0xcd14 >> 2),
585 (0x0e00 << 16) | (0x88b0 >> 2),
587 (0x0e00 << 16) | (0x88b4 >> 2),
589 (0x0e00 << 16) | (0x88b8 >> 2),
591 (0x0e00 << 16) | (0x88bc >> 2),
593 (0x0400 << 16) | (0x89c0 >> 2),
595 (0x0e00 << 16) | (0x88c4 >> 2),
597 (0x0e00 << 16) | (0x88c8 >> 2),
599 (0x0e00 << 16) | (0x88d0 >> 2),
601 (0x0e00 << 16) | (0x88d4 >> 2),
603 (0x0e00 << 16) | (0x88d8 >> 2),
605 (0x0e00 << 16) | (0x8980 >> 2),
607 (0x0e00 << 16) | (0x30938 >> 2),
609 (0x0e00 << 16) | (0x3093c >> 2),
611 (0x0e00 << 16) | (0x30940 >> 2),
613 (0x0e00 << 16) | (0x89a0 >> 2),
615 (0x0e00 << 16) | (0x30900 >> 2),
617 (0x0e00 << 16) | (0x30904 >> 2),
619 (0x0e00 << 16) | (0x89b4 >> 2),
621 (0x0e00 << 16) | (0x3c210 >> 2),
623 (0x0e00 << 16) | (0x3c214 >> 2),
625 (0x0e00 << 16) | (0x3c218 >> 2),
627 (0x0e00 << 16) | (0x8904 >> 2),
630 (0x0e00 << 16) | (0x8c28 >> 2),
631 (0x0e00 << 16) | (0x8c2c >> 2),
632 (0x0e00 << 16) | (0x8c30 >> 2),
633 (0x0e00 << 16) | (0x8c34 >> 2),
634 (0x0e00 << 16) | (0x9600 >> 2),
637 static const u32 kalindi_rlc_save_restore_register_list[] =
639 (0x0e00 << 16) | (0xc12c >> 2),
641 (0x0e00 << 16) | (0xc140 >> 2),
643 (0x0e00 << 16) | (0xc150 >> 2),
645 (0x0e00 << 16) | (0xc15c >> 2),
647 (0x0e00 << 16) | (0xc168 >> 2),
649 (0x0e00 << 16) | (0xc170 >> 2),
651 (0x0e00 << 16) | (0xc204 >> 2),
653 (0x0e00 << 16) | (0xc2b4 >> 2),
655 (0x0e00 << 16) | (0xc2b8 >> 2),
657 (0x0e00 << 16) | (0xc2bc >> 2),
659 (0x0e00 << 16) | (0xc2c0 >> 2),
661 (0x0e00 << 16) | (0x8228 >> 2),
663 (0x0e00 << 16) | (0x829c >> 2),
665 (0x0e00 << 16) | (0x869c >> 2),
667 (0x0600 << 16) | (0x98f4 >> 2),
669 (0x0e00 << 16) | (0x98f8 >> 2),
671 (0x0e00 << 16) | (0x9900 >> 2),
673 (0x0e00 << 16) | (0xc260 >> 2),
675 (0x0e00 << 16) | (0x90e8 >> 2),
677 (0x0e00 << 16) | (0x3c000 >> 2),
679 (0x0e00 << 16) | (0x3c00c >> 2),
681 (0x0e00 << 16) | (0x8c1c >> 2),
683 (0x0e00 << 16) | (0x9700 >> 2),
685 (0x0e00 << 16) | (0xcd20 >> 2),
687 (0x4e00 << 16) | (0xcd20 >> 2),
689 (0x5e00 << 16) | (0xcd20 >> 2),
691 (0x6e00 << 16) | (0xcd20 >> 2),
693 (0x7e00 << 16) | (0xcd20 >> 2),
695 (0x0e00 << 16) | (0x89bc >> 2),
697 (0x0e00 << 16) | (0x8900 >> 2),
700 (0x0e00 << 16) | (0xc130 >> 2),
702 (0x0e00 << 16) | (0xc134 >> 2),
704 (0x0e00 << 16) | (0xc1fc >> 2),
706 (0x0e00 << 16) | (0xc208 >> 2),
708 (0x0e00 << 16) | (0xc264 >> 2),
710 (0x0e00 << 16) | (0xc268 >> 2),
712 (0x0e00 << 16) | (0xc26c >> 2),
714 (0x0e00 << 16) | (0xc270 >> 2),
716 (0x0e00 << 16) | (0xc274 >> 2),
718 (0x0e00 << 16) | (0xc28c >> 2),
720 (0x0e00 << 16) | (0xc290 >> 2),
722 (0x0e00 << 16) | (0xc294 >> 2),
724 (0x0e00 << 16) | (0xc298 >> 2),
726 (0x0e00 << 16) | (0xc2a0 >> 2),
728 (0x0e00 << 16) | (0xc2a4 >> 2),
730 (0x0e00 << 16) | (0xc2a8 >> 2),
732 (0x0e00 << 16) | (0xc2ac >> 2),
734 (0x0e00 << 16) | (0x301d0 >> 2),
736 (0x0e00 << 16) | (0x30238 >> 2),
738 (0x0e00 << 16) | (0x30250 >> 2),
740 (0x0e00 << 16) | (0x30254 >> 2),
742 (0x0e00 << 16) | (0x30258 >> 2),
744 (0x0e00 << 16) | (0x3025c >> 2),
746 (0x4e00 << 16) | (0xc900 >> 2),
748 (0x5e00 << 16) | (0xc900 >> 2),
750 (0x6e00 << 16) | (0xc900 >> 2),
752 (0x7e00 << 16) | (0xc900 >> 2),
754 (0x4e00 << 16) | (0xc904 >> 2),
756 (0x5e00 << 16) | (0xc904 >> 2),
758 (0x6e00 << 16) | (0xc904 >> 2),
760 (0x7e00 << 16) | (0xc904 >> 2),
762 (0x4e00 << 16) | (0xc908 >> 2),
764 (0x5e00 << 16) | (0xc908 >> 2),
766 (0x6e00 << 16) | (0xc908 >> 2),
768 (0x7e00 << 16) | (0xc908 >> 2),
770 (0x4e00 << 16) | (0xc90c >> 2),
772 (0x5e00 << 16) | (0xc90c >> 2),
774 (0x6e00 << 16) | (0xc90c >> 2),
776 (0x7e00 << 16) | (0xc90c >> 2),
778 (0x4e00 << 16) | (0xc910 >> 2),
780 (0x5e00 << 16) | (0xc910 >> 2),
782 (0x6e00 << 16) | (0xc910 >> 2),
784 (0x7e00 << 16) | (0xc910 >> 2),
786 (0x0e00 << 16) | (0xc99c >> 2),
788 (0x0e00 << 16) | (0x9834 >> 2),
790 (0x0000 << 16) | (0x30f00 >> 2),
792 (0x0000 << 16) | (0x30f04 >> 2),
794 (0x0000 << 16) | (0x30f08 >> 2),
796 (0x0000 << 16) | (0x30f0c >> 2),
798 (0x0600 << 16) | (0x9b7c >> 2),
800 (0x0e00 << 16) | (0x8a14 >> 2),
802 (0x0e00 << 16) | (0x8a18 >> 2),
804 (0x0600 << 16) | (0x30a00 >> 2),
806 (0x0e00 << 16) | (0x8bf0 >> 2),
808 (0x0e00 << 16) | (0x8bcc >> 2),
810 (0x0e00 << 16) | (0x8b24 >> 2),
812 (0x0e00 << 16) | (0x30a04 >> 2),
814 (0x0600 << 16) | (0x30a10 >> 2),
816 (0x0600 << 16) | (0x30a14 >> 2),
818 (0x0600 << 16) | (0x30a18 >> 2),
820 (0x0600 << 16) | (0x30a2c >> 2),
822 (0x0e00 << 16) | (0xc700 >> 2),
824 (0x0e00 << 16) | (0xc704 >> 2),
826 (0x0e00 << 16) | (0xc708 >> 2),
828 (0x0e00 << 16) | (0xc768 >> 2),
830 (0x0400 << 16) | (0xc770 >> 2),
832 (0x0400 << 16) | (0xc774 >> 2),
834 (0x0400 << 16) | (0xc798 >> 2),
836 (0x0400 << 16) | (0xc79c >> 2),
838 (0x0e00 << 16) | (0x9100 >> 2),
840 (0x0e00 << 16) | (0x3c010 >> 2),
842 (0x0e00 << 16) | (0x8c00 >> 2),
844 (0x0e00 << 16) | (0x8c04 >> 2),
846 (0x0e00 << 16) | (0x8c20 >> 2),
848 (0x0e00 << 16) | (0x8c38 >> 2),
850 (0x0e00 << 16) | (0x8c3c >> 2),
852 (0x0e00 << 16) | (0xae00 >> 2),
854 (0x0e00 << 16) | (0x9604 >> 2),
856 (0x0e00 << 16) | (0xac08 >> 2),
858 (0x0e00 << 16) | (0xac0c >> 2),
860 (0x0e00 << 16) | (0xac10 >> 2),
862 (0x0e00 << 16) | (0xac14 >> 2),
864 (0x0e00 << 16) | (0xac58 >> 2),
866 (0x0e00 << 16) | (0xac68 >> 2),
868 (0x0e00 << 16) | (0xac6c >> 2),
870 (0x0e00 << 16) | (0xac70 >> 2),
872 (0x0e00 << 16) | (0xac74 >> 2),
874 (0x0e00 << 16) | (0xac78 >> 2),
876 (0x0e00 << 16) | (0xac7c >> 2),
878 (0x0e00 << 16) | (0xac80 >> 2),
880 (0x0e00 << 16) | (0xac84 >> 2),
882 (0x0e00 << 16) | (0xac88 >> 2),
884 (0x0e00 << 16) | (0xac8c >> 2),
886 (0x0e00 << 16) | (0x970c >> 2),
888 (0x0e00 << 16) | (0x9714 >> 2),
890 (0x0e00 << 16) | (0x9718 >> 2),
892 (0x0e00 << 16) | (0x971c >> 2),
894 (0x0e00 << 16) | (0x31068 >> 2),
896 (0x4e00 << 16) | (0x31068 >> 2),
898 (0x5e00 << 16) | (0x31068 >> 2),
900 (0x6e00 << 16) | (0x31068 >> 2),
902 (0x7e00 << 16) | (0x31068 >> 2),
904 (0x0e00 << 16) | (0xcd10 >> 2),
906 (0x0e00 << 16) | (0xcd14 >> 2),
908 (0x0e00 << 16) | (0x88b0 >> 2),
910 (0x0e00 << 16) | (0x88b4 >> 2),
912 (0x0e00 << 16) | (0x88b8 >> 2),
914 (0x0e00 << 16) | (0x88bc >> 2),
916 (0x0400 << 16) | (0x89c0 >> 2),
918 (0x0e00 << 16) | (0x88c4 >> 2),
920 (0x0e00 << 16) | (0x88c8 >> 2),
922 (0x0e00 << 16) | (0x88d0 >> 2),
924 (0x0e00 << 16) | (0x88d4 >> 2),
926 (0x0e00 << 16) | (0x88d8 >> 2),
928 (0x0e00 << 16) | (0x8980 >> 2),
930 (0x0e00 << 16) | (0x30938 >> 2),
932 (0x0e00 << 16) | (0x3093c >> 2),
934 (0x0e00 << 16) | (0x30940 >> 2),
936 (0x0e00 << 16) | (0x89a0 >> 2),
938 (0x0e00 << 16) | (0x30900 >> 2),
940 (0x0e00 << 16) | (0x30904 >> 2),
942 (0x0e00 << 16) | (0x89b4 >> 2),
944 (0x0e00 << 16) | (0x3e1fc >> 2),
946 (0x0e00 << 16) | (0x3c210 >> 2),
948 (0x0e00 << 16) | (0x3c214 >> 2),
950 (0x0e00 << 16) | (0x3c218 >> 2),
952 (0x0e00 << 16) | (0x8904 >> 2),
955 (0x0e00 << 16) | (0x8c28 >> 2),
956 (0x0e00 << 16) | (0x8c2c >> 2),
957 (0x0e00 << 16) | (0x8c30 >> 2),
958 (0x0e00 << 16) | (0x8c34 >> 2),
959 (0x0e00 << 16) | (0x9600 >> 2),
962 static const u32 bonaire_golden_spm_registers[] =
964 0x30800, 0xe0ffffff, 0xe0000000
967 static const u32 bonaire_golden_common_registers[] =
969 0xc770, 0xffffffff, 0x00000800,
970 0xc774, 0xffffffff, 0x00000800,
971 0xc798, 0xffffffff, 0x00007fbf,
972 0xc79c, 0xffffffff, 0x00007faf
975 static const u32 bonaire_golden_registers[] =
977 0x3354, 0x00000333, 0x00000333,
978 0x3350, 0x000c0fc0, 0x00040200,
979 0x9a10, 0x00010000, 0x00058208,
980 0x3c000, 0xffff1fff, 0x00140000,
981 0x3c200, 0xfdfc0fff, 0x00000100,
982 0x3c234, 0x40000000, 0x40000200,
983 0x9830, 0xffffffff, 0x00000000,
984 0x9834, 0xf00fffff, 0x00000400,
985 0x9838, 0x0002021c, 0x00020200,
986 0xc78, 0x00000080, 0x00000000,
987 0x5bb0, 0x000000f0, 0x00000070,
988 0x5bc0, 0xf0311fff, 0x80300000,
989 0x98f8, 0x73773777, 0x12010001,
990 0x350c, 0x00810000, 0x408af000,
991 0x7030, 0x31000111, 0x00000011,
992 0x2f48, 0x73773777, 0x12010001,
993 0x220c, 0x00007fb6, 0x0021a1b1,
994 0x2210, 0x00007fb6, 0x002021b1,
995 0x2180, 0x00007fb6, 0x00002191,
996 0x2218, 0x00007fb6, 0x002121b1,
997 0x221c, 0x00007fb6, 0x002021b1,
998 0x21dc, 0x00007fb6, 0x00002191,
999 0x21e0, 0x00007fb6, 0x00002191,
1000 0x3628, 0x0000003f, 0x0000000a,
1001 0x362c, 0x0000003f, 0x0000000a,
1002 0x2ae4, 0x00073ffe, 0x000022a2,
1003 0x240c, 0x000007ff, 0x00000000,
1004 0x8a14, 0xf000003f, 0x00000007,
1005 0x8bf0, 0x00002001, 0x00000001,
1006 0x8b24, 0xffffffff, 0x00ffffff,
1007 0x30a04, 0x0000ff0f, 0x00000000,
1008 0x28a4c, 0x07ffffff, 0x06000000,
1009 0x4d8, 0x00000fff, 0x00000100,
1010 0x3e78, 0x00000001, 0x00000002,
1011 0x9100, 0x03000000, 0x0362c688,
1012 0x8c00, 0x000000ff, 0x00000001,
1013 0xe40, 0x00001fff, 0x00001fff,
1014 0x9060, 0x0000007f, 0x00000020,
1015 0x9508, 0x00010000, 0x00010000,
1016 0xac14, 0x000003ff, 0x000000f3,
1017 0xac0c, 0xffffffff, 0x00001032
1020 static const u32 bonaire_mgcg_cgcg_init[] =
1022 0xc420, 0xffffffff, 0xfffffffc,
1023 0x30800, 0xffffffff, 0xe0000000,
1024 0x3c2a0, 0xffffffff, 0x00000100,
1025 0x3c208, 0xffffffff, 0x00000100,
1026 0x3c2c0, 0xffffffff, 0xc0000100,
1027 0x3c2c8, 0xffffffff, 0xc0000100,
1028 0x3c2c4, 0xffffffff, 0xc0000100,
1029 0x55e4, 0xffffffff, 0x00600100,
1030 0x3c280, 0xffffffff, 0x00000100,
1031 0x3c214, 0xffffffff, 0x06000100,
1032 0x3c220, 0xffffffff, 0x00000100,
1033 0x3c218, 0xffffffff, 0x06000100,
1034 0x3c204, 0xffffffff, 0x00000100,
1035 0x3c2e0, 0xffffffff, 0x00000100,
1036 0x3c224, 0xffffffff, 0x00000100,
1037 0x3c200, 0xffffffff, 0x00000100,
1038 0x3c230, 0xffffffff, 0x00000100,
1039 0x3c234, 0xffffffff, 0x00000100,
1040 0x3c250, 0xffffffff, 0x00000100,
1041 0x3c254, 0xffffffff, 0x00000100,
1042 0x3c258, 0xffffffff, 0x00000100,
1043 0x3c25c, 0xffffffff, 0x00000100,
1044 0x3c260, 0xffffffff, 0x00000100,
1045 0x3c27c, 0xffffffff, 0x00000100,
1046 0x3c278, 0xffffffff, 0x00000100,
1047 0x3c210, 0xffffffff, 0x06000100,
1048 0x3c290, 0xffffffff, 0x00000100,
1049 0x3c274, 0xffffffff, 0x00000100,
1050 0x3c2b4, 0xffffffff, 0x00000100,
1051 0x3c2b0, 0xffffffff, 0x00000100,
1052 0x3c270, 0xffffffff, 0x00000100,
1053 0x30800, 0xffffffff, 0xe0000000,
1054 0x3c020, 0xffffffff, 0x00010000,
1055 0x3c024, 0xffffffff, 0x00030002,
1056 0x3c028, 0xffffffff, 0x00040007,
1057 0x3c02c, 0xffffffff, 0x00060005,
1058 0x3c030, 0xffffffff, 0x00090008,
1059 0x3c034, 0xffffffff, 0x00010000,
1060 0x3c038, 0xffffffff, 0x00030002,
1061 0x3c03c, 0xffffffff, 0x00040007,
1062 0x3c040, 0xffffffff, 0x00060005,
1063 0x3c044, 0xffffffff, 0x00090008,
1064 0x3c048, 0xffffffff, 0x00010000,
1065 0x3c04c, 0xffffffff, 0x00030002,
1066 0x3c050, 0xffffffff, 0x00040007,
1067 0x3c054, 0xffffffff, 0x00060005,
1068 0x3c058, 0xffffffff, 0x00090008,
1069 0x3c05c, 0xffffffff, 0x00010000,
1070 0x3c060, 0xffffffff, 0x00030002,
1071 0x3c064, 0xffffffff, 0x00040007,
1072 0x3c068, 0xffffffff, 0x00060005,
1073 0x3c06c, 0xffffffff, 0x00090008,
1074 0x3c070, 0xffffffff, 0x00010000,
1075 0x3c074, 0xffffffff, 0x00030002,
1076 0x3c078, 0xffffffff, 0x00040007,
1077 0x3c07c, 0xffffffff, 0x00060005,
1078 0x3c080, 0xffffffff, 0x00090008,
1079 0x3c084, 0xffffffff, 0x00010000,
1080 0x3c088, 0xffffffff, 0x00030002,
1081 0x3c08c, 0xffffffff, 0x00040007,
1082 0x3c090, 0xffffffff, 0x00060005,
1083 0x3c094, 0xffffffff, 0x00090008,
1084 0x3c098, 0xffffffff, 0x00010000,
1085 0x3c09c, 0xffffffff, 0x00030002,
1086 0x3c0a0, 0xffffffff, 0x00040007,
1087 0x3c0a4, 0xffffffff, 0x00060005,
1088 0x3c0a8, 0xffffffff, 0x00090008,
1089 0x3c000, 0xffffffff, 0x96e00200,
1090 0x8708, 0xffffffff, 0x00900100,
1091 0xc424, 0xffffffff, 0x0020003f,
1092 0x38, 0xffffffff, 0x0140001c,
1093 0x3c, 0x000f0000, 0x000f0000,
1094 0x220, 0xffffffff, 0xC060000C,
1095 0x224, 0xc0000fff, 0x00000100,
1096 0xf90, 0xffffffff, 0x00000100,
1097 0xf98, 0x00000101, 0x00000000,
1098 0x20a8, 0xffffffff, 0x00000104,
1099 0x55e4, 0xff000fff, 0x00000100,
1100 0x30cc, 0xc0000fff, 0x00000104,
1101 0xc1e4, 0x00000001, 0x00000001,
1102 0xd00c, 0xff000ff0, 0x00000100,
1103 0xd80c, 0xff000ff0, 0x00000100
1106 static const u32 spectre_golden_spm_registers[] =
1108 0x30800, 0xe0ffffff, 0xe0000000
1111 static const u32 spectre_golden_common_registers[] =
1113 0xc770, 0xffffffff, 0x00000800,
1114 0xc774, 0xffffffff, 0x00000800,
1115 0xc798, 0xffffffff, 0x00007fbf,
1116 0xc79c, 0xffffffff, 0x00007faf
1119 static const u32 spectre_golden_registers[] =
1121 0x3c000, 0xffff1fff, 0x96940200,
1122 0x3c00c, 0xffff0001, 0xff000000,
1123 0x3c200, 0xfffc0fff, 0x00000100,
1124 0x6ed8, 0x00010101, 0x00010000,
1125 0x9834, 0xf00fffff, 0x00000400,
1126 0x9838, 0xfffffffc, 0x00020200,
1127 0x5bb0, 0x000000f0, 0x00000070,
1128 0x5bc0, 0xf0311fff, 0x80300000,
1129 0x98f8, 0x73773777, 0x12010001,
1130 0x9b7c, 0x00ff0000, 0x00fc0000,
1131 0x2f48, 0x73773777, 0x12010001,
1132 0x8a14, 0xf000003f, 0x00000007,
1133 0x8b24, 0xffffffff, 0x00ffffff,
1134 0x28350, 0x3f3f3fff, 0x00000082,
1135 0x28354, 0x0000003f, 0x00000000,
1136 0x3e78, 0x00000001, 0x00000002,
1137 0x913c, 0xffff03df, 0x00000004,
1138 0xc768, 0x00000008, 0x00000008,
1139 0x8c00, 0x000008ff, 0x00000800,
1140 0x9508, 0x00010000, 0x00010000,
1141 0xac0c, 0xffffffff, 0x54763210,
1142 0x214f8, 0x01ff01ff, 0x00000002,
1143 0x21498, 0x007ff800, 0x00200000,
1144 0x2015c, 0xffffffff, 0x00000f40,
1145 0x30934, 0xffffffff, 0x00000001
1148 static const u32 spectre_mgcg_cgcg_init[] =
1150 0xc420, 0xffffffff, 0xfffffffc,
1151 0x30800, 0xffffffff, 0xe0000000,
1152 0x3c2a0, 0xffffffff, 0x00000100,
1153 0x3c208, 0xffffffff, 0x00000100,
1154 0x3c2c0, 0xffffffff, 0x00000100,
1155 0x3c2c8, 0xffffffff, 0x00000100,
1156 0x3c2c4, 0xffffffff, 0x00000100,
1157 0x55e4, 0xffffffff, 0x00600100,
1158 0x3c280, 0xffffffff, 0x00000100,
1159 0x3c214, 0xffffffff, 0x06000100,
1160 0x3c220, 0xffffffff, 0x00000100,
1161 0x3c218, 0xffffffff, 0x06000100,
1162 0x3c204, 0xffffffff, 0x00000100,
1163 0x3c2e0, 0xffffffff, 0x00000100,
1164 0x3c224, 0xffffffff, 0x00000100,
1165 0x3c200, 0xffffffff, 0x00000100,
1166 0x3c230, 0xffffffff, 0x00000100,
1167 0x3c234, 0xffffffff, 0x00000100,
1168 0x3c250, 0xffffffff, 0x00000100,
1169 0x3c254, 0xffffffff, 0x00000100,
1170 0x3c258, 0xffffffff, 0x00000100,
1171 0x3c25c, 0xffffffff, 0x00000100,
1172 0x3c260, 0xffffffff, 0x00000100,
1173 0x3c27c, 0xffffffff, 0x00000100,
1174 0x3c278, 0xffffffff, 0x00000100,
1175 0x3c210, 0xffffffff, 0x06000100,
1176 0x3c290, 0xffffffff, 0x00000100,
1177 0x3c274, 0xffffffff, 0x00000100,
1178 0x3c2b4, 0xffffffff, 0x00000100,
1179 0x3c2b0, 0xffffffff, 0x00000100,
1180 0x3c270, 0xffffffff, 0x00000100,
1181 0x30800, 0xffffffff, 0xe0000000,
1182 0x3c020, 0xffffffff, 0x00010000,
1183 0x3c024, 0xffffffff, 0x00030002,
1184 0x3c028, 0xffffffff, 0x00040007,
1185 0x3c02c, 0xffffffff, 0x00060005,
1186 0x3c030, 0xffffffff, 0x00090008,
1187 0x3c034, 0xffffffff, 0x00010000,
1188 0x3c038, 0xffffffff, 0x00030002,
1189 0x3c03c, 0xffffffff, 0x00040007,
1190 0x3c040, 0xffffffff, 0x00060005,
1191 0x3c044, 0xffffffff, 0x00090008,
1192 0x3c048, 0xffffffff, 0x00010000,
1193 0x3c04c, 0xffffffff, 0x00030002,
1194 0x3c050, 0xffffffff, 0x00040007,
1195 0x3c054, 0xffffffff, 0x00060005,
1196 0x3c058, 0xffffffff, 0x00090008,
1197 0x3c05c, 0xffffffff, 0x00010000,
1198 0x3c060, 0xffffffff, 0x00030002,
1199 0x3c064, 0xffffffff, 0x00040007,
1200 0x3c068, 0xffffffff, 0x00060005,
1201 0x3c06c, 0xffffffff, 0x00090008,
1202 0x3c070, 0xffffffff, 0x00010000,
1203 0x3c074, 0xffffffff, 0x00030002,
1204 0x3c078, 0xffffffff, 0x00040007,
1205 0x3c07c, 0xffffffff, 0x00060005,
1206 0x3c080, 0xffffffff, 0x00090008,
1207 0x3c084, 0xffffffff, 0x00010000,
1208 0x3c088, 0xffffffff, 0x00030002,
1209 0x3c08c, 0xffffffff, 0x00040007,
1210 0x3c090, 0xffffffff, 0x00060005,
1211 0x3c094, 0xffffffff, 0x00090008,
1212 0x3c098, 0xffffffff, 0x00010000,
1213 0x3c09c, 0xffffffff, 0x00030002,
1214 0x3c0a0, 0xffffffff, 0x00040007,
1215 0x3c0a4, 0xffffffff, 0x00060005,
1216 0x3c0a8, 0xffffffff, 0x00090008,
1217 0x3c0ac, 0xffffffff, 0x00010000,
1218 0x3c0b0, 0xffffffff, 0x00030002,
1219 0x3c0b4, 0xffffffff, 0x00040007,
1220 0x3c0b8, 0xffffffff, 0x00060005,
1221 0x3c0bc, 0xffffffff, 0x00090008,
1222 0x3c000, 0xffffffff, 0x96e00200,
1223 0x8708, 0xffffffff, 0x00900100,
1224 0xc424, 0xffffffff, 0x0020003f,
1225 0x38, 0xffffffff, 0x0140001c,
1226 0x3c, 0x000f0000, 0x000f0000,
1227 0x220, 0xffffffff, 0xC060000C,
1228 0x224, 0xc0000fff, 0x00000100,
1229 0xf90, 0xffffffff, 0x00000100,
1230 0xf98, 0x00000101, 0x00000000,
1231 0x20a8, 0xffffffff, 0x00000104,
1232 0x55e4, 0xff000fff, 0x00000100,
1233 0x30cc, 0xc0000fff, 0x00000104,
1234 0xc1e4, 0x00000001, 0x00000001,
1235 0xd00c, 0xff000ff0, 0x00000100,
1236 0xd80c, 0xff000ff0, 0x00000100
1239 static const u32 kalindi_golden_spm_registers[] =
1241 0x30800, 0xe0ffffff, 0xe0000000
1244 static const u32 kalindi_golden_common_registers[] =
1246 0xc770, 0xffffffff, 0x00000800,
1247 0xc774, 0xffffffff, 0x00000800,
1248 0xc798, 0xffffffff, 0x00007fbf,
1249 0xc79c, 0xffffffff, 0x00007faf
1252 static const u32 kalindi_golden_registers[] =
1254 0x3c000, 0xffffdfff, 0x6e944040,
1255 0x55e4, 0xff607fff, 0xfc000100,
1256 0x3c220, 0xff000fff, 0x00000100,
1257 0x3c224, 0xff000fff, 0x00000100,
1258 0x3c200, 0xfffc0fff, 0x00000100,
1259 0x6ed8, 0x00010101, 0x00010000,
1260 0x9830, 0xffffffff, 0x00000000,
1261 0x9834, 0xf00fffff, 0x00000400,
1262 0x5bb0, 0x000000f0, 0x00000070,
1263 0x5bc0, 0xf0311fff, 0x80300000,
1264 0x98f8, 0x73773777, 0x12010001,
1265 0x98fc, 0xffffffff, 0x00000010,
1266 0x9b7c, 0x00ff0000, 0x00fc0000,
1267 0x8030, 0x00001f0f, 0x0000100a,
1268 0x2f48, 0x73773777, 0x12010001,
1269 0x2408, 0x000fffff, 0x000c007f,
1270 0x8a14, 0xf000003f, 0x00000007,
1271 0x8b24, 0x3fff3fff, 0x00ffcfff,
1272 0x30a04, 0x0000ff0f, 0x00000000,
1273 0x28a4c, 0x07ffffff, 0x06000000,
1274 0x4d8, 0x00000fff, 0x00000100,
1275 0x3e78, 0x00000001, 0x00000002,
1276 0xc768, 0x00000008, 0x00000008,
1277 0x8c00, 0x000000ff, 0x00000003,
1278 0x214f8, 0x01ff01ff, 0x00000002,
1279 0x21498, 0x007ff800, 0x00200000,
1280 0x2015c, 0xffffffff, 0x00000f40,
1281 0x88c4, 0x001f3ae3, 0x00000082,
1282 0x88d4, 0x0000001f, 0x00000010,
1283 0x30934, 0xffffffff, 0x00000000
/* Kalindi (Kabini APU) medium-grain / coarse-grain clock gating init
 * sequence.  Flat list of { register offset, mask, value } triples
 * consumed by radeon_program_register_sequence() in
 * cik_init_golden_registers().
 */
1286 static const u32 kalindi_mgcg_cgcg_init[] =
1288 0xc420, 0xffffffff, 0xfffffffc,
1289 0x30800, 0xffffffff, 0xe0000000,
1290 0x3c2a0, 0xffffffff, 0x00000100,
1291 0x3c208, 0xffffffff, 0x00000100,
1292 0x3c2c0, 0xffffffff, 0x00000100,
1293 0x3c2c8, 0xffffffff, 0x00000100,
1294 0x3c2c4, 0xffffffff, 0x00000100,
1295 0x55e4, 0xffffffff, 0x00600100,
1296 0x3c280, 0xffffffff, 0x00000100,
1297 0x3c214, 0xffffffff, 0x06000100,
1298 0x3c220, 0xffffffff, 0x00000100,
1299 0x3c218, 0xffffffff, 0x06000100,
1300 0x3c204, 0xffffffff, 0x00000100,
1301 0x3c2e0, 0xffffffff, 0x00000100,
1302 0x3c224, 0xffffffff, 0x00000100,
1303 0x3c200, 0xffffffff, 0x00000100,
1304 0x3c230, 0xffffffff, 0x00000100,
1305 0x3c234, 0xffffffff, 0x00000100,
1306 0x3c250, 0xffffffff, 0x00000100,
1307 0x3c254, 0xffffffff, 0x00000100,
1308 0x3c258, 0xffffffff, 0x00000100,
1309 0x3c25c, 0xffffffff, 0x00000100,
1310 0x3c260, 0xffffffff, 0x00000100,
1311 0x3c27c, 0xffffffff, 0x00000100,
1312 0x3c278, 0xffffffff, 0x00000100,
1313 0x3c210, 0xffffffff, 0x06000100,
1314 0x3c290, 0xffffffff, 0x00000100,
1315 0x3c274, 0xffffffff, 0x00000100,
1316 0x3c2b4, 0xffffffff, 0x00000100,
1317 0x3c2b0, 0xffffffff, 0x00000100,
1318 0x3c270, 0xffffffff, 0x00000100,
1319 0x30800, 0xffffffff, 0xe0000000,
/* 0x3c020..0x3c044: repeating 5-entry pattern (two groups) */
1320 0x3c020, 0xffffffff, 0x00010000,
1321 0x3c024, 0xffffffff, 0x00030002,
1322 0x3c028, 0xffffffff, 0x00040007,
1323 0x3c02c, 0xffffffff, 0x00060005,
1324 0x3c030, 0xffffffff, 0x00090008,
1325 0x3c034, 0xffffffff, 0x00010000,
1326 0x3c038, 0xffffffff, 0x00030002,
1327 0x3c03c, 0xffffffff, 0x00040007,
1328 0x3c040, 0xffffffff, 0x00060005,
1329 0x3c044, 0xffffffff, 0x00090008,
1330 0x3c000, 0xffffffff, 0x96e00200,
1331 0x8708, 0xffffffff, 0x00900100,
1332 0xc424, 0xffffffff, 0x0020003f,
1333 0x38, 0xffffffff, 0x0140001c,
1334 0x3c, 0x000f0000, 0x000f0000,
1335 0x220, 0xffffffff, 0xC060000C,
1336 0x224, 0xc0000fff, 0x00000100,
1337 0x20a8, 0xffffffff, 0x00000104,
1338 0x55e4, 0xff000fff, 0x00000100,
1339 0x30cc, 0xc0000fff, 0x00000104,
1340 0xc1e4, 0x00000001, 0x00000001,
1341 0xd00c, 0xff000ff0, 0x00000100,
1342 0xd80c, 0xff000ff0, 0x00000100
/* Hawaii SPM golden register: single { offset, mask, value } triple
 * applied by radeon_program_register_sequence().
 */
1345 static const u32 hawaii_golden_spm_registers[] =
1347 0x30800, 0xe0ffffff, 0xe0000000
/* Hawaii common golden registers: { offset, mask, value } triples
 * applied unconditionally for this family.
 */
1350 static const u32 hawaii_golden_common_registers[] =
1352 0x30800, 0xffffffff, 0xe0000000,
1353 0x28350, 0xffffffff, 0x3a00161a,
1354 0x28354, 0xffffffff, 0x0000002e,
1355 0x9a10, 0xffffffff, 0x00018208,
1356 0x98f8, 0xffffffff, 0x12011003
/* Hawaii golden register settings: { offset, mask, value } triples.
 * Only the bits set in the mask column are modified at each offset.
 */
1359 static const u32 hawaii_golden_registers[] =
1361 0x3354, 0x00000333, 0x00000333,
1362 0x9a10, 0x00010000, 0x00058208,
1363 0x9830, 0xffffffff, 0x00000000,
1364 0x9834, 0xf00fffff, 0x00000400,
1365 0x9838, 0x0002021c, 0x00020200,
1366 0xc78, 0x00000080, 0x00000000,
1367 0x5bb0, 0x000000f0, 0x00000070,
1368 0x5bc0, 0xf0311fff, 0x80300000,
1369 0x350c, 0x00810000, 0x408af000,
1370 0x7030, 0x31000111, 0x00000011,
1371 0x2f48, 0x73773777, 0x12010001,
1372 0x2120, 0x0000007f, 0x0000001b,
1373 0x21dc, 0x00007fb6, 0x00002191,
1374 0x3628, 0x0000003f, 0x0000000a,
1375 0x362c, 0x0000003f, 0x0000000a,
1376 0x2ae4, 0x00073ffe, 0x000022a2,
1377 0x240c, 0x000007ff, 0x00000000,
1378 0x8bf0, 0x00002001, 0x00000001,
1379 0x8b24, 0xffffffff, 0x00ffffff,
1380 0x30a04, 0x0000ff0f, 0x00000000,
1381 0x28a4c, 0x07ffffff, 0x06000000,
1382 0x3e78, 0x00000001, 0x00000002,
1383 0xc768, 0x00000008, 0x00000008,
1384 0xc770, 0x00000f00, 0x00000800,
1385 0xc774, 0x00000f00, 0x00000800,
1386 0xc798, 0x00ffffff, 0x00ff7fbf,
1387 0xc79c, 0x00ffffff, 0x00ff7faf,
1388 0x8c00, 0x000000ff, 0x00000800,
1389 0xe40, 0x00001fff, 0x00001fff,
1390 0x9060, 0x0000007f, 0x00000020,
1391 0x9508, 0x00010000, 0x00010000,
1392 0xae00, 0x00100000, 0x000ff07c,
1393 0xac14, 0x000003ff, 0x0000000f,
1394 0xac10, 0xffffffff, 0x7564fdec,
1395 0xac0c, 0xffffffff, 0x3120b9a8,
1396 0xac08, 0x20000000, 0x0f9c0000
/* Hawaii medium-grain / coarse-grain clock gating init sequence:
 * { register offset, mask, value } triples.  Structurally parallel to
 * kalindi_mgcg_cgcg_init but with a much longer 0x3c0xx block.
 */
1399 static const u32 hawaii_mgcg_cgcg_init[] =
1401 0xc420, 0xffffffff, 0xfffffffd,
1402 0x30800, 0xffffffff, 0xe0000000,
1403 0x3c2a0, 0xffffffff, 0x00000100,
1404 0x3c208, 0xffffffff, 0x00000100,
1405 0x3c2c0, 0xffffffff, 0x00000100,
1406 0x3c2c8, 0xffffffff, 0x00000100,
1407 0x3c2c4, 0xffffffff, 0x00000100,
1408 0x55e4, 0xffffffff, 0x00200100,
1409 0x3c280, 0xffffffff, 0x00000100,
1410 0x3c214, 0xffffffff, 0x06000100,
1411 0x3c220, 0xffffffff, 0x00000100,
1412 0x3c218, 0xffffffff, 0x06000100,
1413 0x3c204, 0xffffffff, 0x00000100,
1414 0x3c2e0, 0xffffffff, 0x00000100,
1415 0x3c224, 0xffffffff, 0x00000100,
1416 0x3c200, 0xffffffff, 0x00000100,
1417 0x3c230, 0xffffffff, 0x00000100,
1418 0x3c234, 0xffffffff, 0x00000100,
1419 0x3c250, 0xffffffff, 0x00000100,
1420 0x3c254, 0xffffffff, 0x00000100,
1421 0x3c258, 0xffffffff, 0x00000100,
1422 0x3c25c, 0xffffffff, 0x00000100,
1423 0x3c260, 0xffffffff, 0x00000100,
1424 0x3c27c, 0xffffffff, 0x00000100,
1425 0x3c278, 0xffffffff, 0x00000100,
1426 0x3c210, 0xffffffff, 0x06000100,
1427 0x3c290, 0xffffffff, 0x00000100,
1428 0x3c274, 0xffffffff, 0x00000100,
1429 0x3c2b4, 0xffffffff, 0x00000100,
1430 0x3c2b0, 0xffffffff, 0x00000100,
1431 0x3c270, 0xffffffff, 0x00000100,
1432 0x30800, 0xffffffff, 0xe0000000,
/* 0x3c020..0x3c0f8: repeating 5-entry pattern
 * (00010000 / 00030002 / 00040007 / 00060005 / 00090008)
 */
1433 0x3c020, 0xffffffff, 0x00010000,
1434 0x3c024, 0xffffffff, 0x00030002,
1435 0x3c028, 0xffffffff, 0x00040007,
1436 0x3c02c, 0xffffffff, 0x00060005,
1437 0x3c030, 0xffffffff, 0x00090008,
1438 0x3c034, 0xffffffff, 0x00010000,
1439 0x3c038, 0xffffffff, 0x00030002,
1440 0x3c03c, 0xffffffff, 0x00040007,
1441 0x3c040, 0xffffffff, 0x00060005,
1442 0x3c044, 0xffffffff, 0x00090008,
1443 0x3c048, 0xffffffff, 0x00010000,
1444 0x3c04c, 0xffffffff, 0x00030002,
1445 0x3c050, 0xffffffff, 0x00040007,
1446 0x3c054, 0xffffffff, 0x00060005,
1447 0x3c058, 0xffffffff, 0x00090008,
1448 0x3c05c, 0xffffffff, 0x00010000,
1449 0x3c060, 0xffffffff, 0x00030002,
1450 0x3c064, 0xffffffff, 0x00040007,
1451 0x3c068, 0xffffffff, 0x00060005,
1452 0x3c06c, 0xffffffff, 0x00090008,
1453 0x3c070, 0xffffffff, 0x00010000,
1454 0x3c074, 0xffffffff, 0x00030002,
1455 0x3c078, 0xffffffff, 0x00040007,
1456 0x3c07c, 0xffffffff, 0x00060005,
1457 0x3c080, 0xffffffff, 0x00090008,
1458 0x3c084, 0xffffffff, 0x00010000,
1459 0x3c088, 0xffffffff, 0x00030002,
1460 0x3c08c, 0xffffffff, 0x00040007,
1461 0x3c090, 0xffffffff, 0x00060005,
1462 0x3c094, 0xffffffff, 0x00090008,
1463 0x3c098, 0xffffffff, 0x00010000,
1464 0x3c09c, 0xffffffff, 0x00030002,
1465 0x3c0a0, 0xffffffff, 0x00040007,
1466 0x3c0a4, 0xffffffff, 0x00060005,
1467 0x3c0a8, 0xffffffff, 0x00090008,
1468 0x3c0ac, 0xffffffff, 0x00010000,
1469 0x3c0b0, 0xffffffff, 0x00030002,
1470 0x3c0b4, 0xffffffff, 0x00040007,
1471 0x3c0b8, 0xffffffff, 0x00060005,
1472 0x3c0bc, 0xffffffff, 0x00090008,
1473 0x3c0c0, 0xffffffff, 0x00010000,
1474 0x3c0c4, 0xffffffff, 0x00030002,
1475 0x3c0c8, 0xffffffff, 0x00040007,
1476 0x3c0cc, 0xffffffff, 0x00060005,
1477 0x3c0d0, 0xffffffff, 0x00090008,
1478 0x3c0d4, 0xffffffff, 0x00010000,
1479 0x3c0d8, 0xffffffff, 0x00030002,
1480 0x3c0dc, 0xffffffff, 0x00040007,
1481 0x3c0e0, 0xffffffff, 0x00060005,
1482 0x3c0e4, 0xffffffff, 0x00090008,
1483 0x3c0e8, 0xffffffff, 0x00010000,
1484 0x3c0ec, 0xffffffff, 0x00030002,
1485 0x3c0f0, 0xffffffff, 0x00040007,
1486 0x3c0f4, 0xffffffff, 0x00060005,
1487 0x3c0f8, 0xffffffff, 0x00090008,
1488 0xc318, 0xffffffff, 0x00020200,
1489 0x3350, 0xffffffff, 0x00000200,
1490 0x15c0, 0xffffffff, 0x00000400,
1491 0x55e8, 0xffffffff, 0x00000000,
1492 0x2f50, 0xffffffff, 0x00000902,
1493 0x3c000, 0xffffffff, 0x96940200,
1494 0x8708, 0xffffffff, 0x00900100,
1495 0xc424, 0xffffffff, 0x0020003f,
1496 0x38, 0xffffffff, 0x0140001c,
1497 0x3c, 0x000f0000, 0x000f0000,
1498 0x220, 0xffffffff, 0xc060000c,
1499 0x224, 0xc0000fff, 0x00000100,
1500 0xf90, 0xffffffff, 0x00000100,
1501 0xf98, 0x00000101, 0x00000000,
1502 0x20a8, 0xffffffff, 0x00000104,
1503 0x55e4, 0xff000fff, 0x00000100,
1504 0x30cc, 0xc0000fff, 0x00000104,
1505 0xc1e4, 0x00000001, 0x00000001,
1506 0xd00c, 0xff000ff0, 0x00000100,
1507 0xd80c, 0xff000ff0, 0x00000100
/* Godavari (Mullins) golden register settings:
 * { register offset, mask, value } triples applied at init.
 */
1510 static const u32 godavari_golden_registers[] =
1512 0x55e4, 0xff607fff, 0xfc000100,
1513 0x6ed8, 0x00010101, 0x00010000,
1514 0x9830, 0xffffffff, 0x00000000,
/* NOTE(review): 0x98302 looks odd next to 0x9830 above (hawaii uses
 * 0x9834 with the same mask/value) — matches the historical table, but
 * verify against the register spec before touching.
 */
1515 0x98302, 0xf00fffff, 0x00000400,
1516 0x6130, 0xffffffff, 0x00010000,
1517 0x5bb0, 0x000000f0, 0x00000070,
1518 0x5bc0, 0xf0311fff, 0x80300000,
1519 0x98f8, 0x73773777, 0x12010001,
1520 0x98fc, 0xffffffff, 0x00000010,
1521 0x8030, 0x00001f0f, 0x0000100a,
1522 0x2f48, 0x73773777, 0x12010001,
1523 0x2408, 0x000fffff, 0x000c007f,
1524 0x8a14, 0xf000003f, 0x00000007,
1525 0x8b24, 0xffffffff, 0x00ff0fff,
1526 0x30a04, 0x0000ff0f, 0x00000000,
1527 0x28a4c, 0x07ffffff, 0x06000000,
1528 0x4d8, 0x00000fff, 0x00000100,
1529 0xd014, 0x00010000, 0x00810001,
1530 0xd814, 0x00010000, 0x00810001,
1531 0x3e78, 0x00000001, 0x00000002,
1532 0xc768, 0x00000008, 0x00000008,
1533 0xc770, 0x00000f00, 0x00000800,
1534 0xc774, 0x00000f00, 0x00000800,
1535 0xc798, 0x00ffffff, 0x00ff7fbf,
1536 0xc79c, 0x00ffffff, 0x00ff7faf,
1537 0x8c00, 0x000000ff, 0x00000001,
1538 0x214f8, 0x01ff01ff, 0x00000002,
1539 0x21498, 0x007ff800, 0x00200000,
1540 0x2015c, 0xffffffff, 0x00000f40,
1541 0x88c4, 0x001f3ae3, 0x00000082,
1542 0x88d4, 0x0000001f, 0x00000010,
1543 0x30934, 0xffffffff, 0x00000000
/* cik_init_golden_registers - program the per-family "golden" register
 * tables (clock gating init, golden, common, SPM) via
 * radeon_program_register_sequence().  Dispatches on rdev->family;
 * Godavari reuses the Kalindi tables except for its own golden set.
 */
1547 static void cik_init_golden_registers(struct radeon_device *rdev)
1549 switch (rdev->family) {
/* Bonaire tables */
1551 radeon_program_register_sequence(rdev,
1552 bonaire_mgcg_cgcg_init,
1553 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554 radeon_program_register_sequence(rdev,
1555 bonaire_golden_registers,
1556 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557 radeon_program_register_sequence(rdev,
1558 bonaire_golden_common_registers,
1559 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560 radeon_program_register_sequence(rdev,
1561 bonaire_golden_spm_registers,
1562 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
/* Kalindi (Kabini) tables */
1565 radeon_program_register_sequence(rdev,
1566 kalindi_mgcg_cgcg_init,
1567 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568 radeon_program_register_sequence(rdev,
1569 kalindi_golden_registers,
1570 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571 radeon_program_register_sequence(rdev,
1572 kalindi_golden_common_registers,
1573 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574 radeon_program_register_sequence(rdev,
1575 kalindi_golden_spm_registers,
1576 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* Godavari (Mullins): Kalindi tables plus its own golden registers */
1579 radeon_program_register_sequence(rdev,
1580 kalindi_mgcg_cgcg_init,
1581 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582 radeon_program_register_sequence(rdev,
1583 godavari_golden_registers,
1584 (const u32)ARRAY_SIZE(godavari_golden_registers));
1585 radeon_program_register_sequence(rdev,
1586 kalindi_golden_common_registers,
1587 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588 radeon_program_register_sequence(rdev,
1589 kalindi_golden_spm_registers,
1590 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
/* Spectre (Kaveri) tables */
1593 radeon_program_register_sequence(rdev,
1594 spectre_mgcg_cgcg_init,
1595 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596 radeon_program_register_sequence(rdev,
1597 spectre_golden_registers,
1598 (const u32)ARRAY_SIZE(spectre_golden_registers));
1599 radeon_program_register_sequence(rdev,
1600 spectre_golden_common_registers,
1601 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602 radeon_program_register_sequence(rdev,
1603 spectre_golden_spm_registers,
1604 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
/* Hawaii tables */
1607 radeon_program_register_sequence(rdev,
1608 hawaii_mgcg_cgcg_init,
1609 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610 radeon_program_register_sequence(rdev,
1611 hawaii_golden_registers,
1612 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613 radeon_program_register_sequence(rdev,
1614 hawaii_golden_common_registers,
1615 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616 radeon_program_register_sequence(rdev,
1617 hawaii_golden_spm_registers,
1618 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1626 * cik_get_xclk - get the xclk
1628 * @rdev: radeon_device pointer
1630 * Returns the reference clock used by the gfx engine
/* Reads the SPLL reference frequency and halves/quarters it when the
 * hardware divider bits are set: IGPs check GENERAL_PWRMGT.GPU_COUNTER_CLK
 * (/2), discrete parts check CG_CLKPIN_CNTL.XTALIN_DIVIDE (/4).
 */
1633 u32 cik_get_xclk(struct radeon_device *rdev)
1635 u32 reference_clock = rdev->clock.spll.reference_freq;
1637 if (rdev->flags & RADEON_IS_IGP) {
1638 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1639 return reference_clock / 2;
1641 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1642 return reference_clock / 4;
1644 return reference_clock;
1648 * cik_mm_rdoorbell - read a doorbell dword
1650 * @rdev: radeon_device pointer
1651 * @index: doorbell index
1653 * Returns the value in the doorbell aperture at the
1654 * requested doorbell index (CIK).
/* Bounds-checks @index against the allocated doorbell count; logs an
 * error on out-of-range reads instead of touching the aperture.
 */
1656 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1658 if (index < rdev->doorbell.num_doorbells) {
1659 return readl(rdev->doorbell.ptr + index);
1661 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1667 * cik_mm_wdoorbell - write a doorbell dword
1669 * @rdev: radeon_device pointer
1670 * @index: doorbell index
1671 * @v: value to write
1673 * Writes @v to the doorbell aperture at the
1674 * requested doorbell index (CIK).
/* Mirror of cik_mm_rdoorbell: the write is dropped (with an error log)
 * when @index is outside the allocated doorbell range.
 */
1676 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1678 if (index < rdev->doorbell.num_doorbells) {
1679 writel(v, rdev->doorbell.ptr + index);
1681 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
/* Bonaire MC (memory controller) IO debug register pairs
 * { MC_SEQ_IO_DEBUG index, data } written by ci_mc_load_microcode()
 * before the MC ucode is uploaded.
 */
1685 #define BONAIRE_IO_MC_REGS_SIZE 36
1687 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1689 {0x00000070, 0x04400000},
1690 {0x00000071, 0x80c01803},
1691 {0x00000072, 0x00004004},
1692 {0x00000073, 0x00000100},
1693 {0x00000074, 0x00ff0000},
1694 {0x00000075, 0x34000000},
1695 {0x00000076, 0x08000014},
1696 {0x00000077, 0x00cc08ec},
1697 {0x00000078, 0x00000400},
1698 {0x00000079, 0x00000000},
1699 {0x0000007a, 0x04090000},
1700 {0x0000007c, 0x00000000},
1701 {0x0000007e, 0x4408a8e8},
1702 {0x0000007f, 0x00000304},
1703 {0x00000080, 0x00000000},
1704 {0x00000082, 0x00000001},
1705 {0x00000083, 0x00000002},
1706 {0x00000084, 0xf3e4f400},
1707 {0x00000085, 0x052024e3},
1708 {0x00000087, 0x00000000},
1709 {0x00000088, 0x01000000},
1710 {0x0000008a, 0x1c0a0000},
1711 {0x0000008b, 0xff010000},
1712 {0x0000008d, 0xffffefff},
1713 {0x0000008e, 0xfff3efff},
1714 {0x0000008f, 0xfff3efbf},
1715 {0x00000092, 0xf7ffffff},
1716 {0x00000093, 0xffffff7f},
1717 {0x00000095, 0x00101101},
1718 {0x00000096, 0x00000fff},
1719 {0x00000097, 0x00116fff},
1720 {0x00000098, 0x60010000},
1721 {0x00000099, 0x10010000},
1722 {0x0000009a, 0x00006000},
1723 {0x0000009b, 0x00001000},
1724 {0x0000009f, 0x00b48000}
/* Hawaii MC IO debug register pairs { MC_SEQ_IO_DEBUG index, data },
 * consumed by ci_mc_load_microcode() for the legacy-format MC ucode path.
 */
1727 #define HAWAII_IO_MC_REGS_SIZE 22
1729 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1731 {0x0000007d, 0x40000000},
1732 {0x0000007e, 0x40180304},
1733 {0x0000007f, 0x0000ff00},
1734 {0x00000081, 0x00000000},
1735 {0x00000083, 0x00000800},
1736 {0x00000086, 0x00000000},
1737 {0x00000087, 0x00000100},
1738 {0x00000088, 0x00020100},
1739 {0x00000089, 0x00000000},
1740 {0x0000008b, 0x00040000},
1741 {0x0000008c, 0x00000100},
1742 {0x0000008e, 0xff010000},
1743 {0x00000090, 0xffffefff},
1744 {0x00000091, 0xfff3efff},
1745 {0x00000092, 0xfff3efbf},
1746 {0x00000093, 0xf7ffffff},
1747 {0x00000094, 0xffffff7f},
1748 {0x00000095, 0x00000fff},
1749 {0x00000096, 0x00116fff},
1750 {0x00000097, 0x60010000},
1751 {0x00000098, 0x10010000},
1752 {0x0000009f, 0x00c79000}
1757 * cik_srbm_select - select specific register instances
1759 * @rdev: radeon_device pointer
1760 * @me: selected ME (micro engine)
/* @pipe: selected pipe within the ME (low 2 bits used)
 * @queue: selected queue within the pipe (low 3 bits used)
 * @vmid: selected VMID
 */
1765 * Switches the currently active registers instances. Some
1766 * registers are instanced per VMID, others are instanced per
1767 * me/pipe/queue combination.
1769 static void cik_srbm_select(struct radeon_device *rdev,
1770 u32 me, u32 pipe, u32 queue, u32 vmid)
1772 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1775 QUEUEID(queue & 0x7));
1776 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1781 * ci_mc_load_microcode - load MC ucode into the hw
1783 * @rdev: radeon_device pointer
1785 * Load the GDDR MC ucode into the hw (CIK).
1786 * Returns 0 on success, error on failure.
/* Two firmware layouts are supported: the new header-described format
 * (little-endian, offsets/sizes read from mc_firmware_header_v1_0) and
 * the legacy raw format (big-endian words, IO-debug pairs taken from
 * the per-family static tables above).
 */
1788 int ci_mc_load_microcode(struct radeon_device *rdev)
1790 const __be32 *fw_data = NULL;
1791 const __le32 *new_fw_data = NULL;
1793 u32 *io_mc_regs = NULL;
1794 const __le32 *new_io_mc_regs = NULL;
1795 int i, regs_size, ucode_size;
/* New-format path: locate IO-debug pairs and ucode inside the blob */
1801 const struct mc_firmware_header_v1_0 *hdr =
1802 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1804 radeon_ucode_print_mc_hdr(&hdr->header);
/* io_debug_size_bytes counts index+data dwords, hence / (4 * 2) pairs */
1806 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1807 new_io_mc_regs = (const __le32 *)
1808 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1809 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1810 new_fw_data = (const __le32 *)
1811 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/* Legacy path: whole blob is ucode, register pairs come from tables */
1813 ucode_size = rdev->mc_fw->size / 4;
1815 switch (rdev->family) {
1817 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1818 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1821 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1822 regs_size = HAWAII_IO_MC_REGS_SIZE;
1827 fw_data = (const __be32 *)rdev->mc_fw->data;
1830 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1833 /* reset the engine and set to writable */
1834 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1835 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1837 /* load mc io regs */
1838 for (i = 0; i < regs_size; i++) {
1840 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1841 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1843 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1844 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
/* Device 0x6649 with MC_SEQ_MISC0 revision 0x56xx needs extra tweaks */
1848 tmp = RREG32(MC_SEQ_MISC0);
1849 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1850 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1851 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1852 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1853 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1856 /* load the MC ucode */
1857 for (i = 0; i < ucode_size; i++) {
1859 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1861 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1864 /* put the engine back into the active state */
1865 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1866 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1867 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1869 /* wait for training to complete */
1870 for (i = 0; i < rdev->usec_timeout; i++) {
1871 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1875 for (i = 0; i < rdev->usec_timeout; i++) {
1876 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1886 * cik_init_microcode - load ucode images from disk
1888 * @rdev: radeon_device pointer
1890 * Use the firmware interface to load the ucode images into
1891 * the driver (not loaded into hw).
1892 * Returns 0 on success, error on failure.
/* For each engine (PFP/ME/CE/MEC[/MEC2]/RLC/SDMA and, for dGPUs,
 * MC/SMC) the new-style named firmware is requested first and the
 * legacy name is used as fallback; sizes are validated against the
 * per-family expected sizes chosen in the switch below.  On error the
 * tail releases every firmware reference acquired so far.
 */
1894 static int cik_init_microcode(struct radeon_device *rdev)
1896 const char *chip_name;
1897 const char *new_chip_name;
1898 size_t pfp_req_size, me_req_size, ce_req_size,
1899 mec_req_size, rlc_req_size, mc_req_size = 0,
1900 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1905 bool new_smc = false;
/* Select legacy/new firmware basenames and expected image sizes */
1909 switch (rdev->family) {
1911 chip_name = "BONAIRE";
/* Certain Bonaire revisions/devices need the alternate SMC image */
1912 if ((rdev->pdev->revision == 0x80) ||
1913 (rdev->pdev->revision == 0x81) ||
1914 (rdev->pdev->device == 0x665f))
1916 new_chip_name = "bonaire";
1917 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1918 me_req_size = CIK_ME_UCODE_SIZE * 4;
1919 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1920 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1921 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1922 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1923 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1924 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1925 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1929 chip_name = "HAWAII";
1930 if (rdev->pdev->revision == 0x80)
1932 new_chip_name = "hawaii";
1933 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1934 me_req_size = CIK_ME_UCODE_SIZE * 4;
1935 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1936 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1937 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1938 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1939 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1940 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1941 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1945 chip_name = "KAVERI";
1946 new_chip_name = "kaveri";
1947 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1948 me_req_size = CIK_ME_UCODE_SIZE * 4;
1949 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1950 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1951 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1952 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1956 chip_name = "KABINI";
1957 new_chip_name = "kabini";
1958 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1959 me_req_size = CIK_ME_UCODE_SIZE * 4;
1960 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1961 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1962 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1963 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1967 chip_name = "MULLINS";
1968 new_chip_name = "mullins";
1969 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970 me_req_size = CIK_ME_UCODE_SIZE * 4;
1971 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1974 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1980 DRM_INFO("Loading %s Microcode\n", new_chip_name);
/* PFP (pre-fetch parser): new name first, legacy fallback */
1982 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1983 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1985 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1986 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1989 if (rdev->pfp_fw->size != pfp_req_size) {
1990 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
1991 rdev->pfp_fw->size, fw_name);
1996 err = radeon_ucode_validate(rdev->pfp_fw);
1998 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* ME (micro engine) */
2006 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2007 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2009 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2010 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2013 if (rdev->me_fw->size != me_req_size) {
2014 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2015 rdev->me_fw->size, fw_name);
2019 err = radeon_ucode_validate(rdev->me_fw);
2021 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* CE (constant engine) */
2029 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2030 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2032 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2033 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2036 if (rdev->ce_fw->size != ce_req_size) {
2037 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2038 rdev->ce_fw->size, fw_name);
2042 err = radeon_ucode_validate(rdev->ce_fw);
2044 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* MEC (micro engine compute) */
2052 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2053 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2055 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2056 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2059 if (rdev->mec_fw->size != mec_req_size) {
2060 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2061 rdev->mec_fw->size, fw_name);
2065 err = radeon_ucode_validate(rdev->mec_fw);
2067 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* MEC2: Kaveri only */
2075 if (rdev->family == CHIP_KAVERI) {
2076 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2077 err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2081 err = radeon_ucode_validate(rdev->mec2_fw);
/* RLC */
2090 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2091 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2093 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2094 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2097 if (rdev->rlc_fw->size != rlc_req_size) {
2098 pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2099 rdev->rlc_fw->size, fw_name);
2103 err = radeon_ucode_validate(rdev->rlc_fw);
2105 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* SDMA */
2113 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2114 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2116 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2117 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2120 if (rdev->sdma_fw->size != sdma_req_size) {
2121 pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2122 rdev->sdma_fw->size, fw_name);
2126 err = radeon_ucode_validate(rdev->sdma_fw);
2128 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2136 /* No SMC, MC ucode on APUs */
2137 if (!(rdev->flags & RADEON_IS_IGP)) {
2138 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2139 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2141 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2142 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2144 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2145 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
/* MC firmware may be either the mc or mc2 sized image */
2149 if ((rdev->mc_fw->size != mc_req_size) &&
2150 (rdev->mc_fw->size != mc2_req_size)){
2151 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2152 rdev->mc_fw->size, fw_name);
2155 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2157 err = radeon_ucode_validate(rdev->mc_fw);
2159 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* SMC: failure here is non-fatal (dpm is disabled instead) */
2168 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2170 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2171 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2173 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2174 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2176 pr_err("smc: error loading firmware \"%s\"\n",
2178 release_firmware(rdev->smc_fw);
2179 rdev->smc_fw = NULL;
2181 } else if (rdev->smc_fw->size != smc_req_size) {
2182 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2183 rdev->smc_fw->size, fw_name);
2187 err = radeon_ucode_validate(rdev->smc_fw);
2189 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
/* All images must be the same generation: no mixing new and legacy */
2199 rdev->new_fw = false;
2200 } else if (new_fw < num_fw) {
2201 pr_err("ci_fw: mixing new and old firmware!\n");
2204 rdev->new_fw = true;
/* Error path: drop every firmware reference taken above */
2210 pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2212 release_firmware(rdev->pfp_fw);
2213 rdev->pfp_fw = NULL;
2214 release_firmware(rdev->me_fw);
2216 release_firmware(rdev->ce_fw);
2218 release_firmware(rdev->mec_fw);
2219 rdev->mec_fw = NULL;
2220 release_firmware(rdev->mec2_fw);
2221 rdev->mec2_fw = NULL;
2222 release_firmware(rdev->rlc_fw);
2223 rdev->rlc_fw = NULL;
2224 release_firmware(rdev->sdma_fw);
2225 rdev->sdma_fw = NULL;
2226 release_firmware(rdev->mc_fw);
2228 release_firmware(rdev->smc_fw);
2229 rdev->smc_fw = NULL;
2238 * cik_tiling_mode_table_init - init the hw tiling table
2240 * @rdev: radeon_device pointer
2242 * Starting with SI, the tiling setup is done globally in a
2243 * set of 32 tiling modes. Rather than selecting each set of
2244 * parameters per surface as on older asics, we just select
2245 * which index in the tiling table we want to use, and the
2246 * surface uses those parameters (CIK).
2248 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2250 u32 *tile = rdev->config.cik.tile_mode_array;
2251 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2252 const u32 num_tile_mode_states =
2253 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2254 const u32 num_secondary_tile_mode_states =
2255 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2256 u32 reg_offset, split_equal_to_row_size;
2257 u32 num_pipe_configs;
2258 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2259 rdev->config.cik.max_shader_engines;
2261 switch (rdev->config.cik.mem_row_size_in_kb) {
2263 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2267 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2270 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2274 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2275 if (num_pipe_configs > 8)
2276 num_pipe_configs = 16;
2278 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279 tile[reg_offset] = 0;
2280 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2281 macrotile[reg_offset] = 0;
2283 switch(num_pipe_configs) {
2285 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2289 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2293 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2295 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2297 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2301 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 TILE_SPLIT(split_equal_to_row_size));
2305 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2308 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2312 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 TILE_SPLIT(split_equal_to_row_size));
2316 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2317 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2318 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2321 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2334 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2336 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2351 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2355 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367 NUM_BANKS(ADDR_SURF_16_BANK));
2368 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371 NUM_BANKS(ADDR_SURF_16_BANK));
2372 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2375 NUM_BANKS(ADDR_SURF_16_BANK));
2376 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379 NUM_BANKS(ADDR_SURF_16_BANK));
2380 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383 NUM_BANKS(ADDR_SURF_8_BANK));
2384 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387 NUM_BANKS(ADDR_SURF_4_BANK));
2388 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391 NUM_BANKS(ADDR_SURF_2_BANK));
2392 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2395 NUM_BANKS(ADDR_SURF_16_BANK));
2396 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399 NUM_BANKS(ADDR_SURF_16_BANK));
2400 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403 NUM_BANKS(ADDR_SURF_16_BANK));
2404 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407 NUM_BANKS(ADDR_SURF_8_BANK));
2408 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2411 NUM_BANKS(ADDR_SURF_4_BANK));
2412 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415 NUM_BANKS(ADDR_SURF_2_BANK));
2416 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419 NUM_BANKS(ADDR_SURF_2_BANK));
2421 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2422 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2423 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2424 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2428 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2430 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2431 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2432 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2434 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2435 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2436 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2438 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2439 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2440 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2443 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2444 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2447 TILE_SPLIT(split_equal_to_row_size));
2448 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2455 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458 TILE_SPLIT(split_equal_to_row_size));
2459 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2460 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2461 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2464 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2466 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2470 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2474 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2479 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2483 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2492 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2494 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 NUM_BANKS(ADDR_SURF_16_BANK));
2511 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2514 NUM_BANKS(ADDR_SURF_16_BANK));
2515 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_16_BANK));
2519 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_16_BANK));
2523 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526 NUM_BANKS(ADDR_SURF_8_BANK));
2527 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 NUM_BANKS(ADDR_SURF_4_BANK));
2531 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2534 NUM_BANKS(ADDR_SURF_2_BANK));
2535 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 NUM_BANKS(ADDR_SURF_16_BANK));
2539 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2554 NUM_BANKS(ADDR_SURF_8_BANK));
2555 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558 NUM_BANKS(ADDR_SURF_4_BANK));
2559 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562 NUM_BANKS(ADDR_SURF_2_BANK));
2564 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2566 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2567 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2572 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2575 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2580 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2584 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2588 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591 TILE_SPLIT(split_equal_to_row_size));
2592 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2599 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602 TILE_SPLIT(split_equal_to_row_size));
2603 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2605 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2606 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2608 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2618 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2623 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2629 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2640 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2644 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651 } else if (num_rbs < 4) {
2652 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2656 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2660 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2664 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2668 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 TILE_SPLIT(split_equal_to_row_size));
2672 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2673 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2675 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2677 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2679 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682 TILE_SPLIT(split_equal_to_row_size));
2683 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2684 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2685 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2686 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2688 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2697 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2701 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2703 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2718 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 NUM_BANKS(ADDR_SURF_16_BANK));
2736 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739 NUM_BANKS(ADDR_SURF_16_BANK));
2740 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2743 NUM_BANKS(ADDR_SURF_16_BANK));
2744 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 NUM_BANKS(ADDR_SURF_16_BANK));
2748 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751 NUM_BANKS(ADDR_SURF_16_BANK));
2752 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2755 NUM_BANKS(ADDR_SURF_8_BANK));
2756 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 NUM_BANKS(ADDR_SURF_4_BANK));
2760 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 NUM_BANKS(ADDR_SURF_16_BANK));
2764 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2767 NUM_BANKS(ADDR_SURF_16_BANK));
2768 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 NUM_BANKS(ADDR_SURF_16_BANK));
2772 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775 NUM_BANKS(ADDR_SURF_16_BANK));
2776 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2779 NUM_BANKS(ADDR_SURF_16_BANK));
2780 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 NUM_BANKS(ADDR_SURF_8_BANK));
2784 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2787 NUM_BANKS(ADDR_SURF_4_BANK));
2789 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2790 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2791 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2792 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2796 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 PIPE_CONFIG(ADDR_SURF_P2) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2802 PIPE_CONFIG(ADDR_SURF_P2) |
2803 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2804 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806 PIPE_CONFIG(ADDR_SURF_P2) |
2807 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2808 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 PIPE_CONFIG(ADDR_SURF_P2) |
2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2812 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2814 PIPE_CONFIG(ADDR_SURF_P2) |
2815 TILE_SPLIT(split_equal_to_row_size));
2816 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817 PIPE_CONFIG(ADDR_SURF_P2) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821 PIPE_CONFIG(ADDR_SURF_P2) |
2822 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2823 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825 PIPE_CONFIG(ADDR_SURF_P2) |
2826 TILE_SPLIT(split_equal_to_row_size));
2827 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828 PIPE_CONFIG(ADDR_SURF_P2);
2829 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 PIPE_CONFIG(ADDR_SURF_P2));
2832 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 PIPE_CONFIG(ADDR_SURF_P2) |
2835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838 PIPE_CONFIG(ADDR_SURF_P2) |
2839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842 PIPE_CONFIG(ADDR_SURF_P2) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P2) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2847 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849 PIPE_CONFIG(ADDR_SURF_P2) |
2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853 PIPE_CONFIG(ADDR_SURF_P2) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857 PIPE_CONFIG(ADDR_SURF_P2) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2861 PIPE_CONFIG(ADDR_SURF_P2));
2862 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2864 PIPE_CONFIG(ADDR_SURF_P2) |
2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2866 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2868 PIPE_CONFIG(ADDR_SURF_P2) |
2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872 PIPE_CONFIG(ADDR_SURF_P2) |
2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2876 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2877 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2878 NUM_BANKS(ADDR_SURF_16_BANK));
2879 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2882 NUM_BANKS(ADDR_SURF_16_BANK));
2883 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2886 NUM_BANKS(ADDR_SURF_16_BANK));
2887 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2890 NUM_BANKS(ADDR_SURF_16_BANK));
2891 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894 NUM_BANKS(ADDR_SURF_16_BANK));
2895 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898 NUM_BANKS(ADDR_SURF_16_BANK));
2899 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2902 NUM_BANKS(ADDR_SURF_8_BANK));
2903 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2904 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2905 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2906 NUM_BANKS(ADDR_SURF_16_BANK));
2907 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2908 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2909 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2910 NUM_BANKS(ADDR_SURF_16_BANK));
2911 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914 NUM_BANKS(ADDR_SURF_16_BANK));
2915 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 NUM_BANKS(ADDR_SURF_16_BANK));
2919 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922 NUM_BANKS(ADDR_SURF_16_BANK));
2923 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 NUM_BANKS(ADDR_SURF_16_BANK));
2927 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2930 NUM_BANKS(ADDR_SURF_8_BANK));
2932 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2933 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2934 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2935 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2939 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2944 * cik_select_se_sh - select which SE, SH to address
2946 * @rdev: radeon_device pointer
2947 * @se_num: shader engine to address
2948 * @sh_num: sh block to address
2950 * Select which SE, SH combinations to address. Certain
2951 * registers are instanced per SE or SH. 0xffffffff means
2952 * broadcast to all SEs or SHs (CIK).
2954 static void cik_select_se_sh(struct radeon_device *rdev,
2955 u32 se_num, u32 sh_num)
2957 u32 data = INSTANCE_BROADCAST_WRITES;
2959 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2960 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2961 else if (se_num == 0xffffffff)
2962 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2963 else if (sh_num == 0xffffffff)
2964 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2966 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2967 WREG32(GRBM_GFX_INDEX, data);
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 bit;
	u32 mask = 0;

	/* Set the low bit_width bits, one per iteration. */
	for (bit = 0; bit < bit_width; bit++)
		mask = (mask << 1) | 1;

	return mask;
}
2990 * cik_get_rb_disabled - computes the mask of disabled RBs
2992 * @rdev: radeon_device pointer
2993 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
2995 * @sh_per_se: number of SH blocks per SE for the asic
2997 * Calculates the bitmask of disabled RBs (CIK).
2998 * Returns the disabled RB bitmask.
3000 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3001 u32 max_rb_num_per_se,
/* RBs fused off in hardware for the currently selected SE/SH. */
3006 data = RREG32(CC_RB_BACKEND_DISABLE);
3008 data &= BACKEND_DISABLE_MASK;
/* OR in RBs additionally disabled by user/driver configuration. */
3011 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
/* Shift the combined disable field down to bit 0. */
3013 data >>= BACKEND_DISABLE_SHIFT;
/* Keep only bits for RBs that can actually exist per SH on this asic. */
3015 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3021 * cik_setup_rb - setup the RBs on the asic
3023 * @rdev: radeon_device pointer
3024 * @se_num: number of SEs (shader engines) for the asic
3025 * @sh_per_se: number of SH blocks per SE for the asic
3026 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3028 * Configures per-SE/SH RB registers (CIK).
3030 static void cik_setup_rb(struct radeon_device *rdev,
3031 u32 se_num, u32 sh_per_se,
3032 u32 max_rb_num_per_se)
3036 u32 disabled_rbs = 0;
3037 u32 enabled_rbs = 0;
/* Pass 1: walk every SE/SH pair and collect the per-SH disabled-RB
 * bits into one packed bitmap; Hawaii uses a different per-SH bitmap
 * width than the other CIK parts.
 */
3039 for (i = 0; i < se_num; i++) {
3040 for (j = 0; j < sh_per_se; j++) {
3041 cik_select_se_sh(rdev, i, j);
3042 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3043 if (rdev->family == CHIP_HAWAII)
3044 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3046 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
/* Back to broadcast so subsequent writes reach all SEs/SHs. */
3049 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
/* Pass 2: invert the disabled bitmap into an enabled-RB bitmap. */
3052 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3053 if (!(disabled_rbs & mask))
3054 enabled_rbs |= mask;
/* Export the enabled-RB mask for userspace/other driver code. */
3058 rdev->config.cik.backend_enable_mask = enabled_rbs;
/* Pass 3: program PA_SC_RASTER_CONFIG per SE, mapping each SH's pair
 * of RB-enable bits to a raster-config RB map value.
 */
3060 for (i = 0; i < se_num; i++) {
3061 cik_select_se_sh(rdev, i, 0xffffffff);
3063 for (j = 0; j < sh_per_se; j++) {
3064 switch (enabled_rbs & 3) {
3067 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3069 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3072 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3075 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3079 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3084 WREG32(PA_SC_RASTER_CONFIG, data);
/* Leave the GRBM index in broadcast mode when done. */
3086 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3090 * cik_gpu_init - setup the 3D engine
3092 * @rdev: radeon_device pointer
3094 * Configures the 3D engine and tiling configuration
3095 * registers so that the 3D engine is usable.
3097 static void cik_gpu_init(struct radeon_device *rdev)
3099 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3100 u32 mc_shared_chmap, mc_arb_ramcfg;
3101 u32 hdp_host_path_cntl;
3105 switch (rdev->family) {
3107 rdev->config.cik.max_shader_engines = 2;
3108 rdev->config.cik.max_tile_pipes = 4;
3109 rdev->config.cik.max_cu_per_sh = 7;
3110 rdev->config.cik.max_sh_per_se = 1;
3111 rdev->config.cik.max_backends_per_se = 2;
3112 rdev->config.cik.max_texture_channel_caches = 4;
3113 rdev->config.cik.max_gprs = 256;
3114 rdev->config.cik.max_gs_threads = 32;
3115 rdev->config.cik.max_hw_contexts = 8;
3117 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3118 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3119 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3120 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3121 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3124 rdev->config.cik.max_shader_engines = 4;
3125 rdev->config.cik.max_tile_pipes = 16;
3126 rdev->config.cik.max_cu_per_sh = 11;
3127 rdev->config.cik.max_sh_per_se = 1;
3128 rdev->config.cik.max_backends_per_se = 4;
3129 rdev->config.cik.max_texture_channel_caches = 16;
3130 rdev->config.cik.max_gprs = 256;
3131 rdev->config.cik.max_gs_threads = 32;
3132 rdev->config.cik.max_hw_contexts = 8;
3134 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3135 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3136 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3137 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3138 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3141 rdev->config.cik.max_shader_engines = 1;
3142 rdev->config.cik.max_tile_pipes = 4;
3143 rdev->config.cik.max_cu_per_sh = 8;
3144 rdev->config.cik.max_backends_per_se = 2;
3145 rdev->config.cik.max_sh_per_se = 1;
3146 rdev->config.cik.max_texture_channel_caches = 4;
3147 rdev->config.cik.max_gprs = 256;
3148 rdev->config.cik.max_gs_threads = 16;
3149 rdev->config.cik.max_hw_contexts = 8;
3151 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3152 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3153 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3154 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3155 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3160 rdev->config.cik.max_shader_engines = 1;
3161 rdev->config.cik.max_tile_pipes = 2;
3162 rdev->config.cik.max_cu_per_sh = 2;
3163 rdev->config.cik.max_sh_per_se = 1;
3164 rdev->config.cik.max_backends_per_se = 1;
3165 rdev->config.cik.max_texture_channel_caches = 2;
3166 rdev->config.cik.max_gprs = 256;
3167 rdev->config.cik.max_gs_threads = 16;
3168 rdev->config.cik.max_hw_contexts = 8;
3170 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3171 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3172 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3173 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3174 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3178 /* Initialize HDP */
3179 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3180 WREG32((0x2c14 + j), 0x00000000);
3181 WREG32((0x2c18 + j), 0x00000000);
3182 WREG32((0x2c1c + j), 0x00000000);
3183 WREG32((0x2c20 + j), 0x00000000);
3184 WREG32((0x2c24 + j), 0x00000000);
3187 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3188 WREG32(SRBM_INT_CNTL, 0x1);
3189 WREG32(SRBM_INT_ACK, 0x1);
3191 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3193 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3194 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3196 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3197 rdev->config.cik.mem_max_burst_length_bytes = 256;
3198 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3199 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3200 if (rdev->config.cik.mem_row_size_in_kb > 4)
3201 rdev->config.cik.mem_row_size_in_kb = 4;
3202 /* XXX use MC settings? */
3203 rdev->config.cik.shader_engine_tile_size = 32;
3204 rdev->config.cik.num_gpus = 1;
3205 rdev->config.cik.multi_gpu_tile_size = 64;
3207 /* fix up row size */
3208 gb_addr_config &= ~ROW_SIZE_MASK;
3209 switch (rdev->config.cik.mem_row_size_in_kb) {
3212 gb_addr_config |= ROW_SIZE(0);
3215 gb_addr_config |= ROW_SIZE(1);
3218 gb_addr_config |= ROW_SIZE(2);
3222 /* setup tiling info dword. gb_addr_config is not adequate since it does
3223 * not have bank info, so create a custom tiling dword.
3224 * bits 3:0 num_pipes
3225 * bits 7:4 num_banks
3226 * bits 11:8 group_size
3227 * bits 15:12 row_size
3229 rdev->config.cik.tile_config = 0;
3230 switch (rdev->config.cik.num_tile_pipes) {
3232 rdev->config.cik.tile_config |= (0 << 0);
3235 rdev->config.cik.tile_config |= (1 << 0);
3238 rdev->config.cik.tile_config |= (2 << 0);
3242 /* XXX what about 12? */
3243 rdev->config.cik.tile_config |= (3 << 0);
3246 rdev->config.cik.tile_config |=
3247 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3248 rdev->config.cik.tile_config |=
3249 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3250 rdev->config.cik.tile_config |=
3251 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3253 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3254 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3255 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3256 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3257 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3258 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3259 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3260 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3262 cik_tiling_mode_table_init(rdev);
3264 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3265 rdev->config.cik.max_sh_per_se,
3266 rdev->config.cik.max_backends_per_se);
3268 rdev->config.cik.active_cus = 0;
3269 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3270 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3271 rdev->config.cik.active_cus +=
3272 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3276 /* set HW defaults for 3D engine */
3277 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3279 WREG32(SX_DEBUG_1, 0x20);
3281 WREG32(TA_CNTL_AUX, 0x00010000);
3283 tmp = RREG32(SPI_CONFIG_CNTL);
3285 WREG32(SPI_CONFIG_CNTL, tmp);
3287 WREG32(SQ_CONFIG, 1);
3289 WREG32(DB_DEBUG, 0);
3291 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3293 WREG32(DB_DEBUG2, tmp);
3295 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3297 WREG32(DB_DEBUG3, tmp);
3299 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3301 WREG32(CB_HW_CONTROL, tmp);
3303 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3305 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3306 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3307 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3308 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3310 WREG32(VGT_NUM_INSTANCES, 1);
3312 WREG32(CP_PERFMON_CNTL, 0);
3314 WREG32(SQ_CONFIG, 0);
3316 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3317 FORCE_EOV_MAX_REZ_CNT(255)));
3319 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3320 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3322 WREG32(VGT_GS_VERTEX_REUSE, 16);
3323 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3325 tmp = RREG32(HDP_MISC_CNTL);
3326 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3327 WREG32(HDP_MISC_CNTL, tmp);
3329 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3330 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3332 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3333 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3339 * GPU scratch registers helpers function.
3342 * cik_scratch_init - setup driver info for CP scratch regs
3344 * @rdev: radeon_device pointer
3346 * Set up the number and offset of the CP scratch registers.
3347 * NOTE: use of CP scratch registers is a legacy interface and
3348 * is not used by default on newer asics (r6xx+). On newer asics,
3349 * memory buffers are used for fences rather than scratch regs.
/* Record the driver's view of the legacy CP scratch pool: 7 registers
 * starting at SCRATCH_REG0, 4 bytes apart, all initially free.
 * Consumed by cik_ring_test()/cik_ib_test() below. */
3351 static void cik_scratch_init(struct radeon_device *rdev)
3355 rdev->scratch.num_reg = 7;
3356 rdev->scratch.reg_base = SCRATCH_REG0;
3357 for (i = 0; i < rdev->scratch.num_reg; i++) {
3358 rdev->scratch.free[i] = true;
3359 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3364 * cik_ring_test - basic gfx ring test
3366 * @rdev: radeon_device pointer
3367 * @ring: radeon_ring structure holding ring information
3369 * Allocate a scratch register and write to it using the gfx ring (CIK).
3370 * Provides a basic gfx ring test to verify that the ring is working.
3371 * Used by cik_cp_gfx_resume();
3372 * Returns 0 on success, error on failure.
3374 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3381 r = radeon_scratch_get(rdev, &scratch);
3383 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
/* Seed the scratch reg with a sentinel; the ring must overwrite it. */
3386 WREG32(scratch, 0xCAFEDEAD);
3387 r = radeon_ring_lock(rdev, ring, 3);
3389 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3390 radeon_scratch_free(rdev, scratch);
/* 3-dword SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg. */
3393 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3394 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3395 radeon_ring_write(ring, 0xDEADBEEF);
3396 radeon_ring_unlock_commit(rdev, ring, false);
/* Poll (up to usec_timeout iterations) for the CP to land the write. */
3398 for (i = 0; i < rdev->usec_timeout; i++) {
3399 tmp = RREG32(scratch);
3400 if (tmp == 0xDEADBEEF)
3404 if (i < rdev->usec_timeout) {
3405 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3407 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3408 ring->idx, scratch, tmp);
3411 radeon_scratch_free(rdev, scratch);
3416 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3418 * @rdev: radeon_device pointer
3419 * @ridx: radeon ring index
3421 * Emits an hdp flush on the cp.
/* Emit a PFP-engine WAIT_REG_MEM that requests an HDP flush via
 * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the
 * per-client ack bit (ref_and_mask) matches. */
3423 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3426 struct radeon_ring *ring = &rdev->ring[ridx];
/* Pick the flush-done ack bit for this ring/pipe combination. */
3429 switch (ring->idx) {
3430 case CAYMAN_RING_TYPE_CP1_INDEX:
3431 case CAYMAN_RING_TYPE_CP2_INDEX:
3435 ref_and_mask = CP2 << ring->pipe;
3438 ref_and_mask = CP6 << ring->pipe;
3444 case RADEON_RING_TYPE_GFX_INDEX:
3449 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3450 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3451 WAIT_REG_MEM_FUNCTION(3) | /* == */
3452 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3453 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3454 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3455 radeon_ring_write(ring, ref_and_mask);
3456 radeon_ring_write(ring, ref_and_mask);
3457 radeon_ring_write(ring, 0x20); /* poll interval */
3461 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3463 * @rdev: radeon_device pointer
3464 * @fence: radeon fence object
3466 * Emits a fence sequence number on the gfx ring and flushes
/* Emit the fence value on the gfx ring via two EVENT_WRITE_EOP
 * packets: a dummy one (seq - 1, no interrupt) to work around a
 * cache-flush issue, then the real one (seq, interrupt raised). */
3469 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3470 struct radeon_fence *fence)
3472 struct radeon_ring *ring = &rdev->ring[fence->ring];
3473 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3475 /* Workaround for cache flush problems. First send a dummy EOP
3476 * event down the pipe with seq one below.
3478 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3479 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3481 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3483 radeon_ring_write(ring, addr & 0xfffffffc);
/* Dummy event: 32-bit data select, no interrupt (INT_SEL(0)). */
3484 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3485 DATA_SEL(1) | INT_SEL(0));
3486 radeon_ring_write(ring, fence->seq - 1);
3487 radeon_ring_write(ring, 0);
3489 /* Then send the real EOP event down the pipe. */
3490 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3491 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3493 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3495 radeon_ring_write(ring, addr & 0xfffffffc);
/* Real event: write fence->seq and raise an interrupt (INT_SEL(2)). */
3496 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3497 radeon_ring_write(ring, fence->seq);
3498 radeon_ring_write(ring, 0);
3502 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3504 * @rdev: radeon_device pointer
3505 * @fence: radeon fence object
3507 * Emits a fence sequence number on the compute ring and flushes
/* Emit the fence on a compute ring with a single RELEASE_MEM packet:
 * flush caches, write fence->seq to the fence GPU address and raise
 * an interrupt (INT_SEL(2)). */
3510 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3511 struct radeon_fence *fence)
3513 struct radeon_ring *ring = &rdev->ring[fence->ring];
3514 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3516 /* RELEASE_MEM - flush caches, send int */
3517 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3518 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3520 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3522 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3523 radeon_ring_write(ring, addr & 0xfffffffc);
3524 radeon_ring_write(ring, upper_32_bits(addr));
3525 radeon_ring_write(ring, fence->seq);
3526 radeon_ring_write(ring, 0);
3530 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3532 * @rdev: radeon_device pointer
3533 * @ring: radeon ring buffer object
3534 * @semaphore: radeon semaphore object
3535 * @emit_wait: Is this a semaphore wait?
3537 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3538 * from running ahead of semaphore waits.
/* Emit a MEM_SEMAPHORE signal or wait packet. On the gfx ring a wait
 * is followed by PFP_SYNC_ME so the prefetch parser cannot run past
 * the unsatisfied semaphore. */
3540 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3541 struct radeon_ring *ring,
3542 struct radeon_semaphore *semaphore,
3545 uint64_t addr = semaphore->gpu_addr;
3546 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3548 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3549 radeon_ring_write(ring, lower_32_bits(addr));
3550 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3552 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3553 /* Prevent the PFP from running ahead of the semaphore wait */
3554 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3555 radeon_ring_write(ring, 0x0);
3562 * cik_copy_cpdma - copy pages using the CP DMA engine
3564 * @rdev: radeon_device pointer
3565 * @src_offset: src GPU address
3566 * @dst_offset: dst GPU address
3567 * @num_gpu_pages: number of GPU pages to xfer
3568 * @resv: reservation object to sync to
3570 * Copy GPU paging using the CP DMA engine (CIK+).
3571 * Used by the radeon ttm implementation to move pages if
3572 * registered as the asic copy callback.
/* Copy num_gpu_pages pages from src_offset to dst_offset with CP
 * DMA_DATA packets, syncing against resv first. Returns the fence
 * that signals completion (ownership passes to the caller). */
3574 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3575 uint64_t src_offset, uint64_t dst_offset,
3576 unsigned num_gpu_pages,
3577 struct reservation_object *resv)
3579 struct radeon_fence *fence;
3580 struct radeon_sync sync;
3581 int ring_index = rdev->asic->copy.blit_ring_index;
3582 struct radeon_ring *ring = &rdev->ring[ring_index];
3583 u32 size_in_bytes, cur_size_in_bytes, control;
3587 radeon_sync_create(&sync);
3589 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
/* Each DMA_DATA packet moves at most 0x1fffff bytes; 7 dwords per
 * packet plus 18 dwords of fixed sync/fence overhead. */
3590 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3591 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3593 DRM_ERROR("radeon: moving bo (%d).\n", r);
3594 radeon_sync_free(rdev, &sync, NULL);
3598 radeon_sync_resv(rdev, &sync, resv, false);
3599 radeon_sync_rings(rdev, &sync, ring->idx);
3601 for (i = 0; i < num_loops; i++) {
3602 cur_size_in_bytes = size_in_bytes;
3603 if (cur_size_in_bytes > 0x1fffff)
3604 cur_size_in_bytes = 0x1fffff;
3605 size_in_bytes -= cur_size_in_bytes;
/* Last chunk: ask the CP to wait for the DMA to finish (CP_SYNC). */
3607 if (size_in_bytes == 0)
3608 control |= PACKET3_DMA_DATA_CP_SYNC;
3609 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3610 radeon_ring_write(ring, control);
3611 radeon_ring_write(ring, lower_32_bits(src_offset));
3612 radeon_ring_write(ring, upper_32_bits(src_offset));
3613 radeon_ring_write(ring, lower_32_bits(dst_offset));
3614 radeon_ring_write(ring, upper_32_bits(dst_offset));
3615 radeon_ring_write(ring, cur_size_in_bytes);
3616 src_offset += cur_size_in_bytes;
3617 dst_offset += cur_size_in_bytes;
3620 r = radeon_fence_emit(rdev, &fence, ring->idx);
3622 radeon_ring_unlock_undo(rdev, ring);
3623 radeon_sync_free(rdev, &sync, NULL);
3627 radeon_ring_unlock_commit(rdev, ring, false);
3628 radeon_sync_free(rdev, &sync, fence);
3637 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3639 * @rdev: radeon_device pointer
3640 * @ib: radeon indirect buffer object
3642 * Emits a DE (drawing engine) or CE (constant engine) IB
3643 * on the gfx ring. IBs are usually generated by userspace
3644 * acceleration drivers and submitted to the kernel for
3645 * scheduling on the ring. This function schedules the IB
3646 * on the gfx ring for execution by the GPU.
3648 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3650 struct radeon_ring *ring = &rdev->ring[ib->ring];
3651 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3652 u32 header, control = INDIRECT_BUFFER_VALID;
3654 if (ib->is_const_ib) {
3655 /* set switch buffer packet before const IB */
3656 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3657 radeon_ring_write(ring, 0);
3659 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
/* Record the predicted rptr after this IB either in rptr_save_reg or
 * in the writeback page — presumably for hang diagnosis; confirm. */
3662 if (ring->rptr_save_reg) {
3663 next_rptr = ring->wptr + 3 + 4;
3664 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3665 radeon_ring_write(ring, ((ring->rptr_save_reg -
3666 PACKET3_SET_UCONFIG_REG_START) >> 2));
3667 radeon_ring_write(ring, next_rptr);
3668 } else if (rdev->wb.enabled) {
3669 next_rptr = ring->wptr + 5 + 4;
3670 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3671 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3672 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3673 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3674 radeon_ring_write(ring, next_rptr);
3677 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3680 control |= ib->length_dw | (vm_id << 24);
/* IB packet body: header, dword-aligned GPU address, length + VMID. */
3682 radeon_ring_write(ring, header);
3683 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3684 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3685 radeon_ring_write(ring, control);
3689 * cik_ib_test - basic gfx ring IB test
3691 * @rdev: radeon_device pointer
3692 * @ring: radeon_ring structure holding ring information
3694 * Allocate an IB and execute it on the gfx ring (CIK).
3695 * Provides a basic gfx ring test to verify that IBs are working.
3696 * Returns 0 on success, error on failure.
3698 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3700 struct radeon_ib ib;
3706 r = radeon_scratch_get(rdev, &scratch);
3708 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
/* Seed a sentinel that the IB must overwrite. */
3711 WREG32(scratch, 0xCAFEDEAD);
3712 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3714 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3715 radeon_scratch_free(rdev, scratch);
/* Three-dword IB that writes 0xDEADBEEF into the scratch register. */
3718 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3719 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3720 ib.ptr[2] = 0xDEADBEEF;
3722 r = radeon_ib_schedule(rdev, &ib, NULL, false);
3724 radeon_scratch_free(rdev, scratch);
3725 radeon_ib_free(rdev, &ib);
3726 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3729 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3730 RADEON_USEC_IB_TEST_TIMEOUT));
3732 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3733 radeon_scratch_free(rdev, scratch);
3734 radeon_ib_free(rdev, &ib);
3736 } else if (r == 0) {
3737 DRM_ERROR("radeon: fence wait timed out.\n");
3738 radeon_scratch_free(rdev, scratch);
3739 radeon_ib_free(rdev, &ib);
/* Fence signalled; poll the scratch register for the IB's write. */
3743 for (i = 0; i < rdev->usec_timeout; i++) {
3744 tmp = RREG32(scratch);
3745 if (tmp == 0xDEADBEEF)
3749 if (i < rdev->usec_timeout) {
3750 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3752 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3756 radeon_scratch_free(rdev, scratch);
3757 radeon_ib_free(rdev, &ib);
3763 * On CIK, gfx and compute now have independent command processors.
3766 * Gfx consists of a single ring and can process both gfx jobs and
3767 * compute jobs. The gfx CP consists of three microengines (ME):
3768 * PFP - Pre-Fetch Parser
3770 * CE - Constant Engine
3771 * The PFP and ME make up what is considered the Drawing Engine (DE).
3772 * The CE is an asynchronous engine used for updating buffer descriptors
3773 * used by the DE so that they can be loaded into cache in parallel
3774 * while the DE is processing state update packets.
3777 * The compute CP consists of two microengines (ME):
3778 * MEC1 - Compute MicroEngine 1
3779 * MEC2 - Compute MicroEngine 2
3780 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3781 * The queues are exposed to userspace and are programmed directly
3782 * by the compute runtime.
3785 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3787 * @rdev: radeon_device pointer
3788 * @enable: enable or disable the MEs
3790 * Halts or unhalts the gfx MEs.
/* Halt or un-halt the gfx CP micro-engines via CP_ME_CNTL.
 * Enable: clear all halt bits. Disable: shrink the active VRAM
 * window if gfx owns the copy ring, set the PFP/ME/CE halt bits and
 * mark the gfx ring not ready. */
3792 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3795 WREG32(CP_ME_CNTL, 0);
3797 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3798 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3799 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3800 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3806 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3808 * @rdev: radeon_device pointer
3810 * Loads the gfx PFP, ME, and CE ucode.
3811 * Returns 0 for success, -EINVAL if the ucode is not available.
/* Upload the PFP, CE and ME microcode into the gfx CP with the MEs
 * halted. Two firmware layouts exist: header-carrying images with a
 * little-endian payload, and legacy headerless images with a
 * big-endian payload and fixed CIK_*_UCODE_SIZE lengths. */
3813 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3817 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3820 cik_cp_gfx_enable(rdev, false);
/* Header-carrying firmware: __le32 payload behind a gfx header. */
3823 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3824 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3825 const struct gfx_firmware_header_v1_0 *ce_hdr =
3826 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3827 const struct gfx_firmware_header_v1_0 *me_hdr =
3828 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3829 const __le32 *fw_data;
3832 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3833 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3834 radeon_ucode_print_gfx_hdr(&me_hdr->header);
/* PFP */
3837 fw_data = (const __le32 *)
3838 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3839 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3840 WREG32(CP_PFP_UCODE_ADDR, 0);
3841 for (i = 0; i < fw_size; i++)
3842 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3843 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
/* CE */
3846 fw_data = (const __le32 *)
3847 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3848 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3849 WREG32(CP_CE_UCODE_ADDR, 0);
3850 for (i = 0; i < fw_size; i++)
3851 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3852 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
/* ME.
 * NOTE(review): the cast below says __be32, unlike the __le32 casts
 * used for PFP/CE above, yet fw_data is declared const __le32 * and
 * the stream is still read with le32_to_cpup() — looks like a stale
 * cast (sparse would warn); confirm before changing. */
3855 fw_data = (const __be32 *)
3856 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3857 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3858 WREG32(CP_ME_RAM_WADDR, 0);
3859 for (i = 0; i < fw_size; i++)
3860 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3861 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3862 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
/* Legacy headerless firmware: __be32 payload, fixed sizes. */
3864 const __be32 *fw_data;
/* PFP */
3867 fw_data = (const __be32 *)rdev->pfp_fw->data;
3868 WREG32(CP_PFP_UCODE_ADDR, 0);
3869 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3870 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3871 WREG32(CP_PFP_UCODE_ADDR, 0);
/* CE */
3874 fw_data = (const __be32 *)rdev->ce_fw->data;
3875 WREG32(CP_CE_UCODE_ADDR, 0);
3876 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3877 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3878 WREG32(CP_CE_UCODE_ADDR, 0);
/* ME */
3881 fw_data = (const __be32 *)rdev->me_fw->data;
3882 WREG32(CP_ME_RAM_WADDR, 0);
3883 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3884 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3885 WREG32(CP_ME_RAM_WADDR, 0);
3892 * cik_cp_gfx_start - start the gfx ring
3894 * @rdev: radeon_device pointer
3896 * Enables the ring and loads the clear state context and other
3897 * packets required to init the ring.
3898 * Returns 0 for success, error for failure.
3900 static int cik_cp_gfx_start(struct radeon_device *rdev)
3902 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
/* Program global CP state before un-halting the MEs. */
3906 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3907 WREG32(CP_ENDIAN_SWAP, 0);
3908 WREG32(CP_DEVICE_ID, 1);
3910 cik_cp_gfx_enable(rdev, true);
3912 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3914 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3918 /* init the CE partitions. CE only used for gfx on CIK */
3919 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3920 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3921 radeon_ring_write(ring, 0x8000);
3922 radeon_ring_write(ring, 0x8000);
3924 /* setup clear context state */
3925 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3926 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3928 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3929 radeon_ring_write(ring, 0x80000000);
3930 radeon_ring_write(ring, 0x80000000);
/* Stream the golden clear-state table (cik_default_state, from
 * clearstate_ci.h included at the top of this file). */
3932 for (i = 0; i < cik_default_size; i++)
3933 radeon_ring_write(ring, cik_default_state[i]);
3935 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3936 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3938 /* set clear context state */
3939 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3940 radeon_ring_write(ring, 0);
3942 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3943 radeon_ring_write(ring, 0x00000316);
3944 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3945 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3947 radeon_ring_unlock_commit(rdev, ring, false);
3953 * cik_cp_gfx_fini - stop the gfx ring
3955 * @rdev: radeon_device pointer
3957 * Stop the gfx ring and tear down the driver ring
/* Halt the gfx CP and free the driver-side gfx ring buffer. */
3960 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3962 cik_cp_gfx_enable(rdev, false);
3963 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3967 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3969 * @rdev: radeon_device pointer
3971 * Program the location and size of the gfx ring buffer
3972 * and test it to make sure it's working.
3973 * Returns 0 for success, error for failure.
3975 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3977 struct radeon_ring *ring;
/* Disable semaphore timeouts (Hawaii lacks the incomplete-timer reg). */
3983 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3984 if (rdev->family != CHIP_HAWAII)
3985 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3987 /* Set the write pointer delay */
3988 WREG32(CP_RB_WPTR_DELAY, 0);
3990 /* set the RB to use vmid 0 */
3991 WREG32(CP_RB_VMID, 0);
3993 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3995 /* ring 0 - compute and gfx */
3996 /* Set ring buffer size */
3997 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3998 rb_bufsz = order_base_2(ring->ring_size / 8);
3999 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4001 tmp |= BUF_SWAP_32BIT;
4003 WREG32(CP_RB0_CNTL, tmp);
4005 /* Initialize the ring buffer's read and write pointers */
4006 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4008 WREG32(CP_RB0_WPTR, ring->wptr);
4010 /* set the wb address whether it's enabled or not */
4011 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4012 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4014 /* scratch register shadowing is no longer supported */
4015 WREG32(SCRATCH_UMSK, 0);
4017 if (!rdev->wb.enabled)
4018 tmp |= RB_NO_UPDATE;
4021 WREG32(CP_RB0_CNTL, tmp);
/* Ring base is programmed in units of 256 bytes. */
4023 rb_addr = ring->gpu_addr >> 8;
4024 WREG32(CP_RB0_BASE, rb_addr);
4025 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4027 /* start the ring */
4028 cik_cp_gfx_start(rdev);
4029 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4030 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4032 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
/* Ring is live: expose the full VRAM size for copies via gfx. */
4036 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4037 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
/* Gfx ring read pointer: from the writeback page when WB is enabled,
 * otherwise directly from CP_RB0_RPTR. */
4042 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4043 struct radeon_ring *ring)
4047 if (rdev->wb.enabled)
4048 rptr = rdev->wb.wb[ring->rptr_offs/4];
4050 rptr = RREG32(CP_RB0_RPTR);
/* Gfx ring write pointer always comes straight from CP_RB0_WPTR. */
4055 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4056 struct radeon_ring *ring)
4058 return RREG32(CP_RB0_WPTR);
/* Publish the gfx ring write pointer to the hardware. */
4061 void cik_gfx_set_wptr(struct radeon_device *rdev,
4062 struct radeon_ring *ring)
4064 WREG32(CP_RB0_WPTR, ring->wptr);
/* Read back to flush the posted register write. */
4065 (void)RREG32(CP_RB0_WPTR);
/* Compute ring read pointer: prefer the writeback copy; otherwise
 * select this queue via SRBM (under srbm_mutex) and read
 * CP_HQD_PQ_RPTR, then restore the default SRBM selection. */
4068 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4069 struct radeon_ring *ring)
4073 if (rdev->wb.enabled) {
4074 rptr = rdev->wb.wb[ring->rptr_offs/4];
4076 mutex_lock(&rdev->srbm_mutex);
4077 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4078 rptr = RREG32(CP_HQD_PQ_RPTR);
4079 cik_srbm_select(rdev, 0, 0, 0, 0);
4080 mutex_unlock(&rdev->srbm_mutex);
/* Compute ring write pointer: prefer the writeback copy; otherwise
 * select this queue via SRBM (under srbm_mutex) and read
 * CP_HQD_PQ_WPTR, then restore the default SRBM selection. */
4086 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4087 struct radeon_ring *ring)
4091 if (rdev->wb.enabled) {
4092 /* XXX check if swapping is necessary on BE */
4093 wptr = rdev->wb.wb[ring->wptr_offs/4];
4095 mutex_lock(&rdev->srbm_mutex);
4096 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4097 wptr = RREG32(CP_HQD_PQ_WPTR);
4098 cik_srbm_select(rdev, 0, 0, 0, 0);
4099 mutex_unlock(&rdev->srbm_mutex);
/* Publish the compute ring write pointer: mirror it into the
 * writeback page, then ring the queue's doorbell. */
4105 void cik_compute_set_wptr(struct radeon_device *rdev,
4106 struct radeon_ring *ring)
4108 /* XXX check if swapping is necessary on BE */
4109 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4110 WDOORBELL32(ring->doorbell_index, ring->wptr);
/* Quiesce one compute hardware queue. Caller holds srbm_mutex (see
 * cik_cp_compute_enable): select the queue, stop wptr polling,
 * request a dequeue if the HQD is active and wait for it to drain,
 * then zero the queue pointers and restore the SRBM selection. */
4113 static void cik_compute_stop(struct radeon_device *rdev,
4114 struct radeon_ring *ring)
4118 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4119 /* Disable wptr polling. */
4120 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4121 tmp &= ~WPTR_POLL_EN;
4122 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4124 if (RREG32(CP_HQD_ACTIVE) & 1) {
4125 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4126 for (j = 0; j < rdev->usec_timeout; j++) {
4127 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4131 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4132 WREG32(CP_HQD_PQ_RPTR, 0);
4133 WREG32(CP_HQD_PQ_WPTR, 0);
4135 cik_srbm_select(rdev, 0, 0, 0, 0);
4139 * cik_cp_compute_enable - enable/disable the compute CP MEs
4141 * @rdev: radeon_device pointer
4142 * @enable: enable or disable the MEs
4144 * Halts or unhalts the compute MEs.
/* Halt or un-halt the compute micro-engines via CP_MEC_CNTL.
 * Enable: clear the halt bits. Disable: first stop both compute
 * queues (under srbm_mutex), then halt MEC1/MEC2 and mark both
 * compute rings not ready. */
4146 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4149 WREG32(CP_MEC_CNTL, 0);
4152 * To make hibernation reliable we need to clear compute ring
4153 * configuration before halting the compute ring.
4155 mutex_lock(&rdev->srbm_mutex);
4156 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4157 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4158 mutex_unlock(&rdev->srbm_mutex);
4160 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4161 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4162 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4168 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4170 * @rdev: radeon_device pointer
4172 * Loads the compute MEC1&2 ucode.
4173 * Returns 0 for success, -EINVAL if the ucode is not available.
/* Upload the MEC1 (and, on Kaveri, MEC2) microcode with the compute
 * MEs halted. Same dual firmware layout as the gfx loader:
 * header-carrying little-endian images vs legacy big-endian ones. */
4175 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4182 cik_cp_compute_enable(rdev, false);
/* Header-carrying firmware: __le32 payload behind a gfx header. */
4185 const struct gfx_firmware_header_v1_0 *mec_hdr =
4186 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4187 const __le32 *fw_data;
4190 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
/* MEC1 */
4193 fw_data = (const __le32 *)
4194 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4195 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4196 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4197 for (i = 0; i < fw_size; i++)
4198 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4199 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
/* MEC2 exists only on Kaveri and has its own firmware image here. */
4202 if (rdev->family == CHIP_KAVERI) {
4203 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4204 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4206 fw_data = (const __le32 *)
4207 (rdev->mec2_fw->data +
4208 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4209 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4210 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4211 for (i = 0; i < fw_size; i++)
4212 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4213 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
/* Legacy headerless firmware: __be32 payload, fixed size. */
4216 const __be32 *fw_data;
4219 fw_data = (const __be32 *)rdev->mec_fw->data;
4220 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4221 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4222 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4223 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4225 if (rdev->family == CHIP_KAVERI) {
/* NOTE(review): the legacy path programs MEC2 from rdev->mec_fw,
 * not mec2_fw — presumably the old image covers both MECs; confirm. */
4227 fw_data = (const __be32 *)rdev->mec_fw->data;
4228 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4229 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4230 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4231 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4239 * cik_cp_compute_start - start the compute queues
4241 * @rdev: radeon_device pointer
4243 * Enable the compute queues.
4244 * Returns 0 for success, error for failure.
/* Bring the compute queues up by un-halting the MECs. */
4246 static int cik_cp_compute_start(struct radeon_device *rdev)
4248 cik_cp_compute_enable(rdev, true);
4254 * cik_cp_compute_fini - stop the compute queues
4256 * @rdev: radeon_device pointer
4258 * Stop the compute queues and tear down the driver queue
/* Tear down both compute rings: halt the MECs, then for each ring
 * unpin and unreference its MQD buffer object. */
4261 static void cik_cp_compute_fini(struct radeon_device *rdev)
4265 cik_cp_compute_enable(rdev, false);
4267 for (i = 0; i < 2; i++) {
4269 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4271 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4273 if (rdev->ring[idx].mqd_obj) {
4274 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4275 if (unlikely(r != 0))
4276 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4278 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4279 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4281 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4282 rdev->ring[idx].mqd_obj = NULL;
/* Free the MEC HPD EOP buffer object allocated by cik_mec_init(). */
4287 static void cik_mec_fini(struct radeon_device *rdev)
4291 if (rdev->mec.hpd_eop_obj) {
4292 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4293 if (unlikely(r != 0))
4294 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4295 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4296 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4298 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4299 rdev->mec.hpd_eop_obj = NULL;
4303 #define MEC_HPD_SIZE 2048
/* Record the MEC topology for this family and allocate, pin, map and
 * zero the GTT buffer that backs the HPD EOP areas
 * (num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes). */
4305 static int cik_mec_init(struct radeon_device *rdev)
4311 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4312 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4314 if (rdev->family == CHIP_KAVERI)
4315 rdev->mec.num_mec = 2;
4317 rdev->mec.num_mec = 1;
4318 rdev->mec.num_pipe = 4;
4319 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
/* Allocate the EOP buffer only once; reuse on resume. */
4321 if (rdev->mec.hpd_eop_obj == NULL) {
4322 r = radeon_bo_create(rdev,
4323 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4325 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4326 &rdev->mec.hpd_eop_obj);
4328 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4333 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4334 if (unlikely(r != 0)) {
4338 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4339 &rdev->mec.hpd_eop_gpu_addr);
4341 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4345 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4347 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4352 /* clear memory. Not sure if this is required or not */
4353 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4355 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4356 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* Shadow copy of the CP HQD (hardware queue descriptor) register block,
 * embedded in struct bonaire_mqd below and mirrored into the CP_HQD_*
 * registers by cik_cp_compute_resume().
 * NOTE(review): several members are on lines elided from this listing.
 */
4361 struct hqd_registers
4363 u32 cp_mqd_base_addr;
4364 u32 cp_mqd_base_addr_hi;
4367 u32 cp_hqd_persistent_state;
4368 u32 cp_hqd_pipe_priority;
4369 u32 cp_hqd_queue_priority;
4372 u32 cp_hqd_pq_base_hi;
4374 u32 cp_hqd_pq_rptr_report_addr;
4375 u32 cp_hqd_pq_rptr_report_addr_hi;
4376 u32 cp_hqd_pq_wptr_poll_addr;
4377 u32 cp_hqd_pq_wptr_poll_addr_hi;
4378 u32 cp_hqd_pq_doorbell_control;
4380 u32 cp_hqd_pq_control;
4381 u32 cp_hqd_ib_base_addr;
4382 u32 cp_hqd_ib_base_addr_hi;
4384 u32 cp_hqd_ib_control;
4385 u32 cp_hqd_iq_timer;
4387 u32 cp_hqd_dequeue_request;
4388 u32 cp_hqd_dma_offload;
4389 u32 cp_hqd_sema_cmd;
4390 u32 cp_hqd_msg_type;
4391 u32 cp_hqd_atomic0_preop_lo;
4392 u32 cp_hqd_atomic0_preop_hi;
4393 u32 cp_hqd_atomic1_preop_lo;
4394 u32 cp_hqd_atomic1_preop_hi;
4395 u32 cp_hqd_hq_scheduler0;
4396 u32 cp_hqd_hq_scheduler1;
/* Fields of struct bonaire_mqd (the in-memory queue descriptor written
 * into the MQD buffer object by cik_cp_compute_resume(); the struct's
 * declaration line itself is on an elided line above).
 */
4403 u32 dispatch_initiator;
4407 u32 pipeline_stat_enable;
4408 u32 perf_counter_enable;
4414 u32 resource_limits;
/* static_thread_mgmt* are set to all-ones (all CUs enabled) in
 * cik_cp_compute_resume(). */
4415 u32 static_thread_mgmt01[2];
4417 u32 static_thread_mgmt23[2];
4419 u32 thread_trace_enable;
4422 u32 vgtcs_invoke_count[2];
/* Shadow of the CP_HQD_* register block, see struct hqd_registers. */
4423 struct hqd_registers queue_state;
4425 u32 interrupt_queue[64];
4429 * cik_cp_compute_resume - setup the compute queue registers
4431 * @rdev: radeon_device pointer
4433 * Program the compute queues and test them to make sure they
4435 * Returns 0 for success, error for failure.
/* Program the compute pipes and both compute rings: set per-pipe EOP
 * buffers under srbm_mutex, then for each ring allocate/pin/map its MQD
 * buffer, fill in struct bonaire_mqd while the SRBM is selected to that
 * ring's me/pipe/queue, mirror the MQD into the CP_HQD_* registers,
 * activate the queue, and finally ring-test it.
 */
4437 static int cik_cp_compute_resume(struct radeon_device *rdev)
4441 bool use_doorbell = true;
4447 struct bonaire_mqd *mqd;
4449 r = cik_cp_compute_start(rdev);
4453 /* fix up chicken bits */
4454 tmp = RREG32(CP_CPF_DEBUG);
4456 WREG32(CP_CPF_DEBUG, tmp);
4458 /* init the pipes */
4459 mutex_lock(&rdev->srbm_mutex);
/* Pipes 0-3 belong to ME1, pipes 4-7 to ME2 (KV only has the 2nd MEC). */
4461 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4462 int me = (i < 4) ? 1 : 2;
4463 int pipe = (i < 4) ? i : (i - 4);
4465 cik_srbm_select(rdev, me, pipe, 0, 0);
/* Each pipe gets its own MEC_HPD_SIZE*2 slice of the shared EOP bo. */
4467 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4468 /* write the EOP addr */
4469 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4470 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4472 /* set the VMID assigned */
4473 WREG32(CP_HPD_EOP_VMID, 0);
4475 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4476 tmp = RREG32(CP_HPD_EOP_CONTROL);
4477 tmp &= ~EOP_SIZE_MASK;
4478 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4479 WREG32(CP_HPD_EOP_CONTROL, tmp);
4482 cik_srbm_select(rdev, 0, 0, 0, 0);
4483 mutex_unlock(&rdev->srbm_mutex);
4485 /* init the queues. Just two for now. */
4486 for (i = 0; i < 2; i++) {
4488 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4490 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4492 if (rdev->ring[idx].mqd_obj == NULL) {
4493 r = radeon_bo_create(rdev,
4494 sizeof(struct bonaire_mqd),
4496 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4497 NULL, &rdev->ring[idx].mqd_obj);
4499 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4504 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4505 if (unlikely(r != 0)) {
/* On any reserve/pin/map failure, tear down both compute queues. */
4506 cik_cp_compute_fini(rdev);
4509 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4512 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4513 cik_cp_compute_fini(rdev);
4516 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4518 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4519 cik_cp_compute_fini(rdev);
4523 /* init the mqd struct */
4524 memset(buf, 0, sizeof(struct bonaire_mqd));
4526 mqd = (struct bonaire_mqd *)buf;
4527 mqd->header = 0xC0310800;
/* All-ones thread management masks: enable all compute units. */
4528 mqd->static_thread_mgmt01[0] = 0xffffffff;
4529 mqd->static_thread_mgmt01[1] = 0xffffffff;
4530 mqd->static_thread_mgmt23[0] = 0xffffffff;
4531 mqd->static_thread_mgmt23[1] = 0xffffffff;
/* The remaining HQD programming happens with this ring's
 * me/pipe/queue selected in the SRBM. */
4533 mutex_lock(&rdev->srbm_mutex);
4534 cik_srbm_select(rdev, rdev->ring[idx].me,
4535 rdev->ring[idx].pipe,
4536 rdev->ring[idx].queue, 0);
4538 /* disable wptr polling */
4539 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4540 tmp &= ~WPTR_POLL_EN;
4541 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4543 /* enable doorbell? */
4544 mqd->queue_state.cp_hqd_pq_doorbell_control =
4545 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4547 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4549 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4550 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4551 mqd->queue_state.cp_hqd_pq_doorbell_control);
4553 /* disable the queue if it's active */
4554 mqd->queue_state.cp_hqd_dequeue_request = 0;
4555 mqd->queue_state.cp_hqd_pq_rptr = 0;
4556 mqd->queue_state.cp_hqd_pq_wptr= 0;
4557 if (RREG32(CP_HQD_ACTIVE) & 1) {
4558 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* Poll until the HQD deactivates, bounded by usec_timeout. */
4559 for (j = 0; j < rdev->usec_timeout; j++) {
4560 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4564 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4565 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4566 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4569 /* set the pointer to the MQD */
4570 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4571 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4572 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4573 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4574 /* set MQD vmid to 0 */
4575 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4576 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4577 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4579 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4580 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4581 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4582 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4583 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4584 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4586 /* set up the HQD, this is similar to CP_RB0_CNTL */
4587 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4588 mqd->queue_state.cp_hqd_pq_control &=
4589 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4591 mqd->queue_state.cp_hqd_pq_control |=
4592 order_base_2(rdev->ring[idx].ring_size / 8);
4593 mqd->queue_state.cp_hqd_pq_control |=
4594 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4596 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4598 mqd->queue_state.cp_hqd_pq_control &=
4599 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4600 mqd->queue_state.cp_hqd_pq_control |=
4601 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4602 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4604 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4606 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4608 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4609 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4610 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4611 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4612 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4613 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4615 /* set the wb address wether it's enabled or not */
4617 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4619 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4620 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4621 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4622 upper_32_bits(wb_gpu_addr) & 0xffff;
4623 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4624 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4625 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4626 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4628 /* enable the doorbell if requested */
4630 mqd->queue_state.cp_hqd_pq_doorbell_control =
4631 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4632 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4633 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4634 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4635 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4636 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4637 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4640 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4642 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4643 mqd->queue_state.cp_hqd_pq_doorbell_control);
4645 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4646 rdev->ring[idx].wptr = 0;
4647 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4648 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4649 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4651 /* set the vmid for the queue */
4652 mqd->queue_state.cp_hqd_vmid = 0;
4653 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4655 /* activate the queue */
4656 mqd->queue_state.cp_hqd_active = 1;
4657 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4659 cik_srbm_select(rdev, 0, 0, 0, 0);
4660 mutex_unlock(&rdev->srbm_mutex);
4662 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4663 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* Mark ready optimistically, then let the ring test confirm;
 * clear the flag if the test fails. */
4665 rdev->ring[idx].ready = true;
4666 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4668 rdev->ring[idx].ready = false;
/* Enable/disable both CP front ends (gfx and compute) together. */
4674 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4676 cik_cp_gfx_enable(rdev, enable);
4677 cik_cp_compute_enable(rdev, enable);
/* Load the gfx CP microcode, then the compute (MEC) microcode.
 * Error-propagation lines are elided from this listing.
 */
4680 static int cik_cp_load_microcode(struct radeon_device *rdev)
4684 r = cik_cp_gfx_load_microcode(rdev);
4687 r = cik_cp_compute_load_microcode(rdev);
/* Tear down both CP halves: gfx ring first, then compute queues. */
4694 static void cik_cp_fini(struct radeon_device *rdev)
4696 cik_cp_gfx_fini(rdev);
4697 cik_cp_compute_fini(rdev);
/* Bring up the whole CP: with GUI-idle interrupts masked, load the
 * microcode and resume the gfx then compute rings, then re-enable the
 * GUI-idle interrupt.  Error-return lines are elided from this listing.
 */
4700 static int cik_cp_resume(struct radeon_device *rdev)
4704 cik_enable_gui_idle_interrupt(rdev, false);
4706 r = cik_cp_load_microcode(rdev);
4710 r = cik_cp_gfx_resume(rdev);
4713 r = cik_cp_compute_resume(rdev);
4717 cik_enable_gui_idle_interrupt(rdev, true);
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used before and after a soft reset for diagnostics.
 */
4722 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4724 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4725 RREG32(GRBM_STATUS));
4726 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4727 RREG32(GRBM_STATUS2));
4728 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4729 RREG32(GRBM_STATUS_SE0));
4730 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4731 RREG32(GRBM_STATUS_SE1));
4732 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4733 RREG32(GRBM_STATUS_SE2));
4734 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4735 RREG32(GRBM_STATUS_SE3));
4736 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4737 RREG32(SRBM_STATUS));
4738 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4739 RREG32(SRBM_STATUS2));
4740 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4741 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4742 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4743 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4744 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4745 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4746 RREG32(CP_STALLED_STAT1));
4747 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4748 RREG32(CP_STALLED_STAT2));
4749 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4750 RREG32(CP_STALLED_STAT3));
4751 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4752 RREG32(CP_CPF_BUSY_STAT));
4753 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4754 RREG32(CP_CPF_STALLED_STAT1));
4755 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4756 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4757 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4758 RREG32(CP_CPC_STALLED_STAT1));
4759 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4763 * cik_gpu_check_soft_reset - check which blocks are busy
4765 * @rdev: radeon_device pointer
4767 * Check which blocks are busy and return the relevant reset
4768 * mask to be used by cik_gpu_soft_reset().
4769 * Returns a mask of the blocks to be reset.
/* Build a RADEON_RESET_* bitmask from the various busy/hung status
 * registers; cik_gpu_soft_reset() translates it into GRBM/SRBM soft
 * reset bits.  A zero return means nothing looks hung.
 */
4771 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4777 tmp = RREG32(GRBM_STATUS);
4778 if (tmp & (PA_BUSY | SC_BUSY |
4779 BCI_BUSY | SX_BUSY |
4780 TA_BUSY | VGT_BUSY |
4782 GDS_BUSY | SPI_BUSY |
4783 IA_BUSY | IA_BUSY_NO_DMA))
4784 reset_mask |= RADEON_RESET_GFX;
4786 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4787 reset_mask |= RADEON_RESET_CP;
4790 tmp = RREG32(GRBM_STATUS2);
4792 reset_mask |= RADEON_RESET_RLC;
4794 /* SDMA0_STATUS_REG */
4795 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4796 if (!(tmp & SDMA_IDLE))
4797 reset_mask |= RADEON_RESET_DMA;
4799 /* SDMA1_STATUS_REG */
4800 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4801 if (!(tmp & SDMA_IDLE))
4802 reset_mask |= RADEON_RESET_DMA1;
4805 tmp = RREG32(SRBM_STATUS2);
4806 if (tmp & SDMA_BUSY)
4807 reset_mask |= RADEON_RESET_DMA;
4809 if (tmp & SDMA1_BUSY)
4810 reset_mask |= RADEON_RESET_DMA1;
4813 tmp = RREG32(SRBM_STATUS);
4816 reset_mask |= RADEON_RESET_IH;
4819 reset_mask |= RADEON_RESET_SEM;
4821 if (tmp & GRBM_RQ_PENDING)
4822 reset_mask |= RADEON_RESET_GRBM;
4825 reset_mask |= RADEON_RESET_VMC;
4827 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4828 MCC_BUSY | MCD_BUSY))
4829 reset_mask |= RADEON_RESET_MC;
4831 if (evergreen_is_display_hung(rdev))
4832 reset_mask |= RADEON_RESET_DISPLAY;
4834 /* Skip MC reset as it's mostly likely not hung, just busy */
4835 if (reset_mask & RADEON_RESET_MC) {
4836 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4837 reset_mask &= ~RADEON_RESET_MC;
4844 * cik_gpu_soft_reset - soft reset GPU
4846 * @rdev: radeon_device pointer
4847 * @reset_mask: mask of which blocks to reset
4849 * Soft reset the blocks specified in @reset_mask.
/* Soft reset the blocks named in @reset_mask: halt CP/MEC/SDMA engines,
 * quiesce the MC, translate the mask into GRBM/SRBM soft-reset bits,
 * pulse each soft-reset register, then restore the MC and re-dump
 * status for diagnostics.
 */
4851 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4853 struct evergreen_mc_save save;
4854 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4857 if (reset_mask == 0)
4860 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4862 cik_print_gpu_status_regs(rdev);
4863 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4864 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4865 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4866 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4875 /* Disable GFX parsing/prefetching */
4876 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4878 /* Disable MEC parsing/prefetching */
4879 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4881 if (reset_mask & RADEON_RESET_DMA) {
4883 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4885 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4887 if (reset_mask & RADEON_RESET_DMA1) {
4889 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4891 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* Stop memory traffic before pulsing resets. */
4894 evergreen_mc_stop(rdev, &save);
4895 if (evergreen_mc_wait_for_idle(rdev)) {
4896 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4899 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4900 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4902 if (reset_mask & RADEON_RESET_CP) {
4903 grbm_soft_reset |= SOFT_RESET_CP;
4905 srbm_soft_reset |= SOFT_RESET_GRBM;
4908 if (reset_mask & RADEON_RESET_DMA)
4909 srbm_soft_reset |= SOFT_RESET_SDMA;
4911 if (reset_mask & RADEON_RESET_DMA1)
4912 srbm_soft_reset |= SOFT_RESET_SDMA1;
4914 if (reset_mask & RADEON_RESET_DISPLAY)
4915 srbm_soft_reset |= SOFT_RESET_DC;
4917 if (reset_mask & RADEON_RESET_RLC)
4918 grbm_soft_reset |= SOFT_RESET_RLC;
4920 if (reset_mask & RADEON_RESET_SEM)
4921 srbm_soft_reset |= SOFT_RESET_SEM;
4923 if (reset_mask & RADEON_RESET_IH)
4924 srbm_soft_reset |= SOFT_RESET_IH;
4926 if (reset_mask & RADEON_RESET_GRBM)
4927 srbm_soft_reset |= SOFT_RESET_GRBM;
4929 if (reset_mask & RADEON_RESET_VMC)
4930 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC soft reset only applies to discrete parts, not IGPs. */
4932 if (!(rdev->flags & RADEON_IS_IGP)) {
4933 if (reset_mask & RADEON_RESET_MC)
4934 srbm_soft_reset |= SOFT_RESET_MC;
/* Assert then deassert GRBM soft reset; readbacks flush the writes
 * (delay lines between assert/deassert are elided in this listing). */
4937 if (grbm_soft_reset) {
4938 tmp = RREG32(GRBM_SOFT_RESET);
4939 tmp |= grbm_soft_reset;
4940 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4941 WREG32(GRBM_SOFT_RESET, tmp);
4942 tmp = RREG32(GRBM_SOFT_RESET);
4946 tmp &= ~grbm_soft_reset;
4947 WREG32(GRBM_SOFT_RESET, tmp);
4948 tmp = RREG32(GRBM_SOFT_RESET);
4951 if (srbm_soft_reset) {
4952 tmp = RREG32(SRBM_SOFT_RESET);
4953 tmp |= srbm_soft_reset;
4954 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4955 WREG32(SRBM_SOFT_RESET, tmp);
4956 tmp = RREG32(SRBM_SOFT_RESET);
4960 tmp &= ~srbm_soft_reset;
4961 WREG32(SRBM_SOFT_RESET, tmp);
4962 tmp = RREG32(SRBM_SOFT_RESET);
4965 /* Wait a little for things to settle down */
4968 evergreen_mc_resume(rdev, &save);
4971 cik_print_gpu_status_regs(rdev);
4974 struct kv_reset_save_regs {
4975 u32 gmcon_reng_execute;
/* Save the GMCON registers that a PCI config reset clobbers on KV/KB
 * IGPs, and disable RENG execution / stutter mode while the reset is
 * in flight.  Restored by kv_restore_regs_for_reset().
 */
4980 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4981 struct kv_reset_save_regs *save)
4983 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4984 save->gmcon_misc = RREG32(GMCON_MISC);
4985 save->gmcon_misc3 = RREG32(GMCON_MISC3);
4987 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4988 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4989 STCTRL_STUTTER_EN));
/* Replay a fixed sequence of GMCON PGFSM config/write pairs after a
 * PCI config reset on KV/KB IGPs, then restore the GMCON registers
 * captured by kv_save_regs_for_reset().  The magic values are a
 * hardware-specified power-gating state machine programming sequence;
 * they are not derived from driver state.
 */
4992 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4993 struct kv_reset_save_regs *save)
4997 WREG32(GMCON_PGFSM_WRITE, 0);
4998 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5000 for (i = 0; i < 5; i++)
5001 WREG32(GMCON_PGFSM_WRITE, 0);
5003 WREG32(GMCON_PGFSM_WRITE, 0);
5004 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5006 for (i = 0; i < 5; i++)
5007 WREG32(GMCON_PGFSM_WRITE, 0);
5009 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5010 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5012 for (i = 0; i < 5; i++)
5013 WREG32(GMCON_PGFSM_WRITE, 0);
5015 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5016 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5018 for (i = 0; i < 5; i++)
5019 WREG32(GMCON_PGFSM_WRITE, 0);
5021 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5022 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5024 for (i = 0; i < 5; i++)
5025 WREG32(GMCON_PGFSM_WRITE, 0);
5027 WREG32(GMCON_PGFSM_WRITE, 0);
5028 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5030 for (i = 0; i < 5; i++)
5031 WREG32(GMCON_PGFSM_WRITE, 0);
5033 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5034 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5036 for (i = 0; i < 5; i++)
5037 WREG32(GMCON_PGFSM_WRITE, 0);
5039 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5040 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5042 for (i = 0; i < 5; i++)
5043 WREG32(GMCON_PGFSM_WRITE, 0);
5045 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5046 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5048 for (i = 0; i < 5; i++)
5049 WREG32(GMCON_PGFSM_WRITE, 0);
5051 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5052 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5054 for (i = 0; i < 5; i++)
5055 WREG32(GMCON_PGFSM_WRITE, 0);
5057 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5058 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
/* Restore the saved register values last. */
5060 WREG32(GMCON_MISC3, save->gmcon_misc3);
5061 WREG32(GMCON_MISC, save->gmcon_misc);
5062 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
/* Full-asic reset via PCI config space: halt CP/MEC/SDMA, stop the MC,
 * save KV GMCON state on IGPs, disable bus mastering, trigger the PCI
 * config reset, wait for CONFIG_MEMSIZE to read back sanely, and
 * restore KV GMCON state.
 */
5065 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5067 struct evergreen_mc_save save;
5068 struct kv_reset_save_regs kv_save = { 0 };
5071 dev_info(rdev->dev, "GPU pci config reset\n");
5079 /* Disable GFX parsing/prefetching */
5080 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5082 /* Disable MEC parsing/prefetching */
5083 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5086 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5088 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5090 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5092 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5093 /* XXX other engines? */
5095 /* halt the rlc, disable cp internal ints */
5100 /* disable mem access */
5101 evergreen_mc_stop(rdev, &save);
5102 if (evergreen_mc_wait_for_idle(rdev)) {
5103 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
/* IGPs (KV/KB) need GMCON state preserved across the reset. */
5106 if (rdev->flags & RADEON_IS_IGP)
5107 kv_save_regs_for_reset(rdev, &kv_save);
5110 pci_clear_master(rdev->pdev);
5112 radeon_pci_config_reset(rdev);
5116 /* wait for asic to come out of reset */
5117 for (i = 0; i < rdev->usec_timeout; i++) {
/* CONFIG_MEMSIZE reads 0xffffffff while the asic is still in reset. */
5118 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5123 /* does asic init need to be run first??? */
5124 if (rdev->flags & RADEON_IS_IGP)
5125 kv_restore_regs_for_reset(rdev, &kv_save)
5129 * cik_asic_reset - soft reset GPU
5131 * @rdev: radeon_device pointer
5132 * @hard: force hard reset
5134 * Look up which blocks are hung and attempt
5136 * Returns 0 for success.
/* Top-level reset entry point: a hard reset goes straight to the PCI
 * config reset; otherwise try a targeted soft reset first and escalate
 * to a PCI config reset (if radeon_hard_reset allows) when blocks are
 * still hung.  The BIOS scratch "engine hung" flag brackets the
 * attempt.
 */
5138 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5143 cik_gpu_pci_config_reset(rdev);
5147 reset_mask = cik_gpu_check_soft_reset(rdev);
5150 r600_set_bios_scratch_engine_hung(rdev, true);
5152 /* try soft reset */
5153 cik_gpu_soft_reset(rdev, reset_mask);
5155 reset_mask = cik_gpu_check_soft_reset(rdev);
5157 /* try pci config reset */
5158 if (reset_mask && radeon_hard_reset)
5159 cik_gpu_pci_config_reset(rdev);
5161 reset_mask = cik_gpu_check_soft_reset(rdev);
5164 r600_set_bios_scratch_engine_hung(rdev, false);
5170 * cik_gfx_is_lockup - check if the 3D engine is locked up
5172 * @rdev: radeon_device pointer
5173 * @ring: radeon_ring structure holding ring information
5175 * Check if the 3D engine is locked up (CIK).
5176 * Returns true if the engine is locked, false if not.
/* Lockup detector for the gfx ring: if none of the GFX/COMPUTE/CP
 * reset bits are raised, refresh the ring's lockup tracking and report
 * healthy; otherwise defer to the generic ring lockup test.
 */
5178 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5180 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5182 if (!(reset_mask & (RADEON_RESET_GFX |
5183 RADEON_RESET_COMPUTE |
5184 RADEON_RESET_CP))) {
5185 radeon_ring_lockup_update(rdev, ring);
5188 return radeon_ring_test_lockup(rdev, ring);
5193 * cik_mc_program - program the GPU memory controller
5195 * @rdev: radeon_device pointer
5197 * Set the location of vram, gart, and AGP in the GPU's
5198 * physical address space (CIK).
/* Program the memory controller apertures: zero the HDP tiling regs,
 * stop the MC, lock out VGA, set the system/FB aperture windows and
 * HDP non-surface range, clear the AGP window, then resume the MC and
 * disable the VGA renderer so it cannot scribble over VRAM objects.
 */
5200 static void cik_mc_program(struct radeon_device *rdev)
5202 struct evergreen_mc_save save;
5206 /* Initialize HDP */
5207 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5208 WREG32((0x2c14 + j), 0x00000000);
5209 WREG32((0x2c18 + j), 0x00000000);
5210 WREG32((0x2c1c + j), 0x00000000);
5211 WREG32((0x2c20 + j), 0x00000000);
5212 WREG32((0x2c24 + j), 0x00000000);
5214 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5216 evergreen_mc_stop(rdev, &save);
5217 if (radeon_mc_wait_for_idle(rdev)) {
5218 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5220 /* Lockout access through VGA aperture*/
5221 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5222 /* Update configuration */
5223 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5224 rdev->mc.vram_start >> 12);
5225 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5226 rdev->mc.vram_end >> 12);
5227 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5228 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION packs end (top 16 bits) and start (bottom 16 bits)
 * in 16MB units. */
5229 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5230 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5231 WREG32(MC_VM_FB_LOCATION, tmp);
5232 /* XXX double check these! */
5233 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5234 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5235 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK; program an empty window (bot > top). */
5236 WREG32(MC_VM_AGP_BASE, 0);
5237 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5238 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5239 if (radeon_mc_wait_for_idle(rdev)) {
5240 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5242 evergreen_mc_resume(rdev, &save);
5243 /* we need to own VRAM, so turn off the VGA renderer here
5244 * to stop it overwriting our objects */
5245 rv515_vga_render_disable(rdev);
5249 * cik_mc_init - initialize the memory controller driver params
5251 * @rdev: radeon_device pointer
5253 * Look up the amount of vram, vram width, and decide how to place
5254 * vram and gart within the GPU's physical address space (CIK).
5255 * Returns 0 for success.
/* Query VRAM parameters: derive bus width from channel size and count,
 * read aperture base/size from PCI BAR 0, read VRAM size (in MB) from
 * CONFIG_MEMSIZE, then let si_vram_gtt_location() place VRAM and GTT
 * in the GPU address space.
 * NOTE(review): the chansize assignment and the NOOFCHAN switch body
 * are on elided lines.
 */
5257 static int cik_mc_init(struct radeon_device *rdev)
5260 int chansize, numchan;
5262 /* Get VRAM informations */
5263 rdev->mc.vram_is_ddr = true;
5264 tmp = RREG32(MC_ARB_RAMCFG);
5265 if (tmp & CHANSIZE_MASK) {
5270 tmp = RREG32(MC_SHARED_CHMAP);
5271 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5301 rdev->mc.vram_width = numchan * chansize;
5302 /* Could aper size report 0 ? */
5303 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5304 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5305 /* size in MB on si */
5306 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5307 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5308 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5309 si_vram_gtt_location(rdev, &rdev->mc);
5310 radeon_update_bandwidth_info(rdev);
5317 * VMID 0 is the physical GPU addresses as used by the kernel.
5318 * VMIDs 1-15 are used for userspace clients and are handled
5319 * by the radeon vm/hsa code.
5322 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5324 * @rdev: radeon_device pointer
5326 * Flush the TLB for the VMID 0 page table (CIK).
/* Flush the HDP cache, then invalidate the VM context 0 TLB (bit 0 of
 * VM_INVALIDATE_REQUEST selects context 0).
 */
5328 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5330 /* flush hdp cache */
5331 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5333 /* bits 0-15 are the VM contexts0-15 */
5334 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5338 * cik_pcie_gart_enable - gart enable
5340 * @rdev: radeon_device pointer
5342 * This sets up the TLBs, programs the page tables for VMID0,
5343 * sets up the hw for VMIDs 1-15 which are allocated on
5344 * demand, and sets up the global locations for the LDS, GDS,
5345 * and GPUVM for FSA64 clients (CIK).
5346 * Returns 0 for success, errors for failure.
/* Enable the GART/VM: pin the GART table, program the L1 TLB and L2
 * cache, set up VM context 0 over the GTT range, restore contexts 1-15
 * from the saved table addresses and enable their fault handling, fix
 * up CHUB_CONTROL on Kaveri, program SH_MEM/SDMA per-VMID defaults
 * under srbm_mutex, and finally flush the TLB.
 */
5348 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5352 if (rdev->gart.robj == NULL) {
5353 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5356 r = radeon_gart_table_vram_pin(rdev);
5359 /* Setup TLB control */
5360 WREG32(MC_VM_MX_L1_TLB_CNTL,
5363 ENABLE_L1_FRAGMENT_PROCESSING |
5364 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5365 ENABLE_ADVANCED_DRIVER_MODEL |
5366 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5367 /* Setup L2 cache */
5368 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5369 ENABLE_L2_FRAGMENT_PROCESSING |
5370 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5371 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5372 EFFECTIVE_L2_QUEUE_SIZE(7) |
5373 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5374 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5375 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5377 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5378 /* setup context0 */
5379 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5380 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5381 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* Unmapped context-0 accesses are redirected to the dummy page. */
5382 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5383 (u32)(rdev->dummy_page.addr >> 12));
5384 WREG32(VM_CONTEXT0_CNTL2, 0);
5385 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5386 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5392 /* restore context1-15 */
5393 /* set vm size, must be a multiple of 4 */
5394 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5395 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
/* Contexts 1-7 live at CONTEXT0_... + i*4, contexts 8-15 at the
 * CONTEXT8 bank (see cik_pcie_gart_disable() for the mirror read). */
5396 for (i = 1; i < 16; i++) {
5398 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5399 rdev->vm_manager.saved_table_addr[i]);
5401 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5402 rdev->vm_manager.saved_table_addr[i]);
5405 /* enable context1-15 */
5406 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5407 (u32)(rdev->dummy_page.addr >> 12));
5408 WREG32(VM_CONTEXT1_CNTL2, 4);
5409 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5410 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5411 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5412 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5413 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5414 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5415 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5416 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5417 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5418 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5419 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5420 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5421 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5422 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5424 if (rdev->family == CHIP_KAVERI) {
5425 u32 tmp = RREG32(CHUB_CONTROL);
5427 WREG32(CHUB_CONTROL, tmp);
5430 /* XXX SH_MEM regs */
5431 /* where to put LDS, scratch, GPUVM in FSA64 space */
5432 mutex_lock(&rdev->srbm_mutex);
5433 for (i = 0; i < 16; i++) {
5434 cik_srbm_select(rdev, 0, 0, 0, i);
5435 /* CP and shaders */
5436 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5437 WREG32(SH_MEM_APE1_BASE, 1);
5438 WREG32(SH_MEM_APE1_LIMIT, 0);
5439 WREG32(SH_MEM_BASES, 0);
5441 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5442 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5443 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5444 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5445 /* XXX SDMA RLC - todo */
5447 cik_srbm_select(rdev, 0, 0, 0, 0);
5448 mutex_unlock(&rdev->srbm_mutex);
5450 cik_pcie_gart_tlb_flush(rdev);
5451 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5452 (unsigned)(rdev->mc.gtt_size >> 20),
5453 (unsigned long long)rdev->gart.table_addr);
5454 rdev->gart.ready = true;
5459 * cik_pcie_gart_disable - gart disable
5461 * @rdev: radeon_device pointer
5463 * This disables all VM page table (CIK).
/* Disable the GART/VM: save contexts 1-15's page table base addresses
 * (so cik_pcie_gart_enable() can restore them), disable all VM
 * contexts, put the L1 TLB and L2 cache into pass-through defaults,
 * and unpin the GART table.
 */
5465 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5469 for (i = 1; i < 16; ++i) {
5472 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5474 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5475 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5478 /* Disable all tables */
5479 WREG32(VM_CONTEXT0_CNTL, 0);
5480 WREG32(VM_CONTEXT1_CNTL, 0);
5481 /* Setup TLB control */
5482 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5483 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5484 /* Setup L2 cache */
5486 ENABLE_L2_FRAGMENT_PROCESSING |
5487 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5488 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5489 EFFECTIVE_L2_QUEUE_SIZE(7) |
5490 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5491 WREG32(VM_L2_CNTL2, 0);
5492 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5493 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5494 radeon_gart_table_vram_unpin(rdev);
5498 * cik_pcie_gart_fini - vm fini callback
5500 * @rdev: radeon_device pointer
5502 * Tears down the driver GART/VM setup (CIK).
/* Full GART teardown: disable the hardware, free the table VRAM,
 * and finalize the generic gart state.
 */
5504 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5506 cik_pcie_gart_disable(rdev);
5507 radeon_gart_table_vram_free(rdev);
5508 radeon_gart_fini(rdev);
5513 * cik_ib_parse - vm ib_parse callback
5515 * @rdev: radeon_device pointer
5516 * @ib: indirect buffer pointer
5518 * CIK uses hw IB checking so this is a nop (CIK).
5520 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5527 * VMID 0 is the physical GPU addresses as used by the kernel.
5528 * VMIDs 1-15 are used for userspace clients and are handled
5529 * by the radeon vm/hsa code.
5532 * cik_vm_init - cik vm init callback
5534 * @rdev: radeon_device pointer
5536 * Inits cik specific vm parameters (number of VMs, base of vram for
5537 * VMIDs 1-15) (CIK).
5538 * Returns 0 for success.
/* Initialize CIK VM parameters: 16 VMIDs total (0 reserved for the
 * kernel), and on IGPs the VRAM base offset comes from MC_VM_FB_OFFSET
 * since carve-out memory is not at physical address 0.
 */
5540 int cik_vm_init(struct radeon_device *rdev)
5544 * VMID 0 is reserved for System
5545 * radeon graphics/compute will use VMIDs 1-15
5547 rdev->vm_manager.nvm = 16;
5548 /* base offset of vram pages */
5549 if (rdev->flags & RADEON_IS_IGP) {
5550 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5552 rdev->vm_manager.vram_base_offset = tmp;
5554 rdev->vm_manager.vram_base_offset = 0;
/*
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Currently a nop: no CIK-specific VM state to release.
 */
void cik_vm_fini(struct radeon_device *rdev)
/*
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client is a packed 4-character ASCII tag; unpack it into a
	 * NUL-terminated string, most significant byte first */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client-id field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
/*
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VMID whose page table base is being updated
 * @pd_addr: new page directory address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 * NOTE(review): several structural lines (braces, if/else for the
 * vm_id < 8 split and the PFP-only tail) are elided in this extract.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
	/* only the gfx ring has a PFP; compute rings write via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base for this VMID; contexts 0-7 and 8-15
	 * are in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring,
			  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	radeon_ring_write(ring,
			  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* switch SRBM register access to the target VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM access back to VMID 0 (kernel) */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP so page-table writes reach memory before the TLB flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0)));   /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
/* Toggle the gfx context busy/empty interrupt sources in
 * CP_INT_CNTL_RING0 (the enable/disable branch is elided in this
 * extract: |= on enable, &= ~ otherwise). */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

/* Enable/disable RLC load balancing (LBPW) via RLC_LB_CNTL. */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
	tmp = RREG32(RLC_LB_CNTL);
	tmp |= LOAD_BALANCE_ENABLE;
	tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);
/* Poll until the RLC serdes masters report idle: first the per-CU
 * masters on every SE/SH pair, then the non-CU masters.  Each poll
 * loop is bounded by rdev->usec_timeout. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
	/* restore broadcast addressing to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write @rlc back into RLC_CNTL (only if it differs from the current
 * value; the comparison line is elided in this extract). */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
	tmp = RREG32(RLC_CNTL);
	WREG32(RLC_CNTL, rlc);

/* Halt the RLC if it is running and wait for it (and its serdes
 * masters) to go idle.  Returns the original RLC_CNTL value so the
 * caller can restore it via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC GPM to report idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)

		cik_wait_for_rlc_serdes(rdev);
/* Ask the RLC to enter safe mode (message via RLC_GPR_REG2) and wait,
 * bounded by usec_timeout, for power/clock status then for the REQ bit
 * to clear, indicating the RLC has acknowledged. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)

/* Ask the RLC to leave safe mode; fire-and-forget (no ack wait). */
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
/*
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): clear RLC_CNTL, mask the gui
 * idle interrupts and wait for the serdes masters to drain.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);

/*
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK) and re-enable the gui idle
 * interrupts.
 */
static void cik_rlc_start(struct radeon_device *rdev)
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);
/*
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 * NOTE(review): stop/reset sequencing, the new_fw branch condition and
 * the switch-case labels are elided in this extract.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
	/* disable CGCG/CGLS while (re)loading the RLC */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balancing params with broadcast SE/SH addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* new-style firmware: little-endian payload described by a header */
	const struct rlc_firmware_header_v1_0 *hdr =
		(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
	const __le32 *fw_data = (const __le32 *)
		(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

	radeon_ucode_print_rlc_hdr(&hdr->header);

	size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	/* legacy firmware: big-endian blob, per-family fixed sizes
	 * (case labels elided in this extract) */
	const __be32 *fw_data;

	switch (rdev->family) {
		size = BONAIRE_RLC_UCODE_SIZE;
		size = KV_RLC_UCODE_SIZE;
		size = KB_RLC_UCODE_SIZE;
		size = ML_RLC_UCODE_SIZE;

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);
/* Enable/disable coarse-grain clock gating (CGCG) for gfx.  The RLC is
 * halted, the serdes write masks are broadcast, then the RLC is
 * restored before RLC_CGCG_CGLS_CTRL is updated (write elided unless
 * the value changed). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
		/* disable path (else branch elided in this extract) */
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back to flush/settle the CB clock control */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);

		WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Enable/disable medium-grain clock gating (MGCG) for gfx, plus the
 * related CP/RLC memory light-sleep and CGTS shader gating, all gated
 * on the relevant rdev->cg_flags bits.  NOTE(review): several braces,
 * else branches and conditional writes are elided in this extract. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				WREG32(CP_MEM_SLP_CNTL, data);

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* push the override to the serdes with the RLC halted */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS: per-SM clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			WREG32(CGTS_SM_CTRL_REG, data);
		/* disable path (else branch elided in this extract) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);

		/* force CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
/* MC registers that carry the per-block clock-gating / light-sleep
 * enable bits (initializer list elided in this extract); iterated by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
/* Enable/disable memory-controller light sleep on every register in
 * mc_cg_registers[], writing back only when the value changed (the
 * orig != data check is elided in this extract). */
static void cik_enable_mc_ls(struct radeon_device *rdev,
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
			data |= MC_LS_ENABLE;
			data &= ~MC_LS_ENABLE;
		WREG32(mc_cg_registers[i], data);

/* Same pattern as cik_enable_mc_ls(), but for the MC medium-grain
 * clock-gating enable bit. */
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
			data |= MC_CG_ENABLE;
			data &= ~MC_CG_ENABLE;
		WREG32(mc_cg_registers[i], data);
/* Enable/disable SDMA medium-grain clock gating on both SDMA engines
 * (each engine is the same register block at a different offset). */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
		/* disable path (else branch elided in this extract) */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);

/* Enable/disable SDMA memory light sleep on both SDMA engines.
 * NOTE(review): the mask set/clear lines are elided in this extract. */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
		/* disable path (else branch elided in this extract) */
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Enable/disable UVD medium-grain clock gating via the indirect UVD
 * context registers plus UVD_CGC_CTRL (bit masks elided in this
 * extract). */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		WREG32(UVD_CGC_CTRL, data);
		/* disable path (else branch elided in this extract) */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		WREG32(UVD_CGC_CTRL, data);

/* Enable/disable BIF (bus interface) memory light sleep in PCIE_CNTL2;
 * written back only on change (check elided in this extract). */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);

		WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Enable/disable HDP medium-grain clock gating: note the polarity —
 * the register bit DISABLES gating, so enable clears it. */
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
	orig = data = RREG32(HDP_HOST_PATH_CNTL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
		data &= ~CLOCK_GATING_DIS;
		data |= CLOCK_GATING_DIS;

		WREG32(HDP_HOST_PATH_CNTL, data);

/* Enable/disable HDP memory light sleep (HDP_MEM_POWER_LS). */
static void cik_enable_hdp_ls(struct radeon_device *rdev,
	orig = data = RREG32(HDP_MEM_POWER_LS);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
		data |= HDP_LS_ENABLE;
		data &= ~HDP_LS_ENABLE;

		WREG32(HDP_MEM_POWER_LS, data);
/* Dispatch clock-gating enable/disable to each hardware block selected
 * in the @block bitmask.  For gfx, MGCG must be enabled before CGCG
 * and disabled after it ("order matters"). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
	if (block & RADEON_CG_BLOCK_GFX) {
		/* quiesce gui idle interrupts while reprogramming gfx CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		cik_enable_mgcg(rdev, true);
		cik_enable_cgcg(rdev, true);
		cik_enable_cgcg(rdev, false);
		cik_enable_mgcg(rdev, false);
		cik_enable_gui_idle_interrupt(rdev, true);

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);

	if (block & RADEON_CG_BLOCK_UVD) {
		cik_enable_uvd_mgcg(rdev, enable);

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
/* Enable clock gating on all supported blocks at init: gfx first, then
 * UVD internal CG, then the remaining blocks in one call. */
static void cik_init_cg(struct radeon_device *rdev)
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);

/* Disable clock gating on teardown, in the reverse order of
 * cik_init_cg(): non-gfx blocks first, gfx last. */
static void cik_fini_cg(struct radeon_device *rdev)
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* RLC_PG_CNTL helpers: each toggles one power-gating control bit,
 * gated on the matching rdev->pg_flags capability.  Write-back-on-
 * change checks are elided in this extract. */

/* SMU clock slowdown on power-up. */
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
		WREG32(RLC_PG_CNTL, data);

/* SMU clock slowdown on power-down. */
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
		WREG32(RLC_PG_CNTL, data);

/* CP power gating: note inverted polarity (bit DISABLES CP PG). */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
		data &= ~DISABLE_CP_PG;
		data |= DISABLE_CP_PG;
		WREG32(RLC_PG_CNTL, data);

/* GDS power gating: same inverted polarity as CP PG. */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
		data &= ~DISABLE_GDS_PG;
		data |= DISABLE_GDS_PG;
		WREG32(RLC_PG_CNTL, data);
/* Legacy-firmware jump-table geometry (entries / dword offsets). */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy each CP microengine's jump table (CE, PFP, ME, MEC[, MEC2 on
 * Kaveri]) into the RLC's cp_table BO so the RLC can restore CP state
 * after power gating.  New-style firmware carries the table location
 * in its header (little-endian); legacy firmware uses the fixed
 * offsets above (big-endian).  NOTE(review): the new_fw branch
 * condition, Kaveri max_me bump and several braces are elided in this
 * extract. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 table_offset, table_size;

	if (rdev->family == CHIP_KAVERI)

	if (rdev->rlc.cp_table_ptr == NULL)

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		const __le32 *fw_data;
		const struct gfx_firmware_header_v1_0 *hdr;

		/* me 0 = CE */
		hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
		fw_data = (const __le32 *)
			(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		table_offset = le32_to_cpu(hdr->jt_offset);
		table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			/* PFP */
			hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
			fw_data = (const __le32 *)
				(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			/* ME */
			hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
			fw_data = (const __le32 *)
				(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			/* MEC */
			hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
			fw_data = (const __le32 *)
				(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
			/* MEC2 (else branch; Kaveri only per max_me) */
			hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
			fw_data = (const __le32 *)
				(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		bo_offset += table_size;
		/* legacy firmware path: big-endian blob, fixed offsets */
		const __be32 *fw_data;
		table_size = CP_ME_TABLE_SIZE;

		fw_data = (const __be32 *)rdev->ce_fw->data;
		table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		bo_offset += table_size;
/* Enable/disable gfx coarse-grain power gating: set/clear GFX_PG_ENABLE
 * in RLC_PG_CNTL and the auto-PG bit in RLC_AUTO_PG_CTRL.
 * NOTE(review): the write-on-change checks and the AUTO_PG_EN set line
 * are elided in this extract. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		WREG32(RLC_AUTO_PG_CTRL, data);
		/* disable path (else branch elided in this extract) */
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, data);

		/* read to flush; any outstanding PG is settled */
		data = RREG32(DB_RENDER_CONTROL);
/* Return a bitmap of active (non-harvested) CUs for shader engine @se,
 * shader array @sh.  The shader-array config registers carry inactive-
 * CU bits, so the result is their complement masked to
 * max_cu_per_sh.  NOTE(review): the mask-building loop body is elided
 * in this extract. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
	u32 mask = 0, tmp, tmp1;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {

	return (~tmp) & mask;
/* Build the always-on CU mask for power gating: walk every SE/SH,
 * count active CUs and pack a per-SH bitmap into RLC_PG_AO_CU_MASK,
 * then program the active CU count into RLC_MAX_PG_CU. */
static void cik_init_ao_cu_mask(struct radeon_device *rdev)
	u32 i, j, k, active_cu_number = 0;
	u32 mask, counter, cu_bitmap;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
			active_cu_number += counter;
			/* 16 bits per SE, 8 bits per SH within it */
			tmp |= (cu_bitmap << (i * 16 + j * 8));

	WREG32(RLC_PG_AO_CU_MASK, tmp);

	tmp = RREG32(RLC_MAX_PG_CU);
	tmp &= ~MAX_PU_CU_MASK;
	tmp |= MAX_PU_CU(active_cu_number);
	WREG32(RLC_MAX_PG_CU, tmp);
/* Enable/disable static per-CU medium-grain power gating (write-on-
 * change check elided in this extract). */
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
		data |= STATIC_PER_CU_PG_ENABLE;
		data &= ~STATIC_PER_CU_PG_ENABLE;
		WREG32(RLC_PG_CNTL, data);

/* Enable/disable dynamic per-CU medium-grain power gating. */
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
		data |= DYN_PER_CU_PG_ENABLE;
		data &= ~DYN_PER_CU_PG_ENABLE;
		WREG32(RLC_PG_CNTL, data);
/* Offsets into the RLC GPM scratch area used for PG state. */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC for gfx power gating: publish the clear-state
 * descriptor (addr hi/lo + size) and the save/restore register list
 * into GPM scratch, point the RLC at the save/restore and cp-table
 * BOs, then tune poll counts and PG delays.
 * NOTE(review): some bit-field set lines and the write-on-change
 * checks are elided in this extract. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
		/* no clear-state buffer: zero the descriptor (else branch) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);

	orig = data = RREG32(RLC_PG_CNTL);
	WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);
/* Toggle all three gfx power-gating mechanisms together: coarse-grain,
 * static per-CU and dynamic per-CU. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Compute the dword count of the clear-state buffer that
 * cik_get_csb_buffer() will emit: preamble, context control, every
 * SECT_CONTEXT extent (2 header dwords + payload each), the
 * pa_sc_raster_config pair, and the end-of-clear-state / clear-state
 * packets.  Returns 0 if there is no cs_data.
 * NOTE(review): the count accumulation for the fixed packets is elided
 * in this extract. */
u32 cik_get_csb_size(struct radeon_device *rdev)
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)

	/* begin clear state */
	/* context control state */

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	/* end clear state */
/* Fill @buffer (sized per cik_get_csb_size()) with the clear-state
 * command stream: preamble begin, context control, all SECT_CONTEXT
 * register extents, family-specific pa_sc_raster_config values,
 * preamble end and a CLEAR_STATE packet.  All dwords are stored
 * little-endian.  NOTE(review): switch-case labels and some packet
 * lines are elided in this extract. */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	/* per-family raster config (case labels elided in this extract) */
	switch (rdev->family) {
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
/* Enable power gating at init (only if any pg_flags are set): SMU
 * clock slowdown first, then — when gfx PG is supported — the gfx PG
 * setup, CP/GDS PG, the always-on CU mask and finally the gfx PG
 * toggles. */
static void cik_init_pg(struct radeon_device *rdev)
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);

/* Disable power gating on teardown, reversing cik_init_pg(). */
static void cik_fini_pg(struct radeon_device *rdev)
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching commands and processing them until the pointers are
 * equal again at which point it updates the rptr.
 */

/*
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK): set the enable bits in
 * IH_CNTL and IH_RB_CNTL and mark the IH as enabled.
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;

/*
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK), reset its read/write
 * pointers and mark the IH as disabled.
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
/*
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK): gfx ring,
 * both SDMA engines, all compute pipes, GRBM/SRBM, every CRTC's
 * vblank/vline and pageflip sources, DAC autodetect and the six
 * hotplug pads (keeping only the configured HPD polarity bits).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
	/* gfx ring: preserve only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: drop the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);

	WREG32(GRBM_INT_CNTL, 0);

	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);

	/* pageflip interrupts */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
6855  * cik_irq_init - init and enable the interrupt ring
6857  * @rdev: radeon_device pointer
6859  * Allocate a ring buffer for the interrupt controller,
6860  * enable the RLC, disable interrupts, enable the IH
6861  * ring buffer and enable it (CIK).
6862  * Called at device load and resume.
6863  * Returns 0 for success, errors for failure.
6865 static int cik_irq_init(struct radeon_device *rdev)
6869 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the IH ring buffer; freed again below if RLC bring-up fails */
6872 	ret = r600_ih_ring_alloc(rdev);
/* mask everything before (re)programming the IH */
6877 	cik_disable_interrupts(rdev);
6880 	ret = cik_rlc_resume(rdev);
/* RLC failed to come up: release the ring we just allocated */
6882 		r600_ih_ring_fini(rdev);
6886 	/* setup interrupt control */
6887 	/* set dummy read address to dummy page address */
6888 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6889 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6890 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6891 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6893 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6894 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6895 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6896 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
/* program the ring base (256-byte aligned) and size (log2 of dword count) */
6898 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6899 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6901 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6902 		      IH_WPTR_OVERFLOW_CLEAR |
6905 	if (rdev->wb.enabled)
6906 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6908 	/* set the writeback address whether it's enabled or not */
6909 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6910 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6912 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6914 	/* set rptr, wptr to 0 */
6915 	WREG32(IH_RB_RPTR, 0);
6916 	WREG32(IH_RB_WPTR, 0);
6918 	/* Default settings for IH_CNTL (disabled at first) */
6919 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6920 	/* RPTR_REARM only works if msi's are enabled */
6921 	if (rdev->msi_enabled)
6922 		ih_cntl |= RPTR_REARM;
6923 	WREG32(IH_CNTL, ih_cntl);
6925 	/* force the active interrupt state to all disabled */
6926 	cik_disable_interrupt_state(rdev);
/* bus mastering must be on for MSI / IH writeback DMA to reach host memory */
6928 	pci_set_master(rdev->pdev);
6931 	cik_enable_interrupts(rdev);
6937  * cik_irq_set - enable/disable interrupt sources
6939  * @rdev: radeon_device pointer
6941  * Enable interrupt sources on the GPU (vblanks, hpd,
6943  * Returns 0 for success, errors for failure.
6945 int cik_irq_set(struct radeon_device *rdev)
6948 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6949 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6950 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6951 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6952 	u32 grbm_int_cntl = 0;
6953 	u32 dma_cntl, dma_cntl1;
6955 	if (!rdev->irq.installed) {
6956 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6959 	/* don't enable anything if the ih is disabled */
6960 	if (!rdev->ih.enabled) {
6961 		cik_disable_interrupts(rdev);
6962 		/* force the active interrupt state to all disabled */
6963 		cik_disable_interrupt_state(rdev);
/* Read-modify-write: start from current hw state with the managed enable
 * bits cleared, then OR in only what is currently requested. */
6967 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6968 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6969 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
/* HPD: preserve everything but the (RX) interrupt enables */
6971 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6972 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6973 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6974 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6975 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6976 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6978 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6979 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
/* compute MEC pipes: ME1/ME2, four pipes each */
6981 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6982 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6983 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6984 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6985 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6986 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6987 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6988 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6990 	/* enable CP interrupts on all rings */
6991 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6992 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6993 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6995 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6996 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6997 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6998 		if (ring->me == 1) {
6999 			switch (ring->pipe) {
7001 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7004 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7007 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): this appears to be the pipe-3 case yet it sets cp_m1p2 —
 * looks like a copy/paste bug; presumably should be cp_m1p3. Verify against
 * the (elided) case labels before changing. */
7010 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7013 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7016 		} else if (ring->me == 2) {
7017 			switch (ring->pipe) {
7019 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7022 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7025 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe-3 case sets cp_m2p2 — same suspected copy/paste;
 * presumably should be cp_m2p3. */
7028 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7031 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7035 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7038 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7039 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7040 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7041 		if (ring->me == 1) {
7042 			switch (ring->pipe) {
7044 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7047 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7050 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe-3 case sets cp_m1p2 — same suspected copy/paste;
 * presumably should be cp_m1p3. */
7053 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7056 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7059 		} else if (ring->me == 2) {
7060 			switch (ring->pipe) {
7062 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7065 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7068 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
/* NOTE(review): pipe-3 case sets cp_m2p2 — same suspected copy/paste;
 * presumably should be cp_m2p3. */
7071 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7074 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7078 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7082 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7083 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7084 		dma_cntl |= TRAP_ENABLE;
7087 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7088 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7089 		dma_cntl1 |= TRAP_ENABLE;
/* vblank enables: requested either by DRM vblank or a pending page flip */
7092 	if (rdev->irq.crtc_vblank_int[0] ||
7093 	    atomic_read(&rdev->irq.pflip[0])) {
7094 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7095 		crtc1 |= VBLANK_INTERRUPT_MASK;
7097 	if (rdev->irq.crtc_vblank_int[1] ||
7098 	    atomic_read(&rdev->irq.pflip[1])) {
7099 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7100 		crtc2 |= VBLANK_INTERRUPT_MASK;
7102 	if (rdev->irq.crtc_vblank_int[2] ||
7103 	    atomic_read(&rdev->irq.pflip[2])) {
7104 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7105 		crtc3 |= VBLANK_INTERRUPT_MASK;
7107 	if (rdev->irq.crtc_vblank_int[3] ||
7108 	    atomic_read(&rdev->irq.pflip[3])) {
7109 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7110 		crtc4 |= VBLANK_INTERRUPT_MASK;
7112 	if (rdev->irq.crtc_vblank_int[4] ||
7113 	    atomic_read(&rdev->irq.pflip[4])) {
7114 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7115 		crtc5 |= VBLANK_INTERRUPT_MASK;
7117 	if (rdev->irq.crtc_vblank_int[5] ||
7118 	    atomic_read(&rdev->irq.pflip[5])) {
7119 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7120 		crtc6 |= VBLANK_INTERRUPT_MASK;
7122 	if (rdev->irq.hpd[0]) {
7123 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7124 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7126 	if (rdev->irq.hpd[1]) {
7127 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7128 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7130 	if (rdev->irq.hpd[2]) {
7131 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7132 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7134 	if (rdev->irq.hpd[3]) {
7135 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7136 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7138 	if (rdev->irq.hpd[4]) {
7139 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7140 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7142 	if (rdev->irq.hpd[5]) {
7143 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7144 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
/* commit the accumulated enable state to the hardware */
7147 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7149 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7150 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7152 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7153 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7154 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7155 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7156 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7157 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7158 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7159 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7161 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7163 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7164 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7165 	if (rdev->num_crtc >= 4) {
7166 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7167 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7169 	if (rdev->num_crtc >= 6) {
7170 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7171 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
/* page-flip interrupts are always unmasked on present CRTCs */
7174 	if (rdev->num_crtc >= 2) {
7175 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7176 		       GRPH_PFLIP_INT_MASK);
7177 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7178 		       GRPH_PFLIP_INT_MASK);
7180 	if (rdev->num_crtc >= 4) {
7181 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7182 		       GRPH_PFLIP_INT_MASK);
7183 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7184 		       GRPH_PFLIP_INT_MASK);
7186 	if (rdev->num_crtc >= 6) {
7187 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7188 		       GRPH_PFLIP_INT_MASK);
7189 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7190 		       GRPH_PFLIP_INT_MASK);
7193 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7194 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7195 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7196 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7197 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7198 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
/* posting read to flush the register writes */
7201 	RREG32(SRBM_STATUS);
7207  * cik_irq_ack - ack interrupt sources
7209  * @rdev: radeon_device pointer
7211  * Ack interrupt sources on the GPU (vblanks, hpd,
7212  * etc.) (CIK). Certain interrupts sources are sw
7213  * generated and do not require an explicit ack.
7215 static inline void cik_irq_ack(struct radeon_device *rdev)
/* Latch all display interrupt status registers into stat_regs so
 * cik_irq_process() can consume them after this ack pass. */
7219 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7220 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7221 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7222 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7223 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7224 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7225 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7227 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7228 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7229 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7230 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7231 	if (rdev->num_crtc >= 4) {
7232 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7233 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7234 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7235 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7237 	if (rdev->num_crtc >= 6) {
7238 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7239 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7240 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7241 			EVERGREEN_CRTC5_REGISTER_OFFSET);
/* ack pending pflip / vblank / vline interrupts on CRTC0 and CRTC1 */
7244 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7245 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246 		       GRPH_PFLIP_INT_CLEAR);
7247 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7248 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7249 		       GRPH_PFLIP_INT_CLEAR);
7250 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7251 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7252 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7253 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7254 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7255 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7256 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7257 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7259 	if (rdev->num_crtc >= 4) {
7260 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7261 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7262 			       GRPH_PFLIP_INT_CLEAR);
7263 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7264 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7265 			       GRPH_PFLIP_INT_CLEAR);
7266 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7267 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7268 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7269 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7270 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7271 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7272 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7273 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7276 	if (rdev->num_crtc >= 6) {
7277 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7278 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7279 			       GRPH_PFLIP_INT_CLEAR);
7280 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7281 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7282 			       GRPH_PFLIP_INT_CLEAR);
7283 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7284 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7285 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7286 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7287 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7288 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7289 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7290 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* ack HPD connect/disconnect interrupts (set-to-clear ACK bit) */
7293 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7294 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7295 		tmp |= DC_HPDx_INT_ACK;
7296 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7298 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7299 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7300 		tmp |= DC_HPDx_INT_ACK;
7301 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7303 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7304 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7305 		tmp |= DC_HPDx_INT_ACK;
7306 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7308 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7309 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7310 		tmp |= DC_HPDx_INT_ACK;
7311 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7313 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7314 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7315 		tmp |= DC_HPDx_INT_ACK;
7316 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7318 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7319 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7320 		tmp |= DC_HPDx_INT_ACK;
7321 		WREG32(DC_HPD6_INT_CONTROL, tmp);
/* ack HPD RX (DP short-pulse) interrupts */
7323 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7324 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7325 		tmp |= DC_HPDx_RX_INT_ACK;
7326 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7328 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7329 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7330 		tmp |= DC_HPDx_RX_INT_ACK;
7331 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7333 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7334 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7335 		tmp |= DC_HPDx_RX_INT_ACK;
7336 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7338 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7339 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7340 		tmp |= DC_HPDx_RX_INT_ACK;
7341 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7343 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7344 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7345 		tmp |= DC_HPDx_RX_INT_ACK;
7346 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7348 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7349 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7350 		tmp |= DC_HPDx_RX_INT_ACK;
7351 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7356  * cik_irq_disable - disable interrupts
7358  * @rdev: radeon_device pointer
7360  * Disable interrupts on the hw (CIK).
7362 static void cik_irq_disable(struct radeon_device *rdev)
7364 	cik_disable_interrupts(rdev);
7365 	/* Wait and acknowledge irq */
/* then force all per-source enable registers back to their masked state */
7368 	cik_disable_interrupt_state(rdev);
7372  * cik_irq_suspend - disable interrupts for suspend
7374  * @rdev: radeon_device pointer
7376  * Disable interrupts and stop the RLC (CIK).
7379 static void cik_irq_suspend(struct radeon_device *rdev)
7381 	cik_irq_disable(rdev);
7386  * cik_irq_fini - tear down interrupt support
7388  * @rdev: radeon_device pointer
7390  * Disable interrupts on the hw and free the IH ring
7392  * Used for driver unload.
7394 static void cik_irq_fini(struct radeon_device *rdev)
7396 	cik_irq_suspend(rdev);
7397 	r600_ih_ring_fini(rdev);
7401  * cik_get_ih_wptr - get the IH ring buffer wptr
7403  * @rdev: radeon_device pointer
7405  * Get the IH ring buffer wptr from either the register
7406  * or the writeback memory buffer (CIK). Also check for
7407  * ring buffer overflow and deal with it.
7408  * Used by cik_irq_process().
7409  * Returns the value of the wptr.
7411 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* prefer the writeback copy (cheap memory read) over an MMIO register read */
7415 	if (rdev->wb.enabled)
7416 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7418 		wptr = RREG32(IH_RB_WPTR);
7420 	if (wptr & RB_OVERFLOW) {
7421 		wptr &= ~RB_OVERFLOW;
7422 		/* When a ring buffer overflow happen start parsing interrupt
7423 		 * from the last not overwritten vector (wptr + 16). Hopefully
7424 		 * this should allow us to catchup.
7426 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7427 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7428 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* write-one-to-clear the overflow flag so the hw can report the next one */
7429 		tmp = RREG32(IH_RB_CNTL);
7430 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7431 		WREG32(IH_RB_CNTL, tmp);
7433 	return (wptr & rdev->ih.ptr_mask);
7437 * Each IV ring entry is 128 bits:
7438 * [7:0] - interrupt source id
7440 * [59:32] - interrupt source data
7441 * [63:60] - reserved
7444 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7445 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7446 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7447 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7448 * PIPE_ID - ME0 0=3D
7449 * - ME1&2 compute dispatcher (4 pipes each)
7451 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7452 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7453 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7456 * [127:96] - reserved
7459  * cik_irq_process - interrupt handler
7461  * @rdev: radeon_device pointer
7463  * Interrupt handler (CIK). Walk the IH ring,
7464  * ack interrupts and schedule work to handle
7466  * Returns irq process return code.
7468 int cik_irq_process(struct radeon_device *rdev)
7470 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7471 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7474 	u32 src_id, src_data, ring_id;
7475 	u8 me_id, pipe_id, queue_id;
7477 	bool queue_hotplug = false;
7478 	bool queue_dp = false;
7479 	bool queue_reset = false;
7480 	u32 addr, status, mc_client;
7481 	bool queue_thermal = false;
7483 	if (!rdev->ih.enabled || rdev->shutdown)
7486 	wptr = cik_get_ih_wptr(rdev);
7489 	/* is somebody else already processing irqs? */
7490 	if (atomic_xchg(&rdev->ih.lock, 1))
7493 	rptr = rdev->ih.rptr;
7494 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7496 	/* Order reading of wptr vs. reading of IH ring data */
7499 	/* display interrupts */
/* drain the IH ring: each entry is 16 bytes (4 dwords); see the IV ring
 * format comment above this function */
7502 	while (rptr != wptr) {
7503 		/* wptr/rptr are in bytes! */
7504 		ring_index = rptr / 4;
7506 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7507 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7508 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7511 		case 1: /* D1 vblank/vline */
7513 			case 0: /* D1 vblank */
7514 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7515 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7517 				if (rdev->irq.crtc_vblank_int[0]) {
7518 					drm_handle_vblank(rdev->ddev, 0);
7519 					rdev->pm.vblank_sync = true;
7520 					wake_up(&rdev->irq.vblank_queue);
7522 				if (atomic_read(&rdev->irq.pflip[0]))
7523 					radeon_crtc_handle_vblank(rdev, 0);
7524 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7525 				DRM_DEBUG("IH: D1 vblank\n");
7528 			case 1: /* D1 vline */
7529 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7530 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7532 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7533 				DRM_DEBUG("IH: D1 vline\n");
7537 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7541 		case 2: /* D2 vblank/vline */
7543 			case 0: /* D2 vblank */
7544 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7545 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7547 				if (rdev->irq.crtc_vblank_int[1]) {
7548 					drm_handle_vblank(rdev->ddev, 1);
7549 					rdev->pm.vblank_sync = true;
7550 					wake_up(&rdev->irq.vblank_queue);
7552 				if (atomic_read(&rdev->irq.pflip[1]))
7553 					radeon_crtc_handle_vblank(rdev, 1);
7554 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7555 				DRM_DEBUG("IH: D2 vblank\n");
7558 			case 1: /* D2 vline */
7559 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7560 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7562 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7563 				DRM_DEBUG("IH: D2 vline\n");
7567 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7571 		case 3: /* D3 vblank/vline */
7573 			case 0: /* D3 vblank */
7574 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7575 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7577 				if (rdev->irq.crtc_vblank_int[2]) {
7578 					drm_handle_vblank(rdev->ddev, 2);
7579 					rdev->pm.vblank_sync = true;
7580 					wake_up(&rdev->irq.vblank_queue);
7582 				if (atomic_read(&rdev->irq.pflip[2]))
7583 					radeon_crtc_handle_vblank(rdev, 2);
7584 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7585 				DRM_DEBUG("IH: D3 vblank\n");
7588 			case 1: /* D3 vline */
7589 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7590 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7592 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7593 				DRM_DEBUG("IH: D3 vline\n");
7597 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7601 		case 4: /* D4 vblank/vline */
7603 			case 0: /* D4 vblank */
7604 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7605 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7607 				if (rdev->irq.crtc_vblank_int[3]) {
7608 					drm_handle_vblank(rdev->ddev, 3);
7609 					rdev->pm.vblank_sync = true;
7610 					wake_up(&rdev->irq.vblank_queue);
7612 				if (atomic_read(&rdev->irq.pflip[3]))
7613 					radeon_crtc_handle_vblank(rdev, 3);
7614 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7615 				DRM_DEBUG("IH: D4 vblank\n");
7618 			case 1: /* D4 vline */
7619 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7620 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7622 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7623 				DRM_DEBUG("IH: D4 vline\n");
7627 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7631 		case 5: /* D5 vblank/vline */
7633 			case 0: /* D5 vblank */
7634 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7635 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7637 				if (rdev->irq.crtc_vblank_int[4]) {
7638 					drm_handle_vblank(rdev->ddev, 4);
7639 					rdev->pm.vblank_sync = true;
7640 					wake_up(&rdev->irq.vblank_queue);
7642 				if (atomic_read(&rdev->irq.pflip[4]))
7643 					radeon_crtc_handle_vblank(rdev, 4);
7644 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7645 				DRM_DEBUG("IH: D5 vblank\n");
7648 			case 1: /* D5 vline */
7649 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7650 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7652 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7653 				DRM_DEBUG("IH: D5 vline\n");
7657 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7661 		case 6: /* D6 vblank/vline */
7663 			case 0: /* D6 vblank */
7664 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7665 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7667 				if (rdev->irq.crtc_vblank_int[5]) {
7668 					drm_handle_vblank(rdev->ddev, 5);
7669 					rdev->pm.vblank_sync = true;
7670 					wake_up(&rdev->irq.vblank_queue);
7672 				if (atomic_read(&rdev->irq.pflip[5]))
7673 					radeon_crtc_handle_vblank(rdev, 5);
7674 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7675 				DRM_DEBUG("IH: D6 vblank\n");
7678 			case 1: /* D6 vline */
7679 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7680 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7682 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7683 				DRM_DEBUG("IH: D6 vline\n");
7687 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* pflip src_ids are even numbers 8..18; (src_id - 8) >> 1 maps to crtc 0..5 */
7691 		case 8: /* D1 page flip */
7692 		case 10: /* D2 page flip */
7693 		case 12: /* D3 page flip */
7694 		case 14: /* D4 page flip */
7695 		case 16: /* D5 page flip */
7696 		case 18: /* D6 page flip */
7697 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7698 			if (radeon_use_pflipirq > 0)
7699 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7701 		case 42: /* HPD hotplug */
7704 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7705 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7707 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7708 				queue_hotplug = true;
7709 				DRM_DEBUG("IH: HPD1\n");
7713 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7714 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7716 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7717 				queue_hotplug = true;
7718 				DRM_DEBUG("IH: HPD2\n");
7722 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7723 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7725 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7726 				queue_hotplug = true;
7727 				DRM_DEBUG("IH: HPD3\n");
7731 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7732 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7734 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7735 				queue_hotplug = true;
7736 				DRM_DEBUG("IH: HPD4\n");
7740 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7741 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7743 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7744 				queue_hotplug = true;
7745 				DRM_DEBUG("IH: HPD5\n");
7749 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7750 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7752 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7753 				queue_hotplug = true;
7754 				DRM_DEBUG("IH: HPD6\n");
/* HPD RX interrupts (DP short pulses) schedule the dp_work handler */
7758 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7759 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7761 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7763 				DRM_DEBUG("IH: HPD_RX 1\n");
7767 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7768 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7770 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7772 				DRM_DEBUG("IH: HPD_RX 2\n");
7776 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7777 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7779 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7781 				DRM_DEBUG("IH: HPD_RX 3\n");
7785 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7786 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7788 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7790 				DRM_DEBUG("IH: HPD_RX 4\n");
7794 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7795 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7797 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7799 				DRM_DEBUG("IH: HPD_RX 5\n");
7803 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7804 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7806 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7808 				DRM_DEBUG("IH: HPD_RX 6\n");
7812 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* SRBM read error: report and write-one-to-clear the ack bit */
7817 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7818 			WREG32(SRBM_INT_ACK, 0x1);
7821 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7822 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* GPU VM page fault: dump fault address/status, then clear them */
7826 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7827 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7828 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7829 			/* reset addr and status */
7830 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7831 			if (addr == 0x0 && status == 0x0)
7833 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7834 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7836 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7838 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7841 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7844 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7847 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7850 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7854 		case 176: /* GFX RB CP_INT */
7855 		case 177: /* GFX IB CP_INT */
7856 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7858 		case 181: /* CP EOP event */
7859 			DRM_DEBUG("IH: CP EOP\n");
7860 			/* XXX check the bitfield order! */
7861 			me_id = (ring_id & 0x60) >> 5;
7862 			pipe_id = (ring_id & 0x18) >> 3;
7863 			queue_id = (ring_id & 0x7) >> 0;
7866 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise '&' on boolean comparisons below — result is
 * correct since == yields 0/1, but '&&' would be conventional. */
7870 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7871 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7872 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7873 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7877 		case 184: /* CP Privileged reg access */
7878 			DRM_ERROR("Illegal register access in command stream\n");
7879 			/* XXX check the bitfield order! */
7880 			me_id = (ring_id & 0x60) >> 5;
7881 			pipe_id = (ring_id & 0x18) >> 3;
7882 			queue_id = (ring_id & 0x7) >> 0;
7885 				/* This results in a full GPU reset, but all we need to do is soft
7886 				 * reset the CP for gfx
7900 		case 185: /* CP Privileged inst */
7901 			DRM_ERROR("Illegal instruction in command stream\n");
7902 			/* XXX check the bitfield order! */
7903 			me_id = (ring_id & 0x60) >> 5;
7904 			pipe_id = (ring_id & 0x18) >> 3;
7905 			queue_id = (ring_id & 0x7) >> 0;
7908 				/* This results in a full GPU reset, but all we need to do is soft
7909 				 * reset the CP for gfx
7923 		case 224: /* SDMA trap event */
7924 			/* XXX check the bitfield order! */
7925 			me_id = (ring_id & 0x3) >> 0;
7926 			queue_id = (ring_id & 0xc) >> 2;
7927 			DRM_DEBUG("IH: SDMA trap\n");
7932 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7945 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7957 		case 230: /* thermal low to high */
7958 			DRM_DEBUG("IH: thermal low to high\n");
7959 			rdev->pm.dpm.thermal.high_to_low = false;
7960 			queue_thermal = true;
7962 		case 231: /* thermal high to low */
7963 			DRM_DEBUG("IH: thermal high to low\n");
7964 			rdev->pm.dpm.thermal.high_to_low = true;
7965 			queue_thermal = true;
7967 		case 233: /* GUI IDLE */
7968 			DRM_DEBUG("IH: GUI idle\n");
7970 		case 241: /* SDMA Privileged inst */
7971 		case 247: /* SDMA Privileged inst */
7972 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7973 			/* XXX check the bitfield order! */
7974 			me_id = (ring_id & 0x3) >> 0;
7975 			queue_id = (ring_id & 0xc) >> 2;
8010 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8014 		/* wptr/rptr are in bytes! */
8016 		rptr &= rdev->ih.ptr_mask;
8017 		WREG32(IH_RB_RPTR, rptr);
/* defer the heavier handling to workqueues outside the irq path */
8020 		schedule_work(&rdev->dp_work);
8022 		schedule_delayed_work(&rdev->hotplug_work, 0);
8024 		rdev->needs_reset = true;
8025 		wake_up_all(&rdev->fence_queue);
8028 		schedule_work(&rdev->pm.dpm.thermal.work);
8029 	rdev->ih.rptr = rptr;
8030 	atomic_set(&rdev->ih.lock, 0);
8032 	/* make sure wptr hasn't changed while processing */
8033 	wptr = cik_get_ih_wptr(rdev);
8041 * startup/shutdown callbacks
/*
 * cik_uvd_init - one-time UVD (video decode) block setup.
 * Calls radeon_uvd_init(); on failure, logs the error and still initializes
 * a dummy UVD ring so the rest of driver init can proceed without UVD.
 * NOTE(review): source line numbers jump (8043->8050->8052...) — the opening
 * brace, has_uvd guard, and error-path braces are elided in this extraction.
 */
8043 static void cik_uvd_init(struct radeon_device *rdev)
8050 	r = radeon_uvd_init(rdev);
8052 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
/* Rationale for continuing despite the failure: */
8054 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8055 		 * to early fails cik_uvd_start() and thus nothing happens
8056 		 * there. So it is pointless to try to go through that code
8057 		 * hence why we disable uvd here.
/* Ring object is created later; here only the software ring state is set up. */
8062 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8063 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
/*
 * cik_uvd_start - bring up UVD hardware state before ring init.
 * Sequence visible here: radeon_uvd_resume() -> uvd_v4_2_resume() ->
 * radeon_fence_driver_start_ring(). On any failure the UVD ring size is
 * zeroed, which makes cik_uvd_resume() skip UVD entirely.
 * NOTE(review): `if (r) goto error;`-style lines between the calls are elided
 * in this extraction (line numbers jump) — TODO confirm against full source.
 */
8066 static void cik_uvd_start(struct radeon_device *rdev)
8073 	r = radeon_uvd_resume(rdev);
8075 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8078 	r = uvd_v4_2_resume(rdev);
8080 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8083 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8085 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
/* Error path: disable UVD by marking its ring unused. */
8091 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/*
 * cik_uvd_resume - initialize the UVD ring and start the UVD v1.0 engine.
 * No-op when UVD is absent or was disabled (ring_size == 0) by an earlier
 * cik_uvd_start() failure. Errors are logged but not propagated (void return).
 */
8094 static void cik_uvd_resume(struct radeon_device *rdev)
8096 	struct radeon_ring *ring;
/* Bail out early if UVD is unavailable or disabled. */
8099 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8102 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
/* UVD uses PACKET0 NOPs as ring filler. */
8103 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8105 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8108 	r = uvd_v1_0_init(rdev);
8110 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
/*
 * cik_vce_init - one-time VCE (video encode) block setup.
 * Mirrors cik_uvd_init(): on radeon_vce_init() failure, log and fall back to
 * dummy ring setup for both VCE rings so later stages skip VCE gracefully.
 * NOTE(review): braces/guards between these lines are elided in this extraction.
 */
8115 static void cik_vce_init(struct radeon_device *rdev)
8122 	r = radeon_vce_init(rdev);
8124 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8126 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8127 		 * to early fails cik_vce_start() and thus nothing happens
8128 		 * there. So it is pointless to try to go through that code
8129 		 * hence why we disable vce here.
/* VCE exposes two rings; both get software-side init only at this stage. */
8134 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8135 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8136 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8137 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
/*
 * cik_vce_start - bring up VCE hardware state before ring init.
 * Sequence: radeon_vce_resume() -> vce_v2_0_resume() -> fence start for both
 * VCE rings. On any failure both VCE ring sizes are zeroed, which makes
 * cik_vce_resume() skip VCE.
 * NOTE(review): the `if (r) goto ...` lines between calls are elided here.
 */
8140 static void cik_vce_start(struct radeon_device *rdev)
8147 	r = radeon_vce_resume(rdev);
8149 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8152 	r = vce_v2_0_resume(rdev);
8154 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8157 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8159 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8162 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8164 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
/* Error path: disable both VCE rings. */
8170 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8171 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
/*
 * cik_vce_resume - initialize both VCE rings and start the VCE v1.0 engine.
 * No-op when VCE is absent or was disabled (VCE1 ring_size == 0) by an
 * earlier cik_vce_start() failure. Errors are logged but not propagated.
 */
8174 static void cik_vce_resume(struct radeon_device *rdev)
8176 	struct radeon_ring *ring;
8179 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8182 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8183 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8185 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8188 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8189 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
/* FIXME(review): this is the VCE2 ring but the message says "VCE1" —
 * copy/paste in the original; fix the string when the full error path
 * is visible. */
8191 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8194 	r = vce_v1_0_init(rdev);
8196 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8202  * cik_startup - program the asic to a functional state
8204  * @rdev: radeon_device pointer
8206  * Programs the asic to a functional state (CIK).
8207  * Called by cik_init() and cik_resume().
8208  * Returns 0 for success, error for failure.
/*
 * NOTE(review): this extraction elides many lines (error-return checks,
 * braces, `int r; u32 nop;` declarations). The visible sequence is:
 * PCIe/ASPM -> scratch -> MC -> GART -> RLC -> WB -> MEC -> fences ->
 * UVD/VCE start -> IRQ -> ring init (GFX, 2x compute, 2x SDMA) ->
 * CP/SDMA resume -> UVD/VCE resume -> IB pool -> VM manager -> audio.
 */
8210 static int cik_startup(struct radeon_device *rdev)
8212 	struct radeon_ring *ring;
8216 	/* enable pcie gen2/3 link */
8217 	cik_pcie_gen3_enable(rdev);
8219 	cik_program_aspm(rdev);
8221 	/* scratch needs to be initialized before MC */
8222 	r = r600_vram_scratch_init(rdev);
8226 	cik_mc_program(rdev);
/* dGPU only: MC microcode is loaded here unless DPM already did it. */
8228 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8229 		r = ci_mc_load_microcode(rdev);
8231 			DRM_ERROR("Failed to load MC firmware!\n");
8236 	r = cik_pcie_gart_enable(rdev);
8241 	/* allocate rlc buffers */
8242 	if (rdev->flags & RADEON_IS_IGP) {
/* Kaveri uses the "spectre" RLC register list, other IGPs "kalindi". */
8243 		if (rdev->family == CHIP_KAVERI) {
8244 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8245 			rdev->rlc.reg_list_size =
8246 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8248 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8249 			rdev->rlc.reg_list_size =
8250 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8253 	rdev->rlc.cs_data = ci_cs_data;
8254 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8255 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8256 	r = sumo_rlc_init(rdev);
8258 		DRM_ERROR("Failed to init rlc BOs!\n");
8262 	/* allocate wb buffer */
8263 	r = radeon_wb_init(rdev);
8267 	/* allocate mec buffers */
8268 	r = cik_mec_init(rdev);
8270 		DRM_ERROR("Failed to init MEC BOs!\n");
/* Start fence handling on every ring CIK exposes. */
8274 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8276 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8280 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8282 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8286 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8288 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8292 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8294 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8298 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8300 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8304 	cik_uvd_start(rdev);
8305 	cik_vce_start(rdev);
8308 	if (!rdev->irq.installed) {
8309 		r = radeon_irq_kms_init(rdev);
8314 	r = cik_irq_init(rdev);
8316 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8317 		radeon_irq_kms_fini(rdev);
/* Pick the ring NOP packet; Hawaii needs PACKET3 NOPs on gfx too. */
8322 	if (rdev->family == CHIP_HAWAII) {
8324 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8326 		nop = RADEON_CP_PACKET2;
8328 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8331 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8332 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8337 	/* set up the compute queues */
8338 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8339 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8340 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8344 	ring->me = 1; /* first MEC */
8345 	ring->pipe = 0; /* first pipe */
8346 	ring->queue = 0; /* first queue */
8347 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8349 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8350 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8351 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8355 	/* dGPU only have 1 MEC */
8356 	ring->me = 1; /* first MEC */
8357 	ring->pipe = 0; /* first pipe */
8358 	ring->queue = 1; /* second queue */
8359 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8361 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8362 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8363 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8367 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8368 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8369 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8373 	r = cik_cp_resume(rdev);
8377 	r = cik_sdma_resume(rdev);
8381 	cik_uvd_resume(rdev);
8382 	cik_vce_resume(rdev);
8384 	r = radeon_ib_pool_init(rdev);
8386 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8390 	r = radeon_vm_manager_init(rdev);
8392 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8396 	r = radeon_audio_init(rdev);
8404  * cik_resume - resume the asic to a functional state
8406  * @rdev: radeon_device pointer
8408  * Programs the asic to a functional state (CIK).
8410  * Returns 0 for success, error for failure.
8412 int cik_resume(struct radeon_device *rdev)
/* Re-run the ATOM BIOS init tables after the chip lost state in suspend. */
8417 	atom_asic_init(rdev->mode_info.atom_context);
8419 	/* init golden registers */
8420 	cik_init_golden_registers(rdev);
8422 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8423 		radeon_pm_resume(rdev);
/* accel_working is set optimistically and cleared if startup fails. */
8425 	rdev->accel_working = true;
8426 	r = cik_startup(rdev);
8428 		DRM_ERROR("cik startup failed on resume\n");
8429 		rdev->accel_working = false;
8438  * cik_suspend - suspend the asic
8440  * @rdev: radeon_device pointer
8442  * Bring the chip into a state suitable for suspend (CIK).
8443  * Called at suspend.
8444  * Returns 0 for success.
8446 int cik_suspend(struct radeon_device *rdev)
/* Tear-down order: PM/audio/VM first, then engines, then IRQ/WB/GART. */
8448 	radeon_pm_suspend(rdev);
8449 	radeon_audio_fini(rdev);
8450 	radeon_vm_manager_fini(rdev);
8451 	cik_cp_enable(rdev, false);
8452 	cik_sdma_enable(rdev, false);
8453 	if (rdev->has_uvd) {
8454 		uvd_v1_0_fini(rdev);
8455 		radeon_uvd_suspend(rdev);
/* NOTE(review): the matching has_vce guard around this call is elided here. */
8458 	radeon_vce_suspend(rdev);
8461 	cik_irq_suspend(rdev);
8462 	radeon_wb_disable(rdev);
8463 	cik_pcie_gart_disable(rdev);
8467 /* Plan is to move initialization in that function and use
8468  * helper function so that radeon_device_init pretty much
8469  * do nothing more than calling asic specific function. This
8470  * should also allow to remove a bunch of callback function
8474  * cik_init - asic specific driver and hw init
8476  * @rdev: radeon_device pointer
8478  * Setup asic specific driver variables and program the hw
8479  * to a functional state (CIK).
8480  * Called at driver startup.
8481  * Returns 0 for success, errors for failure.
/*
 * NOTE(review): error-return lines and closing braces are elided in this
 * extraction; the visible flow is: BIOS fetch/post -> golden regs ->
 * scratch/surface/clock init -> fence driver -> MC -> BO manager ->
 * microcode -> PM -> software ring init -> IH -> GART -> cik_startup(),
 * with a teardown cascade if startup fails.
 */
8483 int cik_init(struct radeon_device *rdev)
8485 	struct radeon_ring *ring;
8489 	if (!radeon_get_bios(rdev)) {
8490 		if (ASIC_IS_AVIVO(rdev))
8493 	/* Must be an ATOMBIOS */
8494 	if (!rdev->is_atom_bios) {
8495 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8498 	r = radeon_atombios_init(rdev);
8502 	/* Post card if necessary */
8503 	if (!radeon_card_posted(rdev)) {
8505 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8508 		DRM_INFO("GPU not posted. posting now...\n");
8509 		atom_asic_init(rdev->mode_info.atom_context);
8511 	/* init golden registers */
8512 	cik_init_golden_registers(rdev);
8513 	/* Initialize scratch registers */
8514 	cik_scratch_init(rdev);
8515 	/* Initialize surface registers */
8516 	radeon_surface_init(rdev);
8517 	/* Initialize clocks */
8518 	radeon_get_clock_info(rdev->ddev);
8521 	r = radeon_fence_driver_init(rdev);
8525 	/* initialize memory controller */
8526 	r = cik_mc_init(rdev);
8529 	/* Memory manager */
8530 	r = radeon_bo_init(rdev);
/* IGP parts skip the MC firmware requirement; dGPUs also need mc_fw
 * (the extra condition line is elided between 8545 and 8547). */
8534 	if (rdev->flags & RADEON_IS_IGP) {
8535 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8536 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8537 			r = cik_init_microcode(rdev);
8539 				DRM_ERROR("Failed to load firmware!\n");
8544 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8545 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8547 			r = cik_init_microcode(rdev);
8549 				DRM_ERROR("Failed to load firmware!\n");
8555 	/* Initialize power management */
8556 	radeon_pm_init(rdev);
/* Software-side ring setup; hardware programming happens in cik_startup(). */
8558 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8559 	ring->ring_obj = NULL;
8560 	r600_ring_init(rdev, ring, 1024 * 1024);
8562 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8563 	ring->ring_obj = NULL;
8564 	r600_ring_init(rdev, ring, 1024 * 1024);
8565 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8569 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8570 	ring->ring_obj = NULL;
8571 	r600_ring_init(rdev, ring, 1024 * 1024);
8572 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8576 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8577 	ring->ring_obj = NULL;
8578 	r600_ring_init(rdev, ring, 256 * 1024);
8580 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581 	ring->ring_obj = NULL;
8582 	r600_ring_init(rdev, ring, 256 * 1024);
8587 	rdev->ih.ring_obj = NULL;
8588 	r600_ih_ring_init(rdev, 64 * 1024);
8590 	r = r600_pcie_gart_init(rdev);
8594 	rdev->accel_working = true;
8595 	r = cik_startup(rdev);
/* Failed startup: disable acceleration and unwind everything set up above. */
8597 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8599 		cik_sdma_fini(rdev);
8601 		sumo_rlc_fini(rdev);
8603 		radeon_wb_fini(rdev);
8604 		radeon_ib_pool_fini(rdev);
8605 		radeon_vm_manager_fini(rdev);
8606 		radeon_irq_kms_fini(rdev);
8607 		cik_pcie_gart_fini(rdev);
8608 		rdev->accel_working = false;
8611 	/* Don't start up if the MC ucode is missing.
8612 	 * The default clocks and voltages before the MC ucode
8613 	 * is loaded are not suffient for advanced operations.
8615 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8616 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8624  * cik_fini - asic specific driver and hw fini
8626  * @rdev: radeon_device pointer
8628  * Tear down the asic specific driver variables and program the hw
8629  * to an idle state (CIK).
8630  * Called at driver unload.
8632 void cik_fini(struct radeon_device *rdev)
/* Reverse of cik_init()/cik_startup(): PM, engines, RLC, WB, VM, IB, IRQ,
 * video blocks, GART, scratch, GEM, fences, BO manager, atombios.
 * NOTE(review): cik_cp_fini/mec_fini lines appear elided (8635, 8637-8639). */
8634 	radeon_pm_fini(rdev);
8636 	cik_sdma_fini(rdev);
8640 	sumo_rlc_fini(rdev);
8642 	radeon_wb_fini(rdev);
8643 	radeon_vm_manager_fini(rdev);
8644 	radeon_ib_pool_fini(rdev);
8645 	radeon_irq_kms_fini(rdev);
8646 	uvd_v1_0_fini(rdev);
8647 	radeon_uvd_fini(rdev);
8648 	radeon_vce_fini(rdev);
8649 	cik_pcie_gart_fini(rdev);
8650 	r600_vram_scratch_fini(rdev);
8651 	radeon_gem_fini(rdev);
8652 	radeon_fence_driver_fini(rdev);
8653 	radeon_bo_fini(rdev);
8654 	radeon_atombios_fini(rdev);
/*
 * dce8_program_fmt - program the FMT (output bit-depth/dither) block for
 * the CRTC driving this encoder. Reads monitor bpc and the connector's
 * dither preference, then writes FMT_BIT_DEPTH_CONTROL.
 * NOTE(review): the `int bpc = 0;`, `u32 tmp = 0;` declarations and the
 * `switch (bpc)` framing around the 6/8/10-bpc branches are elided in this
 * extraction (line numbers jump at 8684-8689, 8696-8697, 8705-8706).
 */
8659 void dce8_program_fmt(struct drm_encoder *encoder)
8661 	struct drm_device *dev = encoder->dev;
8662 	struct radeon_device *rdev = dev->dev_private;
8663 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8664 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8665 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8668 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8671 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8672 		bpc = radeon_get_monitor_bpc(connector);
8673 		dither = radeon_connector->dither;
8676 	/* LVDS/eDP FMT is set up by atom */
8677 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8680 	/* not needed for analog */
8681 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8682 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
/* 6-bpc branch: spatial dither at depth 0 or plain truncation. */
8690 		if (dither == RADEON_FMT_DITHER_ENABLE)
8691 			/* XXX sort out optimal dither settings */
8692 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8693 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8695 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
/* 8-bpc branch: adds RGB random, depth 1. */
8698 		if (dither == RADEON_FMT_DITHER_ENABLE)
8699 			/* XXX sort out optimal dither settings */
8700 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8701 				FMT_RGB_RANDOM_ENABLE |
8702 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8704 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
/* 10-bpc branch: depth 2. */
8707 		if (dither == RADEON_FMT_DITHER_ENABLE)
8708 			/* XXX sort out optimal dither settings */
8709 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8710 				FMT_RGB_RANDOM_ENABLE |
8711 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8713 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8720 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8723 /* display watermark setup */
8725  * dce8_line_buffer_adjust - Set up the line buffer
8727  * @rdev: radeon_device pointer
8728  * @radeon_crtc: the selected display controller
8729  * @mode: the current display mode on the selected display
8732  * Setup up the line buffer allocation for
8733  * the selected display controller (CIK).
8734  * Returns the line buffer size in pixels.
/* NOTE(review): the `tmp = 0/1/2` assignments inside the hdisplay branches
 * and the final size-return expressions (8783-8793) are elided here. */
8736 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8737 				   struct radeon_crtc *radeon_crtc,
8738 				   struct drm_display_mode *mode)
8740 	u32 tmp, buffer_alloc, i;
8741 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8744 	 * There are 6 line buffers, one for each display controllers.
8745 	 * There are 3 partitions per LB. Select the number of partitions
8746 	 * to enable based on the display width. For display widths larger
8747 	 * than 4096, you need use to use 2 display controllers and combine
8748 	 * them using the stereo blender.
8750 	if (radeon_crtc->base.enabled && mode) {
8751 		if (mode->crtc_hdisplay < 1920) {
8754 		} else if (mode->crtc_hdisplay < 2560) {
8757 		} else if (mode->crtc_hdisplay < 4096) {
8759 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8761 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8763 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8770 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8771 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8773 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8774 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Poll (bounded by usec_timeout) until the DMIF buffer allocation latches. */
8775 	for (i = 0; i < rdev->usec_timeout; i++) {
8776 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8777 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8782 	if (radeon_crtc->base.enabled && mode) {
8794 	/* controller not enabled, so no lb used */
8799  * cik_get_number_of_dram_channels - get the number of dram channels
8801  * @rdev: radeon_device pointer
8803  * Look up the number of video ram channels (CIK).
8804  * Used for display watermark bandwidth calculations
8805  * Returns the number of dram channels
8807 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
/* Channel count is decoded from the NOOFCHAN field of MC_SHARED_CHMAP.
 * NOTE(review): all switch cases/returns (8812-8833) are elided here. */
8809 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8811 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input bundle for the dce8_* watermark helpers below; filled in by
 * dce8_program_watermarks() once per CRTC per clock level. */
8834 struct dce8_wm_params {
8835 	u32 dram_channels; /* number of dram channels */
8836 	u32 yclk; /* bandwidth per dram data pin in kHz */
8837 	u32 sclk; /* engine clock in kHz */
8838 	u32 disp_clk; /* display clock in kHz */
8839 	u32 src_width; /* viewport width */
8840 	u32 active_time; /* active display time in ns */
8841 	u32 blank_time; /* blank time in ns */
8842 	bool interlaced; /* mode is interlaced */
8843 	fixed20_12 vsc; /* vertical scale ratio */
8844 	u32 num_heads; /* number of active crtcs */
8845 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8846 	u32 lb_size; /* line buffer allocated to pipe */
8847 	u32 vtaps; /* vertical scaler taps */
8851  * dce8_dram_bandwidth - get the dram bandwidth
8853  * @wm: watermark calculation data
8855  * Calculate the raw dram bandwidth (CIK).
8856  * Used for display watermark bandwidth calculations
8857  * Returns the dram bandwidth in MBytes/s
8859 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8861 	/* Calculate raw DRAM Bandwidth */
8862 	fixed20_12 dram_efficiency; /* 0.7 */
8863 	fixed20_12 yclk, dram_channels, bandwidth;
/* bandwidth = (yclk/1000) * channels*4 * 0.7, in 20.12 fixed point.
 * NOTE(review): the `fixed20_12 a;` declaration line (8864) is elided. */
8866 	a.full = dfixed_const(1000);
8867 	yclk.full = dfixed_const(wm->yclk);
8868 	yclk.full = dfixed_div(yclk, a);
/* 4 bytes per channel per yclk tick. */
8869 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8870 	a.full = dfixed_const(10);
8871 	dram_efficiency.full = dfixed_const(7);
8872 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8873 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8874 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8876 	return dfixed_trunc(bandwidth);
8880  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8882  * @wm: watermark calculation data
8884  * Calculate the dram bandwidth used for display (CIK).
8885  * Used for display watermark bandwidth calculations
8886  * Returns the dram bandwidth for display in MBytes/s
8888 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8890 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8891 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8892 	fixed20_12 yclk, dram_channels, bandwidth;
/* Same formula as dce8_dram_bandwidth() but with a conservative 0.3
 * display share instead of the 0.7 efficiency factor. */
8895 	a.full = dfixed_const(1000);
8896 	yclk.full = dfixed_const(wm->yclk);
8897 	yclk.full = dfixed_div(yclk, a);
8898 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8899 	a.full = dfixed_const(10);
8900 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8901 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8902 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8903 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8905 	return dfixed_trunc(bandwidth);
8909  * dce8_data_return_bandwidth - get the data return bandwidth
8911  * @wm: watermark calculation data
8913  * Calculate the data return bandwidth used for display (CIK).
8914  * Used for display watermark bandwidth calculations
8915  * Returns the data return bandwidth in MBytes/s
8917 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8919 	/* Calculate the display Data return Bandwidth */
8920 	fixed20_12 return_efficiency; /* 0.8 */
8921 	fixed20_12 sclk, bandwidth;
/* bandwidth = 32 bytes/clk * (sclk/1000) * 0.8. */
8924 	a.full = dfixed_const(1000);
8925 	sclk.full = dfixed_const(wm->sclk);
8926 	sclk.full = dfixed_div(sclk, a);
8927 	a.full = dfixed_const(10);
8928 	return_efficiency.full = dfixed_const(8);
8929 	return_efficiency.full = dfixed_div(return_efficiency, a);
8930 	a.full = dfixed_const(32);
8931 	bandwidth.full = dfixed_mul(a, sclk);
8932 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8934 	return dfixed_trunc(bandwidth);
8938  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8940  * @wm: watermark calculation data
8942  * Calculate the dmif bandwidth used for display (CIK).
8943  * Used for display watermark bandwidth calculations
8944  * Returns the dmif bandwidth in MBytes/s
8946 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8948 	/* Calculate the DMIF Request Bandwidth */
8949 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8950 	fixed20_12 disp_clk, bandwidth;
/* bandwidth = 32 * (disp_clk/1000) * 0.8. */
8953 	a.full = dfixed_const(1000);
8954 	disp_clk.full = dfixed_const(wm->disp_clk);
8955 	disp_clk.full = dfixed_div(disp_clk, a);
8956 	a.full = dfixed_const(32);
8957 	b.full = dfixed_mul(a, disp_clk);
8959 	a.full = dfixed_const(10);
8960 	disp_clk_request_efficiency.full = dfixed_const(8);
8961 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8963 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8965 	return dfixed_trunc(bandwidth);
8969  * dce8_available_bandwidth - get the min available bandwidth
8971  * @wm: watermark calculation data
8973  * Calculate the min available bandwidth used for display (CIK).
8974  * Used for display watermark bandwidth calculations
8975  * Returns the min available bandwidth in MBytes/s
8977 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8979 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
/* The effective ceiling is the tightest of the three bandwidth limits. */
8980 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8981 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8982 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8984 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8988  * dce8_average_bandwidth - get the average available bandwidth
8990  * @wm: watermark calculation data
8992  * Calculate the average available bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the average available bandwidth in MBytes/s
8996 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8998 	/* Calculate the display mode Average Bandwidth
8999 	 * DisplayMode should contain the source and destination dimensions,
/* avg_bw = src_width * bpp * vsc / line_time(us).
 * NOTE(review): `fixed20_12 bpp;` and `fixed20_12 a;` declarations appear
 * on elided lines (9002, 9006-9007). */
9003 	fixed20_12 line_time;
9004 	fixed20_12 src_width;
9005 	fixed20_12 bandwidth;
9008 	a.full = dfixed_const(1000);
9009 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9010 	line_time.full = dfixed_div(line_time, a);
9011 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9012 	src_width.full = dfixed_const(wm->src_width);
9013 	bandwidth.full = dfixed_mul(src_width, bpp);
9014 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9015 	bandwidth.full = dfixed_div(bandwidth, line_time);
9017 	return dfixed_trunc(bandwidth);
9021  * dce8_latency_watermark - get the latency watermark
9023  * @wm: watermark calculation data
9025  * Calculate the latency watermark (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the latency watermark in ns
9029 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9031 	/* First calculate the latency in ns */
9032 	u32 mc_latency = 2000; /* 2000 ns. */
9033 	u32 available_bandwidth = dce8_available_bandwidth(wm);
/* Time for a worst-case 512-byte*8 chunk / a 128-byte*4 cursor line pair
 * to come back at the available bandwidth. */
9034 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9035 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9036 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9037 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9038 		(wm->num_heads * cursor_line_pair_return_time);
9039 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9040 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9041 	u32 tmp, dmif_size = 12288;
/* No active heads: nothing to hide latency for (return value on the
 * elided line 9045 — presumably 0; TODO confirm). */
9044 	if (wm->num_heads == 0)
/* Downscaling / multi-tap / interlaced modes need up to 4 source lines
 * per destination line; otherwise 2 suffice. */
9047 	a.full = dfixed_const(2);
9048 	b.full = dfixed_const(1);
9049 	if ((wm->vsc.full > a.full) ||
9050 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9052 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9053 		max_src_lines_per_dst_line = 4;
9055 		max_src_lines_per_dst_line = 2;
/* Line-buffer fill bandwidth: bounded by per-head available bandwidth,
 * DMIF drain rate, and what the display actually consumes. */
9057 	a.full = dfixed_const(available_bandwidth);
9058 	b.full = dfixed_const(wm->num_heads);
9059 	a.full = dfixed_div(a, b);
9060 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9061 	tmp = min(dfixed_trunc(a), tmp);
9063 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9065 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9066 	b.full = dfixed_const(1000);
9067 	c.full = dfixed_const(lb_fill_bw);
9068 	b.full = dfixed_div(c, b);
9069 	a.full = dfixed_div(a, b);
9070 	line_fill_time = dfixed_trunc(a);
/* If the LB refills within the active period, raw latency is the answer;
 * otherwise pad it by the shortfall. */
9072 	if (line_fill_time < wm->active_time)
9075 		return latency + (line_fill_time - wm->active_time);
9080  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9081  * average and available dram bandwidth
9083  * @wm: watermark calculation data
9085  * Check if the display average bandwidth fits in the display
9086  * dram bandwidth (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns true if the display fits, false if not.
9090 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
/* Fits if this head's average demand <= its fair share of display DRAM
 * bandwidth. (return true/false lines 9094-9096 are elided here.) */
9092 	if (dce8_average_bandwidth(wm) <=
9093 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9100  * dce8_average_bandwidth_vs_available_bandwidth - check
9101  * average and available bandwidth
9103  * @wm: watermark calculation data
9105  * Check if the display average bandwidth fits in the display
9106  * available bandwidth (CIK).
9107  * Used for display watermark bandwidth calculations
9108  * Returns true if the display fits, false if not.
9110 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
/* Same per-head fairness check as above, against the min available
 * bandwidth rather than the DRAM display share. */
9112 	if (dce8_average_bandwidth(wm) <=
9113 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9120  * dce8_check_latency_hiding - check latency hiding
9122  * @wm: watermark calculation data
9124  * Check latency hiding (CIK).
9125  * Used for display watermark bandwidth calculations
9126  * Returns true if the display fits, false if not.
9128 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9130 	u32 lb_partitions = wm->lb_size / wm->src_width;
9131 	u32 line_time = wm->active_time + wm->blank_time;
9132 	u32 latency_tolerant_lines;
/* How many full lines of slack the line buffer gives us: only 1 when
 * scaling down or the LB barely covers the scaler taps, else 2. */
9136 	a.full = dfixed_const(1);
9137 	if (wm->vsc.full > a.full)
9138 		latency_tolerant_lines = 1;
9140 		if (lb_partitions <= (wm->vtaps + 1))
9141 			latency_tolerant_lines = 1;
9143 			latency_tolerant_lines = 2;
9146 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
/* Latency is hidden if the computed watermark fits in that slack.
 * (return true/false lines 9149-9151 are elided here.) */
9148 	if (dce8_latency_watermark(wm) <= latency_hiding)
9155  * dce8_program_watermarks - program display watermarks
9157  * @rdev: radeon_device pointer
9158  * @radeon_crtc: the selected display controller
9159  * @lb_size: line buffer size
9160  * @num_heads: number of display controllers in use
9162  * Calculate and program the display watermarks for the
9163  * selected display controller (CIK).
/* NOTE(review): several lines are elided in this extraction (declarations
 * of active_time/line_time/wm_mask/tmp, mode->clock divisors on 9178/9180,
 * and the vsc/vtaps assignments around 9203-9205 and 9243-9245). */
9165 static void dce8_program_watermarks(struct radeon_device *rdev,
9166 				    struct radeon_crtc *radeon_crtc,
9167 				    u32 lb_size, u32 num_heads)
9169 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9170 	struct dce8_wm_params wm_low, wm_high;
9173 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9176 	if (radeon_crtc->base.enabled && num_heads && mode) {
9177 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9179 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9181 		line_time = min(line_time, (u32)65535);
9183 		/* watermark for high clocks */
/* High-clock pass: use DPM max (or current) mclk/sclk. */
9184 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9185 		    rdev->pm.dpm_enabled) {
9187 				radeon_dpm_get_mclk(rdev, false) * 10;
9189 				radeon_dpm_get_sclk(rdev, false) * 10;
9191 			wm_high.yclk = rdev->pm.current_mclk * 10;
9192 			wm_high.sclk = rdev->pm.current_sclk * 10;
9195 		wm_high.disp_clk = mode->clock;
9196 		wm_high.src_width = mode->crtc_hdisplay;
9197 		wm_high.active_time = active_time;
9198 		wm_high.blank_time = line_time - wm_high.active_time;
9199 		wm_high.interlaced = false;
9200 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9201 			wm_high.interlaced = true;
9202 		wm_high.vsc = radeon_crtc->vsc;
9204 		if (radeon_crtc->rmx_type != RMX_OFF)
9206 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9207 		wm_high.lb_size = lb_size;
9208 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9209 		wm_high.num_heads = num_heads;
9211 		/* set for high clocks */
9212 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9214 		/* possibly force display priority to high */
9215 		/* should really do this at mode validation time... */
9216 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9217 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9218 		    !dce8_check_latency_hiding(&wm_high) ||
9219 		    (rdev->disp_priority == 2)) {
9220 			DRM_DEBUG_KMS("force priority to high\n");
9223 		/* watermark for low clocks */
/* Low-clock pass: identical setup but with DPM min clocks. */
9224 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9225 		    rdev->pm.dpm_enabled) {
9227 				radeon_dpm_get_mclk(rdev, true) * 10;
9229 				radeon_dpm_get_sclk(rdev, true) * 10;
9231 			wm_low.yclk = rdev->pm.current_mclk * 10;
9232 			wm_low.sclk = rdev->pm.current_sclk * 10;
9235 		wm_low.disp_clk = mode->clock;
9236 		wm_low.src_width = mode->crtc_hdisplay;
9237 		wm_low.active_time = active_time;
9238 		wm_low.blank_time = line_time - wm_low.active_time;
9239 		wm_low.interlaced = false;
9240 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9241 			wm_low.interlaced = true;
9242 		wm_low.vsc = radeon_crtc->vsc;
9244 		if (radeon_crtc->rmx_type != RMX_OFF)
9246 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9247 		wm_low.lb_size = lb_size;
9248 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9249 		wm_low.num_heads = num_heads;
9251 		/* set for low clocks */
9252 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9254 		/* possibly force display priority to high */
9255 		/* should really do this at mode validation time... */
9256 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9257 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9258 		    !dce8_check_latency_hiding(&wm_low) ||
9259 		    (rdev->disp_priority == 2)) {
9260 			DRM_DEBUG_KMS("force priority to high\n");
9263 		/* Save number of lines the linebuffer leads before the scanout */
9264 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
/* Program watermark set A (high clocks) then B (low clocks) through the
 * DPG watermark mask select, then restore the original selection. */
9268 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9270 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9271 	tmp |= LATENCY_WATERMARK_MASK(1);
9272 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9273 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9274 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9275 		LATENCY_HIGH_WATERMARK(line_time)));
9277 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9278 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9279 	tmp |= LATENCY_WATERMARK_MASK(2);
9280 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9281 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9282 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9283 		LATENCY_HIGH_WATERMARK(line_time)));
9284 	/* restore original selection */
9285 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9287 	/* save values for DPM */
9288 	radeon_crtc->line_time = line_time;
9289 	radeon_crtc->wm_high = latency_watermark_a;
9290 	radeon_crtc->wm_low = latency_watermark_b;
9294 * dce8_bandwidth_update - program display watermarks
9296 * @rdev: radeon_device pointer
9298 * Calculate and program the display watermarks and line
9299 * buffer allocation (CIK).
9301 void dce8_bandwidth_update(struct radeon_device *rdev)
9303 struct drm_display_mode *mode = NULL;
9304 u32 num_heads = 0, lb_size;
9307 if (!rdev->mode_info.mode_config_initialized)
9310 radeon_update_display_priority(rdev);
9312 for (i = 0; i < rdev->num_crtc; i++) {
9313 if (rdev->mode_info.crtcs[i]->base.enabled)
9316 for (i = 0; i < rdev->num_crtc; i++) {
9317 mode = &rdev->mode_info.crtcs[i]->base.mode;
9318 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9319 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9324 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9326 * @rdev: radeon_device pointer
9328 * Fetches a GPU clock counter snapshot (SI).
9329 * Returns the 64 bit clock counter snapshot.
9331 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9335 mutex_lock(&rdev->gpu_clock_mutex);
9336 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9337 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9338 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9339 mutex_unlock(&rdev->gpu_clock_mutex);
9343 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9344 u32 cntl_reg, u32 status_reg)
9347 struct atom_clock_dividers dividers;
9350 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9351 clock, false, ÷rs);
9355 tmp = RREG32_SMC(cntl_reg);
9356 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9357 tmp |= dividers.post_divider;
9358 WREG32_SMC(cntl_reg, tmp);
9360 for (i = 0; i < 100; i++) {
9361 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9371 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9375 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9379 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9383 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9386 struct atom_clock_dividers dividers;
9389 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9390 ecclk, false, ÷rs);
9394 for (i = 0; i < 100; i++) {
9395 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9402 tmp = RREG32_SMC(CG_ECLK_CNTL);
9403 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9404 tmp |= dividers.post_divider;
9405 WREG32_SMC(CG_ECLK_CNTL, tmp);
9407 for (i = 0; i < 100; i++) {
9408 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9418 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9420 struct pci_dev *root = rdev->pdev->bus->self;
9421 enum pci_bus_speed speed_cap;
9422 int bridge_pos, gpu_pos;
9423 u32 speed_cntl, current_data_rate;
9427 if (pci_is_root_bus(rdev->pdev->bus))
9430 if (radeon_pcie_gen2 == 0)
9433 if (rdev->flags & RADEON_IS_IGP)
9436 if (!(rdev->flags & RADEON_IS_PCIE))
9439 speed_cap = pcie_get_speed_cap(root);
9440 if (speed_cap == PCI_SPEED_UNKNOWN)
9443 if ((speed_cap != PCIE_SPEED_8_0GT) &&
9444 (speed_cap != PCIE_SPEED_5_0GT))
9447 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9448 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9449 LC_CURRENT_DATA_RATE_SHIFT;
9450 if (speed_cap == PCIE_SPEED_8_0GT) {
9451 if (current_data_rate == 2) {
9452 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9455 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9456 } else if (speed_cap == PCIE_SPEED_5_0GT) {
9457 if (current_data_rate == 1) {
9458 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9461 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9464 bridge_pos = pci_pcie_cap(root);
9468 gpu_pos = pci_pcie_cap(rdev->pdev);
9472 if (speed_cap == PCIE_SPEED_8_0GT) {
9473 /* re-try equalization if gen3 is not already enabled */
9474 if (current_data_rate != 2) {
9475 u16 bridge_cfg, gpu_cfg;
9476 u16 bridge_cfg2, gpu_cfg2;
9477 u32 max_lw, current_lw, tmp;
9479 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9480 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9482 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9483 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9485 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9486 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9488 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9489 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9490 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9492 if (current_lw < max_lw) {
9493 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9494 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9495 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9496 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9497 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9498 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9502 for (i = 0; i < 10; i++) {
9504 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9505 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9508 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9509 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9511 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9512 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9514 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9515 tmp |= LC_SET_QUIESCE;
9516 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9518 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9520 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9525 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9526 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9527 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9528 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9530 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9531 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9532 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9533 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9536 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9537 tmp16 &= ~((1 << 4) | (7 << 9));
9538 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9539 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9541 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9542 tmp16 &= ~((1 << 4) | (7 << 9));
9543 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9544 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9546 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9547 tmp &= ~LC_SET_QUIESCE;
9548 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9553 /* set the link speed */
9554 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9555 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9556 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9558 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9560 if (speed_cap == PCIE_SPEED_8_0GT)
9561 tmp16 |= 3; /* gen3 */
9562 else if (speed_cap == PCIE_SPEED_5_0GT)
9563 tmp16 |= 2; /* gen2 */
9565 tmp16 |= 1; /* gen1 */
9566 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9568 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9569 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9570 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9572 for (i = 0; i < rdev->usec_timeout; i++) {
9573 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9574 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9580 static void cik_program_aspm(struct radeon_device *rdev)
9583 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9584 bool disable_clkreq = false;
9586 if (radeon_aspm == 0)
9589 /* XXX double check IGPs */
9590 if (rdev->flags & RADEON_IS_IGP)
9593 if (!(rdev->flags & RADEON_IS_PCIE))
9596 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9597 data &= ~LC_XMIT_N_FTS_MASK;
9598 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9600 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9602 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9603 data |= LC_GO_TO_RECOVERY;
9605 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9607 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9608 data |= P_IGNORE_EDB_ERR;
9610 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9612 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9613 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9614 data |= LC_PMI_TO_L1_DIS;
9616 data |= LC_L0S_INACTIVITY(7);
9619 data |= LC_L1_INACTIVITY(7);
9620 data &= ~LC_PMI_TO_L1_DIS;
9622 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9624 if (!disable_plloff_in_l1) {
9625 bool clk_req_support;
9627 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9628 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9629 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9631 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9633 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9634 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9635 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9637 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9639 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9640 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9641 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9643 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9645 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9646 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9647 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9649 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9651 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9652 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9653 data |= LC_DYN_LANES_PWR_STATE(3);
9655 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9657 if (!disable_clkreq &&
9658 !pci_is_root_bus(rdev->pdev->bus)) {
9659 struct pci_dev *root = rdev->pdev->bus->self;
9662 clk_req_support = false;
9663 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9664 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9665 clk_req_support = true;
9667 clk_req_support = false;
9670 if (clk_req_support) {
9671 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9672 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9674 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9676 orig = data = RREG32_SMC(THM_CLK_CNTL);
9677 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9678 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9680 WREG32_SMC(THM_CLK_CNTL, data);
9682 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9683 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9684 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9686 WREG32_SMC(MISC_CLK_CTRL, data);
9688 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9689 data &= ~BCLK_AS_XCLK;
9691 WREG32_SMC(CG_CLKPIN_CNTL, data);
9693 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9694 data &= ~FORCE_BIF_REFCLK_EN;
9696 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9698 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9699 data &= ~MPLL_CLKOUT_SEL_MASK;
9700 data |= MPLL_CLKOUT_SEL(4);
9702 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9707 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9710 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9711 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9713 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9716 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9717 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9718 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9719 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9720 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9721 data &= ~LC_L0S_INACTIVITY_MASK;
9723 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);