2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
40 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
41 extern void r600_ih_ring_fini(struct radeon_device *rdev);
42 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
43 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
44 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
45 extern void sumo_rlc_fini(struct radeon_device *rdev);
46 extern int sumo_rlc_init(struct radeon_device *rdev);
47 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
48 extern void si_rlc_reset(struct radeon_device *rdev);
49 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
50 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
51 extern int cik_sdma_resume(struct radeon_device *rdev);
52 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
53 extern void cik_sdma_fini(struct radeon_device *rdev);
54 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
55 static void cik_rlc_stop(struct radeon_device *rdev);
56 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
57 static void cik_program_aspm(struct radeon_device *rdev);
58 static void cik_init_pg(struct radeon_device *rdev);
59 static void cik_init_cg(struct radeon_device *rdev);
60 static void cik_fini_pg(struct radeon_device *rdev);
61 static void cik_fini_cg(struct radeon_device *rdev);
62 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
66 * cik_get_allowed_info_register - fetch the register for the info ioctl
68 * @rdev: radeon_device pointer
69 * @reg: register offset in bytes
70 * @val: register value
72 * Returns 0 for success or -EINVAL for an invalid register
75 int cik_get_allowed_info_register(struct radeon_device *rdev,
/* One whitelist case per SDMA engine instance's status register */
87 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
88 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
99 * Indirect registers accessor
/*
 * cik_didt_rreg - read a DIDT register through the index/data pair.
 * The didt_idx_lock spinlock serializes the index write and data read
 * so concurrent accessors cannot interleave on the shared index register.
 */
101 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
106 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
107 WREG32(CIK_DIDT_IND_INDEX, (reg));
108 r = RREG32(CIK_DIDT_IND_DATA);
109 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
/*
 * cik_didt_wreg - write a DIDT register through the index/data pair.
 * Selects the target register via the index reg, then writes the value;
 * protected by the same didt_idx_lock as the read accessor.
 */
113 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
117 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
118 WREG32(CIK_DIDT_IND_INDEX, (reg));
119 WREG32(CIK_DIDT_IND_DATA, (v));
120 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
123 /* get temperature in millidegrees */
124 int ci_get_temp(struct radeon_device *rdev)
/* CTF temperature field out of the SMC multi-thermal status register */
129 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
/* keep the 9-bit temperature value (degrees C before scaling) */
135 actual_temp = temp & 0x1ff;
/* degrees C -> millidegrees */
137 actual_temp = actual_temp * 1000;
142 /* get temperature in millidegrees */
143 int kv_get_temp(struct radeon_device *rdev)
/* raw thermal reading from SMC register 0xC0300E0C (no symbolic name) */
148 temp = RREG32_SMC(0xC0300E0C);
/* convert the raw value to degrees C: raw/8 minus a fixed 49 C offset */
151 actual_temp = (temp / 8) - 49;
/* degrees C -> millidegrees */
155 actual_temp = actual_temp * 1000;
161 * Indirect registers accessor
/*
 * cik_pciep_rreg - read a PCIE port indirect register.
 * Holds pciep_idx_lock across the index write and data read; the
 * discarded RREG32(PCIE_INDEX) read-back flushes the index write
 * before the data register is sampled.
 */
163 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
168 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
169 WREG32(PCIE_INDEX, reg);
170 (void)RREG32(PCIE_INDEX);
171 r = RREG32(PCIE_DATA);
172 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * cik_pciep_wreg - write a PCIE port indirect register.
 * Same index/data sequence as the read accessor, under pciep_idx_lock;
 * the discarded read-backs after each write flush the index and data
 * writes to the hardware.
 */
176 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
180 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
181 WREG32(PCIE_INDEX, reg);
182 (void)RREG32(PCIE_INDEX);
183 WREG32(PCIE_DATA, v);
184 (void)RREG32(PCIE_DATA);
185 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
188 static const u32 spectre_rlc_save_restore_register_list[] =
190 (0x0e00 << 16) | (0xc12c >> 2),
192 (0x0e00 << 16) | (0xc140 >> 2),
194 (0x0e00 << 16) | (0xc150 >> 2),
196 (0x0e00 << 16) | (0xc15c >> 2),
198 (0x0e00 << 16) | (0xc168 >> 2),
200 (0x0e00 << 16) | (0xc170 >> 2),
202 (0x0e00 << 16) | (0xc178 >> 2),
204 (0x0e00 << 16) | (0xc204 >> 2),
206 (0x0e00 << 16) | (0xc2b4 >> 2),
208 (0x0e00 << 16) | (0xc2b8 >> 2),
210 (0x0e00 << 16) | (0xc2bc >> 2),
212 (0x0e00 << 16) | (0xc2c0 >> 2),
214 (0x0e00 << 16) | (0x8228 >> 2),
216 (0x0e00 << 16) | (0x829c >> 2),
218 (0x0e00 << 16) | (0x869c >> 2),
220 (0x0600 << 16) | (0x98f4 >> 2),
222 (0x0e00 << 16) | (0x98f8 >> 2),
224 (0x0e00 << 16) | (0x9900 >> 2),
226 (0x0e00 << 16) | (0xc260 >> 2),
228 (0x0e00 << 16) | (0x90e8 >> 2),
230 (0x0e00 << 16) | (0x3c000 >> 2),
232 (0x0e00 << 16) | (0x3c00c >> 2),
234 (0x0e00 << 16) | (0x8c1c >> 2),
236 (0x0e00 << 16) | (0x9700 >> 2),
238 (0x0e00 << 16) | (0xcd20 >> 2),
240 (0x4e00 << 16) | (0xcd20 >> 2),
242 (0x5e00 << 16) | (0xcd20 >> 2),
244 (0x6e00 << 16) | (0xcd20 >> 2),
246 (0x7e00 << 16) | (0xcd20 >> 2),
248 (0x8e00 << 16) | (0xcd20 >> 2),
250 (0x9e00 << 16) | (0xcd20 >> 2),
252 (0xae00 << 16) | (0xcd20 >> 2),
254 (0xbe00 << 16) | (0xcd20 >> 2),
256 (0x0e00 << 16) | (0x89bc >> 2),
258 (0x0e00 << 16) | (0x8900 >> 2),
261 (0x0e00 << 16) | (0xc130 >> 2),
263 (0x0e00 << 16) | (0xc134 >> 2),
265 (0x0e00 << 16) | (0xc1fc >> 2),
267 (0x0e00 << 16) | (0xc208 >> 2),
269 (0x0e00 << 16) | (0xc264 >> 2),
271 (0x0e00 << 16) | (0xc268 >> 2),
273 (0x0e00 << 16) | (0xc26c >> 2),
275 (0x0e00 << 16) | (0xc270 >> 2),
277 (0x0e00 << 16) | (0xc274 >> 2),
279 (0x0e00 << 16) | (0xc278 >> 2),
281 (0x0e00 << 16) | (0xc27c >> 2),
283 (0x0e00 << 16) | (0xc280 >> 2),
285 (0x0e00 << 16) | (0xc284 >> 2),
287 (0x0e00 << 16) | (0xc288 >> 2),
289 (0x0e00 << 16) | (0xc28c >> 2),
291 (0x0e00 << 16) | (0xc290 >> 2),
293 (0x0e00 << 16) | (0xc294 >> 2),
295 (0x0e00 << 16) | (0xc298 >> 2),
297 (0x0e00 << 16) | (0xc29c >> 2),
299 (0x0e00 << 16) | (0xc2a0 >> 2),
301 (0x0e00 << 16) | (0xc2a4 >> 2),
303 (0x0e00 << 16) | (0xc2a8 >> 2),
305 (0x0e00 << 16) | (0xc2ac >> 2),
307 (0x0e00 << 16) | (0xc2b0 >> 2),
309 (0x0e00 << 16) | (0x301d0 >> 2),
311 (0x0e00 << 16) | (0x30238 >> 2),
313 (0x0e00 << 16) | (0x30250 >> 2),
315 (0x0e00 << 16) | (0x30254 >> 2),
317 (0x0e00 << 16) | (0x30258 >> 2),
319 (0x0e00 << 16) | (0x3025c >> 2),
321 (0x4e00 << 16) | (0xc900 >> 2),
323 (0x5e00 << 16) | (0xc900 >> 2),
325 (0x6e00 << 16) | (0xc900 >> 2),
327 (0x7e00 << 16) | (0xc900 >> 2),
329 (0x8e00 << 16) | (0xc900 >> 2),
331 (0x9e00 << 16) | (0xc900 >> 2),
333 (0xae00 << 16) | (0xc900 >> 2),
335 (0xbe00 << 16) | (0xc900 >> 2),
337 (0x4e00 << 16) | (0xc904 >> 2),
339 (0x5e00 << 16) | (0xc904 >> 2),
341 (0x6e00 << 16) | (0xc904 >> 2),
343 (0x7e00 << 16) | (0xc904 >> 2),
345 (0x8e00 << 16) | (0xc904 >> 2),
347 (0x9e00 << 16) | (0xc904 >> 2),
349 (0xae00 << 16) | (0xc904 >> 2),
351 (0xbe00 << 16) | (0xc904 >> 2),
353 (0x4e00 << 16) | (0xc908 >> 2),
355 (0x5e00 << 16) | (0xc908 >> 2),
357 (0x6e00 << 16) | (0xc908 >> 2),
359 (0x7e00 << 16) | (0xc908 >> 2),
361 (0x8e00 << 16) | (0xc908 >> 2),
363 (0x9e00 << 16) | (0xc908 >> 2),
365 (0xae00 << 16) | (0xc908 >> 2),
367 (0xbe00 << 16) | (0xc908 >> 2),
369 (0x4e00 << 16) | (0xc90c >> 2),
371 (0x5e00 << 16) | (0xc90c >> 2),
373 (0x6e00 << 16) | (0xc90c >> 2),
375 (0x7e00 << 16) | (0xc90c >> 2),
377 (0x8e00 << 16) | (0xc90c >> 2),
379 (0x9e00 << 16) | (0xc90c >> 2),
381 (0xae00 << 16) | (0xc90c >> 2),
383 (0xbe00 << 16) | (0xc90c >> 2),
385 (0x4e00 << 16) | (0xc910 >> 2),
387 (0x5e00 << 16) | (0xc910 >> 2),
389 (0x6e00 << 16) | (0xc910 >> 2),
391 (0x7e00 << 16) | (0xc910 >> 2),
393 (0x8e00 << 16) | (0xc910 >> 2),
395 (0x9e00 << 16) | (0xc910 >> 2),
397 (0xae00 << 16) | (0xc910 >> 2),
399 (0xbe00 << 16) | (0xc910 >> 2),
401 (0x0e00 << 16) | (0xc99c >> 2),
403 (0x0e00 << 16) | (0x9834 >> 2),
405 (0x0000 << 16) | (0x30f00 >> 2),
407 (0x0001 << 16) | (0x30f00 >> 2),
409 (0x0000 << 16) | (0x30f04 >> 2),
411 (0x0001 << 16) | (0x30f04 >> 2),
413 (0x0000 << 16) | (0x30f08 >> 2),
415 (0x0001 << 16) | (0x30f08 >> 2),
417 (0x0000 << 16) | (0x30f0c >> 2),
419 (0x0001 << 16) | (0x30f0c >> 2),
421 (0x0600 << 16) | (0x9b7c >> 2),
423 (0x0e00 << 16) | (0x8a14 >> 2),
425 (0x0e00 << 16) | (0x8a18 >> 2),
427 (0x0600 << 16) | (0x30a00 >> 2),
429 (0x0e00 << 16) | (0x8bf0 >> 2),
431 (0x0e00 << 16) | (0x8bcc >> 2),
433 (0x0e00 << 16) | (0x8b24 >> 2),
435 (0x0e00 << 16) | (0x30a04 >> 2),
437 (0x0600 << 16) | (0x30a10 >> 2),
439 (0x0600 << 16) | (0x30a14 >> 2),
441 (0x0600 << 16) | (0x30a18 >> 2),
443 (0x0600 << 16) | (0x30a2c >> 2),
445 (0x0e00 << 16) | (0xc700 >> 2),
447 (0x0e00 << 16) | (0xc704 >> 2),
449 (0x0e00 << 16) | (0xc708 >> 2),
451 (0x0e00 << 16) | (0xc768 >> 2),
453 (0x0400 << 16) | (0xc770 >> 2),
455 (0x0400 << 16) | (0xc774 >> 2),
457 (0x0400 << 16) | (0xc778 >> 2),
459 (0x0400 << 16) | (0xc77c >> 2),
461 (0x0400 << 16) | (0xc780 >> 2),
463 (0x0400 << 16) | (0xc784 >> 2),
465 (0x0400 << 16) | (0xc788 >> 2),
467 (0x0400 << 16) | (0xc78c >> 2),
469 (0x0400 << 16) | (0xc798 >> 2),
471 (0x0400 << 16) | (0xc79c >> 2),
473 (0x0400 << 16) | (0xc7a0 >> 2),
475 (0x0400 << 16) | (0xc7a4 >> 2),
477 (0x0400 << 16) | (0xc7a8 >> 2),
479 (0x0400 << 16) | (0xc7ac >> 2),
481 (0x0400 << 16) | (0xc7b0 >> 2),
483 (0x0400 << 16) | (0xc7b4 >> 2),
485 (0x0e00 << 16) | (0x9100 >> 2),
487 (0x0e00 << 16) | (0x3c010 >> 2),
489 (0x0e00 << 16) | (0x92a8 >> 2),
491 (0x0e00 << 16) | (0x92ac >> 2),
493 (0x0e00 << 16) | (0x92b4 >> 2),
495 (0x0e00 << 16) | (0x92b8 >> 2),
497 (0x0e00 << 16) | (0x92bc >> 2),
499 (0x0e00 << 16) | (0x92c0 >> 2),
501 (0x0e00 << 16) | (0x92c4 >> 2),
503 (0x0e00 << 16) | (0x92c8 >> 2),
505 (0x0e00 << 16) | (0x92cc >> 2),
507 (0x0e00 << 16) | (0x92d0 >> 2),
509 (0x0e00 << 16) | (0x8c00 >> 2),
511 (0x0e00 << 16) | (0x8c04 >> 2),
513 (0x0e00 << 16) | (0x8c20 >> 2),
515 (0x0e00 << 16) | (0x8c38 >> 2),
517 (0x0e00 << 16) | (0x8c3c >> 2),
519 (0x0e00 << 16) | (0xae00 >> 2),
521 (0x0e00 << 16) | (0x9604 >> 2),
523 (0x0e00 << 16) | (0xac08 >> 2),
525 (0x0e00 << 16) | (0xac0c >> 2),
527 (0x0e00 << 16) | (0xac10 >> 2),
529 (0x0e00 << 16) | (0xac14 >> 2),
531 (0x0e00 << 16) | (0xac58 >> 2),
533 (0x0e00 << 16) | (0xac68 >> 2),
535 (0x0e00 << 16) | (0xac6c >> 2),
537 (0x0e00 << 16) | (0xac70 >> 2),
539 (0x0e00 << 16) | (0xac74 >> 2),
541 (0x0e00 << 16) | (0xac78 >> 2),
543 (0x0e00 << 16) | (0xac7c >> 2),
545 (0x0e00 << 16) | (0xac80 >> 2),
547 (0x0e00 << 16) | (0xac84 >> 2),
549 (0x0e00 << 16) | (0xac88 >> 2),
551 (0x0e00 << 16) | (0xac8c >> 2),
553 (0x0e00 << 16) | (0x970c >> 2),
555 (0x0e00 << 16) | (0x9714 >> 2),
557 (0x0e00 << 16) | (0x9718 >> 2),
559 (0x0e00 << 16) | (0x971c >> 2),
561 (0x0e00 << 16) | (0x31068 >> 2),
563 (0x4e00 << 16) | (0x31068 >> 2),
565 (0x5e00 << 16) | (0x31068 >> 2),
567 (0x6e00 << 16) | (0x31068 >> 2),
569 (0x7e00 << 16) | (0x31068 >> 2),
571 (0x8e00 << 16) | (0x31068 >> 2),
573 (0x9e00 << 16) | (0x31068 >> 2),
575 (0xae00 << 16) | (0x31068 >> 2),
577 (0xbe00 << 16) | (0x31068 >> 2),
579 (0x0e00 << 16) | (0xcd10 >> 2),
581 (0x0e00 << 16) | (0xcd14 >> 2),
583 (0x0e00 << 16) | (0x88b0 >> 2),
585 (0x0e00 << 16) | (0x88b4 >> 2),
587 (0x0e00 << 16) | (0x88b8 >> 2),
589 (0x0e00 << 16) | (0x88bc >> 2),
591 (0x0400 << 16) | (0x89c0 >> 2),
593 (0x0e00 << 16) | (0x88c4 >> 2),
595 (0x0e00 << 16) | (0x88c8 >> 2),
597 (0x0e00 << 16) | (0x88d0 >> 2),
599 (0x0e00 << 16) | (0x88d4 >> 2),
601 (0x0e00 << 16) | (0x88d8 >> 2),
603 (0x0e00 << 16) | (0x8980 >> 2),
605 (0x0e00 << 16) | (0x30938 >> 2),
607 (0x0e00 << 16) | (0x3093c >> 2),
609 (0x0e00 << 16) | (0x30940 >> 2),
611 (0x0e00 << 16) | (0x89a0 >> 2),
613 (0x0e00 << 16) | (0x30900 >> 2),
615 (0x0e00 << 16) | (0x30904 >> 2),
617 (0x0e00 << 16) | (0x89b4 >> 2),
619 (0x0e00 << 16) | (0x3c210 >> 2),
621 (0x0e00 << 16) | (0x3c214 >> 2),
623 (0x0e00 << 16) | (0x3c218 >> 2),
625 (0x0e00 << 16) | (0x8904 >> 2),
628 (0x0e00 << 16) | (0x8c28 >> 2),
629 (0x0e00 << 16) | (0x8c2c >> 2),
630 (0x0e00 << 16) | (0x8c30 >> 2),
631 (0x0e00 << 16) | (0x8c34 >> 2),
632 (0x0e00 << 16) | (0x9600 >> 2),
635 static const u32 kalindi_rlc_save_restore_register_list[] =
637 (0x0e00 << 16) | (0xc12c >> 2),
639 (0x0e00 << 16) | (0xc140 >> 2),
641 (0x0e00 << 16) | (0xc150 >> 2),
643 (0x0e00 << 16) | (0xc15c >> 2),
645 (0x0e00 << 16) | (0xc168 >> 2),
647 (0x0e00 << 16) | (0xc170 >> 2),
649 (0x0e00 << 16) | (0xc204 >> 2),
651 (0x0e00 << 16) | (0xc2b4 >> 2),
653 (0x0e00 << 16) | (0xc2b8 >> 2),
655 (0x0e00 << 16) | (0xc2bc >> 2),
657 (0x0e00 << 16) | (0xc2c0 >> 2),
659 (0x0e00 << 16) | (0x8228 >> 2),
661 (0x0e00 << 16) | (0x829c >> 2),
663 (0x0e00 << 16) | (0x869c >> 2),
665 (0x0600 << 16) | (0x98f4 >> 2),
667 (0x0e00 << 16) | (0x98f8 >> 2),
669 (0x0e00 << 16) | (0x9900 >> 2),
671 (0x0e00 << 16) | (0xc260 >> 2),
673 (0x0e00 << 16) | (0x90e8 >> 2),
675 (0x0e00 << 16) | (0x3c000 >> 2),
677 (0x0e00 << 16) | (0x3c00c >> 2),
679 (0x0e00 << 16) | (0x8c1c >> 2),
681 (0x0e00 << 16) | (0x9700 >> 2),
683 (0x0e00 << 16) | (0xcd20 >> 2),
685 (0x4e00 << 16) | (0xcd20 >> 2),
687 (0x5e00 << 16) | (0xcd20 >> 2),
689 (0x6e00 << 16) | (0xcd20 >> 2),
691 (0x7e00 << 16) | (0xcd20 >> 2),
693 (0x0e00 << 16) | (0x89bc >> 2),
695 (0x0e00 << 16) | (0x8900 >> 2),
698 (0x0e00 << 16) | (0xc130 >> 2),
700 (0x0e00 << 16) | (0xc134 >> 2),
702 (0x0e00 << 16) | (0xc1fc >> 2),
704 (0x0e00 << 16) | (0xc208 >> 2),
706 (0x0e00 << 16) | (0xc264 >> 2),
708 (0x0e00 << 16) | (0xc268 >> 2),
710 (0x0e00 << 16) | (0xc26c >> 2),
712 (0x0e00 << 16) | (0xc270 >> 2),
714 (0x0e00 << 16) | (0xc274 >> 2),
716 (0x0e00 << 16) | (0xc28c >> 2),
718 (0x0e00 << 16) | (0xc290 >> 2),
720 (0x0e00 << 16) | (0xc294 >> 2),
722 (0x0e00 << 16) | (0xc298 >> 2),
724 (0x0e00 << 16) | (0xc2a0 >> 2),
726 (0x0e00 << 16) | (0xc2a4 >> 2),
728 (0x0e00 << 16) | (0xc2a8 >> 2),
730 (0x0e00 << 16) | (0xc2ac >> 2),
732 (0x0e00 << 16) | (0x301d0 >> 2),
734 (0x0e00 << 16) | (0x30238 >> 2),
736 (0x0e00 << 16) | (0x30250 >> 2),
738 (0x0e00 << 16) | (0x30254 >> 2),
740 (0x0e00 << 16) | (0x30258 >> 2),
742 (0x0e00 << 16) | (0x3025c >> 2),
744 (0x4e00 << 16) | (0xc900 >> 2),
746 (0x5e00 << 16) | (0xc900 >> 2),
748 (0x6e00 << 16) | (0xc900 >> 2),
750 (0x7e00 << 16) | (0xc900 >> 2),
752 (0x4e00 << 16) | (0xc904 >> 2),
754 (0x5e00 << 16) | (0xc904 >> 2),
756 (0x6e00 << 16) | (0xc904 >> 2),
758 (0x7e00 << 16) | (0xc904 >> 2),
760 (0x4e00 << 16) | (0xc908 >> 2),
762 (0x5e00 << 16) | (0xc908 >> 2),
764 (0x6e00 << 16) | (0xc908 >> 2),
766 (0x7e00 << 16) | (0xc908 >> 2),
768 (0x4e00 << 16) | (0xc90c >> 2),
770 (0x5e00 << 16) | (0xc90c >> 2),
772 (0x6e00 << 16) | (0xc90c >> 2),
774 (0x7e00 << 16) | (0xc90c >> 2),
776 (0x4e00 << 16) | (0xc910 >> 2),
778 (0x5e00 << 16) | (0xc910 >> 2),
780 (0x6e00 << 16) | (0xc910 >> 2),
782 (0x7e00 << 16) | (0xc910 >> 2),
784 (0x0e00 << 16) | (0xc99c >> 2),
786 (0x0e00 << 16) | (0x9834 >> 2),
788 (0x0000 << 16) | (0x30f00 >> 2),
790 (0x0000 << 16) | (0x30f04 >> 2),
792 (0x0000 << 16) | (0x30f08 >> 2),
794 (0x0000 << 16) | (0x30f0c >> 2),
796 (0x0600 << 16) | (0x9b7c >> 2),
798 (0x0e00 << 16) | (0x8a14 >> 2),
800 (0x0e00 << 16) | (0x8a18 >> 2),
802 (0x0600 << 16) | (0x30a00 >> 2),
804 (0x0e00 << 16) | (0x8bf0 >> 2),
806 (0x0e00 << 16) | (0x8bcc >> 2),
808 (0x0e00 << 16) | (0x8b24 >> 2),
810 (0x0e00 << 16) | (0x30a04 >> 2),
812 (0x0600 << 16) | (0x30a10 >> 2),
814 (0x0600 << 16) | (0x30a14 >> 2),
816 (0x0600 << 16) | (0x30a18 >> 2),
818 (0x0600 << 16) | (0x30a2c >> 2),
820 (0x0e00 << 16) | (0xc700 >> 2),
822 (0x0e00 << 16) | (0xc704 >> 2),
824 (0x0e00 << 16) | (0xc708 >> 2),
826 (0x0e00 << 16) | (0xc768 >> 2),
828 (0x0400 << 16) | (0xc770 >> 2),
830 (0x0400 << 16) | (0xc774 >> 2),
832 (0x0400 << 16) | (0xc798 >> 2),
834 (0x0400 << 16) | (0xc79c >> 2),
836 (0x0e00 << 16) | (0x9100 >> 2),
838 (0x0e00 << 16) | (0x3c010 >> 2),
840 (0x0e00 << 16) | (0x8c00 >> 2),
842 (0x0e00 << 16) | (0x8c04 >> 2),
844 (0x0e00 << 16) | (0x8c20 >> 2),
846 (0x0e00 << 16) | (0x8c38 >> 2),
848 (0x0e00 << 16) | (0x8c3c >> 2),
850 (0x0e00 << 16) | (0xae00 >> 2),
852 (0x0e00 << 16) | (0x9604 >> 2),
854 (0x0e00 << 16) | (0xac08 >> 2),
856 (0x0e00 << 16) | (0xac0c >> 2),
858 (0x0e00 << 16) | (0xac10 >> 2),
860 (0x0e00 << 16) | (0xac14 >> 2),
862 (0x0e00 << 16) | (0xac58 >> 2),
864 (0x0e00 << 16) | (0xac68 >> 2),
866 (0x0e00 << 16) | (0xac6c >> 2),
868 (0x0e00 << 16) | (0xac70 >> 2),
870 (0x0e00 << 16) | (0xac74 >> 2),
872 (0x0e00 << 16) | (0xac78 >> 2),
874 (0x0e00 << 16) | (0xac7c >> 2),
876 (0x0e00 << 16) | (0xac80 >> 2),
878 (0x0e00 << 16) | (0xac84 >> 2),
880 (0x0e00 << 16) | (0xac88 >> 2),
882 (0x0e00 << 16) | (0xac8c >> 2),
884 (0x0e00 << 16) | (0x970c >> 2),
886 (0x0e00 << 16) | (0x9714 >> 2),
888 (0x0e00 << 16) | (0x9718 >> 2),
890 (0x0e00 << 16) | (0x971c >> 2),
892 (0x0e00 << 16) | (0x31068 >> 2),
894 (0x4e00 << 16) | (0x31068 >> 2),
896 (0x5e00 << 16) | (0x31068 >> 2),
898 (0x6e00 << 16) | (0x31068 >> 2),
900 (0x7e00 << 16) | (0x31068 >> 2),
902 (0x0e00 << 16) | (0xcd10 >> 2),
904 (0x0e00 << 16) | (0xcd14 >> 2),
906 (0x0e00 << 16) | (0x88b0 >> 2),
908 (0x0e00 << 16) | (0x88b4 >> 2),
910 (0x0e00 << 16) | (0x88b8 >> 2),
912 (0x0e00 << 16) | (0x88bc >> 2),
914 (0x0400 << 16) | (0x89c0 >> 2),
916 (0x0e00 << 16) | (0x88c4 >> 2),
918 (0x0e00 << 16) | (0x88c8 >> 2),
920 (0x0e00 << 16) | (0x88d0 >> 2),
922 (0x0e00 << 16) | (0x88d4 >> 2),
924 (0x0e00 << 16) | (0x88d8 >> 2),
926 (0x0e00 << 16) | (0x8980 >> 2),
928 (0x0e00 << 16) | (0x30938 >> 2),
930 (0x0e00 << 16) | (0x3093c >> 2),
932 (0x0e00 << 16) | (0x30940 >> 2),
934 (0x0e00 << 16) | (0x89a0 >> 2),
936 (0x0e00 << 16) | (0x30900 >> 2),
938 (0x0e00 << 16) | (0x30904 >> 2),
940 (0x0e00 << 16) | (0x89b4 >> 2),
942 (0x0e00 << 16) | (0x3e1fc >> 2),
944 (0x0e00 << 16) | (0x3c210 >> 2),
946 (0x0e00 << 16) | (0x3c214 >> 2),
948 (0x0e00 << 16) | (0x3c218 >> 2),
950 (0x0e00 << 16) | (0x8904 >> 2),
953 (0x0e00 << 16) | (0x8c28 >> 2),
954 (0x0e00 << 16) | (0x8c2c >> 2),
955 (0x0e00 << 16) | (0x8c30 >> 2),
956 (0x0e00 << 16) | (0x8c34 >> 2),
957 (0x0e00 << 16) | (0x9600 >> 2),
/* NOTE(review): golden-register tables look like {offset, and-mask, or-value}
 * triples — confirm against the radeon_program_register_sequence() callers. */
960 static const u32 bonaire_golden_spm_registers[] =
962 0x30800, 0xe0ffffff, 0xe0000000
/* presumed {offset, mask, value} triples — verify with the caller */
965 static const u32 bonaire_golden_common_registers[] =
967 0xc770, 0xffffffff, 0x00000800,
968 0xc774, 0xffffffff, 0x00000800,
969 0xc798, 0xffffffff, 0x00007fbf,
970 0xc79c, 0xffffffff, 0x00007faf
973 static const u32 bonaire_golden_registers[] =
975 0x3354, 0x00000333, 0x00000333,
976 0x3350, 0x000c0fc0, 0x00040200,
977 0x9a10, 0x00010000, 0x00058208,
978 0x3c000, 0xffff1fff, 0x00140000,
979 0x3c200, 0xfdfc0fff, 0x00000100,
980 0x3c234, 0x40000000, 0x40000200,
981 0x9830, 0xffffffff, 0x00000000,
982 0x9834, 0xf00fffff, 0x00000400,
983 0x9838, 0x0002021c, 0x00020200,
984 0xc78, 0x00000080, 0x00000000,
985 0x5bb0, 0x000000f0, 0x00000070,
986 0x5bc0, 0xf0311fff, 0x80300000,
987 0x98f8, 0x73773777, 0x12010001,
988 0x350c, 0x00810000, 0x408af000,
989 0x7030, 0x31000111, 0x00000011,
990 0x2f48, 0x73773777, 0x12010001,
991 0x220c, 0x00007fb6, 0x0021a1b1,
992 0x2210, 0x00007fb6, 0x002021b1,
993 0x2180, 0x00007fb6, 0x00002191,
994 0x2218, 0x00007fb6, 0x002121b1,
995 0x221c, 0x00007fb6, 0x002021b1,
996 0x21dc, 0x00007fb6, 0x00002191,
997 0x21e0, 0x00007fb6, 0x00002191,
998 0x3628, 0x0000003f, 0x0000000a,
999 0x362c, 0x0000003f, 0x0000000a,
1000 0x2ae4, 0x00073ffe, 0x000022a2,
1001 0x240c, 0x000007ff, 0x00000000,
1002 0x8a14, 0xf000003f, 0x00000007,
1003 0x8bf0, 0x00002001, 0x00000001,
1004 0x8b24, 0xffffffff, 0x00ffffff,
1005 0x30a04, 0x0000ff0f, 0x00000000,
1006 0x28a4c, 0x07ffffff, 0x06000000,
1007 0x4d8, 0x00000fff, 0x00000100,
1008 0x3e78, 0x00000001, 0x00000002,
1009 0x9100, 0x03000000, 0x0362c688,
1010 0x8c00, 0x000000ff, 0x00000001,
1011 0xe40, 0x00001fff, 0x00001fff,
1012 0x9060, 0x0000007f, 0x00000020,
1013 0x9508, 0x00010000, 0x00010000,
1014 0xac14, 0x000003ff, 0x000000f3,
1015 0xac0c, 0xffffffff, 0x00001032
1018 static const u32 bonaire_mgcg_cgcg_init[] =
1020 0xc420, 0xffffffff, 0xfffffffc,
1021 0x30800, 0xffffffff, 0xe0000000,
1022 0x3c2a0, 0xffffffff, 0x00000100,
1023 0x3c208, 0xffffffff, 0x00000100,
1024 0x3c2c0, 0xffffffff, 0xc0000100,
1025 0x3c2c8, 0xffffffff, 0xc0000100,
1026 0x3c2c4, 0xffffffff, 0xc0000100,
1027 0x55e4, 0xffffffff, 0x00600100,
1028 0x3c280, 0xffffffff, 0x00000100,
1029 0x3c214, 0xffffffff, 0x06000100,
1030 0x3c220, 0xffffffff, 0x00000100,
1031 0x3c218, 0xffffffff, 0x06000100,
1032 0x3c204, 0xffffffff, 0x00000100,
1033 0x3c2e0, 0xffffffff, 0x00000100,
1034 0x3c224, 0xffffffff, 0x00000100,
1035 0x3c200, 0xffffffff, 0x00000100,
1036 0x3c230, 0xffffffff, 0x00000100,
1037 0x3c234, 0xffffffff, 0x00000100,
1038 0x3c250, 0xffffffff, 0x00000100,
1039 0x3c254, 0xffffffff, 0x00000100,
1040 0x3c258, 0xffffffff, 0x00000100,
1041 0x3c25c, 0xffffffff, 0x00000100,
1042 0x3c260, 0xffffffff, 0x00000100,
1043 0x3c27c, 0xffffffff, 0x00000100,
1044 0x3c278, 0xffffffff, 0x00000100,
1045 0x3c210, 0xffffffff, 0x06000100,
1046 0x3c290, 0xffffffff, 0x00000100,
1047 0x3c274, 0xffffffff, 0x00000100,
1048 0x3c2b4, 0xffffffff, 0x00000100,
1049 0x3c2b0, 0xffffffff, 0x00000100,
1050 0x3c270, 0xffffffff, 0x00000100,
1051 0x30800, 0xffffffff, 0xe0000000,
1052 0x3c020, 0xffffffff, 0x00010000,
1053 0x3c024, 0xffffffff, 0x00030002,
1054 0x3c028, 0xffffffff, 0x00040007,
1055 0x3c02c, 0xffffffff, 0x00060005,
1056 0x3c030, 0xffffffff, 0x00090008,
1057 0x3c034, 0xffffffff, 0x00010000,
1058 0x3c038, 0xffffffff, 0x00030002,
1059 0x3c03c, 0xffffffff, 0x00040007,
1060 0x3c040, 0xffffffff, 0x00060005,
1061 0x3c044, 0xffffffff, 0x00090008,
1062 0x3c048, 0xffffffff, 0x00010000,
1063 0x3c04c, 0xffffffff, 0x00030002,
1064 0x3c050, 0xffffffff, 0x00040007,
1065 0x3c054, 0xffffffff, 0x00060005,
1066 0x3c058, 0xffffffff, 0x00090008,
1067 0x3c05c, 0xffffffff, 0x00010000,
1068 0x3c060, 0xffffffff, 0x00030002,
1069 0x3c064, 0xffffffff, 0x00040007,
1070 0x3c068, 0xffffffff, 0x00060005,
1071 0x3c06c, 0xffffffff, 0x00090008,
1072 0x3c070, 0xffffffff, 0x00010000,
1073 0x3c074, 0xffffffff, 0x00030002,
1074 0x3c078, 0xffffffff, 0x00040007,
1075 0x3c07c, 0xffffffff, 0x00060005,
1076 0x3c080, 0xffffffff, 0x00090008,
1077 0x3c084, 0xffffffff, 0x00010000,
1078 0x3c088, 0xffffffff, 0x00030002,
1079 0x3c08c, 0xffffffff, 0x00040007,
1080 0x3c090, 0xffffffff, 0x00060005,
1081 0x3c094, 0xffffffff, 0x00090008,
1082 0x3c098, 0xffffffff, 0x00010000,
1083 0x3c09c, 0xffffffff, 0x00030002,
1084 0x3c0a0, 0xffffffff, 0x00040007,
1085 0x3c0a4, 0xffffffff, 0x00060005,
1086 0x3c0a8, 0xffffffff, 0x00090008,
1087 0x3c000, 0xffffffff, 0x96e00200,
1088 0x8708, 0xffffffff, 0x00900100,
1089 0xc424, 0xffffffff, 0x0020003f,
1090 0x38, 0xffffffff, 0x0140001c,
1091 0x3c, 0x000f0000, 0x000f0000,
1092 0x220, 0xffffffff, 0xC060000C,
1093 0x224, 0xc0000fff, 0x00000100,
1094 0xf90, 0xffffffff, 0x00000100,
1095 0xf98, 0x00000101, 0x00000000,
1096 0x20a8, 0xffffffff, 0x00000104,
1097 0x55e4, 0xff000fff, 0x00000100,
1098 0x30cc, 0xc0000fff, 0x00000104,
1099 0xc1e4, 0x00000001, 0x00000001,
1100 0xd00c, 0xff000ff0, 0x00000100,
1101 0xd80c, 0xff000ff0, 0x00000100
/* presumed {offset, mask, value} triple — verify with the caller */
1104 static const u32 spectre_golden_spm_registers[] =
1106 0x30800, 0xe0ffffff, 0xe0000000
/* presumed {offset, mask, value} triples — same values as the bonaire table */
1109 static const u32 spectre_golden_common_registers[] =
1111 0xc770, 0xffffffff, 0x00000800,
1112 0xc774, 0xffffffff, 0x00000800,
1113 0xc798, 0xffffffff, 0x00007fbf,
1114 0xc79c, 0xffffffff, 0x00007faf
1117 static const u32 spectre_golden_registers[] =
1119 0x3c000, 0xffff1fff, 0x96940200,
1120 0x3c00c, 0xffff0001, 0xff000000,
1121 0x3c200, 0xfffc0fff, 0x00000100,
1122 0x6ed8, 0x00010101, 0x00010000,
1123 0x9834, 0xf00fffff, 0x00000400,
1124 0x9838, 0xfffffffc, 0x00020200,
1125 0x5bb0, 0x000000f0, 0x00000070,
1126 0x5bc0, 0xf0311fff, 0x80300000,
1127 0x98f8, 0x73773777, 0x12010001,
1128 0x9b7c, 0x00ff0000, 0x00fc0000,
1129 0x2f48, 0x73773777, 0x12010001,
1130 0x8a14, 0xf000003f, 0x00000007,
1131 0x8b24, 0xffffffff, 0x00ffffff,
1132 0x28350, 0x3f3f3fff, 0x00000082,
1133 0x28354, 0x0000003f, 0x00000000,
1134 0x3e78, 0x00000001, 0x00000002,
1135 0x913c, 0xffff03df, 0x00000004,
1136 0xc768, 0x00000008, 0x00000008,
1137 0x8c00, 0x000008ff, 0x00000800,
1138 0x9508, 0x00010000, 0x00010000,
1139 0xac0c, 0xffffffff, 0x54763210,
1140 0x214f8, 0x01ff01ff, 0x00000002,
1141 0x21498, 0x007ff800, 0x00200000,
1142 0x2015c, 0xffffffff, 0x00000f40,
1143 0x30934, 0xffffffff, 0x00000001
1146 static const u32 spectre_mgcg_cgcg_init[] =
1148 0xc420, 0xffffffff, 0xfffffffc,
1149 0x30800, 0xffffffff, 0xe0000000,
1150 0x3c2a0, 0xffffffff, 0x00000100,
1151 0x3c208, 0xffffffff, 0x00000100,
1152 0x3c2c0, 0xffffffff, 0x00000100,
1153 0x3c2c8, 0xffffffff, 0x00000100,
1154 0x3c2c4, 0xffffffff, 0x00000100,
1155 0x55e4, 0xffffffff, 0x00600100,
1156 0x3c280, 0xffffffff, 0x00000100,
1157 0x3c214, 0xffffffff, 0x06000100,
1158 0x3c220, 0xffffffff, 0x00000100,
1159 0x3c218, 0xffffffff, 0x06000100,
1160 0x3c204, 0xffffffff, 0x00000100,
1161 0x3c2e0, 0xffffffff, 0x00000100,
1162 0x3c224, 0xffffffff, 0x00000100,
1163 0x3c200, 0xffffffff, 0x00000100,
1164 0x3c230, 0xffffffff, 0x00000100,
1165 0x3c234, 0xffffffff, 0x00000100,
1166 0x3c250, 0xffffffff, 0x00000100,
1167 0x3c254, 0xffffffff, 0x00000100,
1168 0x3c258, 0xffffffff, 0x00000100,
1169 0x3c25c, 0xffffffff, 0x00000100,
1170 0x3c260, 0xffffffff, 0x00000100,
1171 0x3c27c, 0xffffffff, 0x00000100,
1172 0x3c278, 0xffffffff, 0x00000100,
1173 0x3c210, 0xffffffff, 0x06000100,
1174 0x3c290, 0xffffffff, 0x00000100,
1175 0x3c274, 0xffffffff, 0x00000100,
1176 0x3c2b4, 0xffffffff, 0x00000100,
1177 0x3c2b0, 0xffffffff, 0x00000100,
1178 0x3c270, 0xffffffff, 0x00000100,
1179 0x30800, 0xffffffff, 0xe0000000,
1180 0x3c020, 0xffffffff, 0x00010000,
1181 0x3c024, 0xffffffff, 0x00030002,
1182 0x3c028, 0xffffffff, 0x00040007,
1183 0x3c02c, 0xffffffff, 0x00060005,
1184 0x3c030, 0xffffffff, 0x00090008,
1185 0x3c034, 0xffffffff, 0x00010000,
1186 0x3c038, 0xffffffff, 0x00030002,
1187 0x3c03c, 0xffffffff, 0x00040007,
1188 0x3c040, 0xffffffff, 0x00060005,
1189 0x3c044, 0xffffffff, 0x00090008,
1190 0x3c048, 0xffffffff, 0x00010000,
1191 0x3c04c, 0xffffffff, 0x00030002,
1192 0x3c050, 0xffffffff, 0x00040007,
1193 0x3c054, 0xffffffff, 0x00060005,
1194 0x3c058, 0xffffffff, 0x00090008,
1195 0x3c05c, 0xffffffff, 0x00010000,
1196 0x3c060, 0xffffffff, 0x00030002,
1197 0x3c064, 0xffffffff, 0x00040007,
1198 0x3c068, 0xffffffff, 0x00060005,
1199 0x3c06c, 0xffffffff, 0x00090008,
1200 0x3c070, 0xffffffff, 0x00010000,
1201 0x3c074, 0xffffffff, 0x00030002,
1202 0x3c078, 0xffffffff, 0x00040007,
1203 0x3c07c, 0xffffffff, 0x00060005,
1204 0x3c080, 0xffffffff, 0x00090008,
1205 0x3c084, 0xffffffff, 0x00010000,
1206 0x3c088, 0xffffffff, 0x00030002,
1207 0x3c08c, 0xffffffff, 0x00040007,
1208 0x3c090, 0xffffffff, 0x00060005,
1209 0x3c094, 0xffffffff, 0x00090008,
1210 0x3c098, 0xffffffff, 0x00010000,
1211 0x3c09c, 0xffffffff, 0x00030002,
1212 0x3c0a0, 0xffffffff, 0x00040007,
1213 0x3c0a4, 0xffffffff, 0x00060005,
1214 0x3c0a8, 0xffffffff, 0x00090008,
1215 0x3c0ac, 0xffffffff, 0x00010000,
1216 0x3c0b0, 0xffffffff, 0x00030002,
1217 0x3c0b4, 0xffffffff, 0x00040007,
1218 0x3c0b8, 0xffffffff, 0x00060005,
1219 0x3c0bc, 0xffffffff, 0x00090008,
1220 0x3c000, 0xffffffff, 0x96e00200,
1221 0x8708, 0xffffffff, 0x00900100,
1222 0xc424, 0xffffffff, 0x0020003f,
1223 0x38, 0xffffffff, 0x0140001c,
1224 0x3c, 0x000f0000, 0x000f0000,
1225 0x220, 0xffffffff, 0xC060000C,
1226 0x224, 0xc0000fff, 0x00000100,
1227 0xf90, 0xffffffff, 0x00000100,
1228 0xf98, 0x00000101, 0x00000000,
1229 0x20a8, 0xffffffff, 0x00000104,
1230 0x55e4, 0xff000fff, 0x00000100,
1231 0x30cc, 0xc0000fff, 0x00000104,
1232 0xc1e4, 0x00000001, 0x00000001,
1233 0xd00c, 0xff000ff0, 0x00000100,
1234 0xd80c, 0xff000ff0, 0x00000100
/* presumed {offset, mask, value} triple — verify with the caller */
1237 static const u32 kalindi_golden_spm_registers[] =
1239 0x30800, 0xe0ffffff, 0xe0000000
/* presumed {offset, mask, value} triples — same values as the bonaire table */
1242 static const u32 kalindi_golden_common_registers[] =
1244 0xc770, 0xffffffff, 0x00000800,
1245 0xc774, 0xffffffff, 0x00000800,
1246 0xc798, 0xffffffff, 0x00007fbf,
1247 0xc79c, 0xffffffff, 0x00007faf
1250 static const u32 kalindi_golden_registers[] =
1252 0x3c000, 0xffffdfff, 0x6e944040,
1253 0x55e4, 0xff607fff, 0xfc000100,
1254 0x3c220, 0xff000fff, 0x00000100,
1255 0x3c224, 0xff000fff, 0x00000100,
1256 0x3c200, 0xfffc0fff, 0x00000100,
1257 0x6ed8, 0x00010101, 0x00010000,
1258 0x9830, 0xffffffff, 0x00000000,
1259 0x9834, 0xf00fffff, 0x00000400,
1260 0x5bb0, 0x000000f0, 0x00000070,
1261 0x5bc0, 0xf0311fff, 0x80300000,
1262 0x98f8, 0x73773777, 0x12010001,
1263 0x98fc, 0xffffffff, 0x00000010,
1264 0x9b7c, 0x00ff0000, 0x00fc0000,
1265 0x8030, 0x00001f0f, 0x0000100a,
1266 0x2f48, 0x73773777, 0x12010001,
1267 0x2408, 0x000fffff, 0x000c007f,
1268 0x8a14, 0xf000003f, 0x00000007,
1269 0x8b24, 0x3fff3fff, 0x00ffcfff,
1270 0x30a04, 0x0000ff0f, 0x00000000,
1271 0x28a4c, 0x07ffffff, 0x06000000,
1272 0x4d8, 0x00000fff, 0x00000100,
1273 0x3e78, 0x00000001, 0x00000002,
1274 0xc768, 0x00000008, 0x00000008,
1275 0x8c00, 0x000000ff, 0x00000003,
1276 0x214f8, 0x01ff01ff, 0x00000002,
1277 0x21498, 0x007ff800, 0x00200000,
1278 0x2015c, 0xffffffff, 0x00000f40,
1279 0x88c4, 0x001f3ae3, 0x00000082,
1280 0x88d4, 0x0000001f, 0x00000010,
1281 0x30934, 0xffffffff, 0x00000000
1284 static const u32 kalindi_mgcg_cgcg_init[] =
1286 0xc420, 0xffffffff, 0xfffffffc,
1287 0x30800, 0xffffffff, 0xe0000000,
1288 0x3c2a0, 0xffffffff, 0x00000100,
1289 0x3c208, 0xffffffff, 0x00000100,
1290 0x3c2c0, 0xffffffff, 0x00000100,
1291 0x3c2c8, 0xffffffff, 0x00000100,
1292 0x3c2c4, 0xffffffff, 0x00000100,
1293 0x55e4, 0xffffffff, 0x00600100,
1294 0x3c280, 0xffffffff, 0x00000100,
1295 0x3c214, 0xffffffff, 0x06000100,
1296 0x3c220, 0xffffffff, 0x00000100,
1297 0x3c218, 0xffffffff, 0x06000100,
1298 0x3c204, 0xffffffff, 0x00000100,
1299 0x3c2e0, 0xffffffff, 0x00000100,
1300 0x3c224, 0xffffffff, 0x00000100,
1301 0x3c200, 0xffffffff, 0x00000100,
1302 0x3c230, 0xffffffff, 0x00000100,
1303 0x3c234, 0xffffffff, 0x00000100,
1304 0x3c250, 0xffffffff, 0x00000100,
1305 0x3c254, 0xffffffff, 0x00000100,
1306 0x3c258, 0xffffffff, 0x00000100,
1307 0x3c25c, 0xffffffff, 0x00000100,
1308 0x3c260, 0xffffffff, 0x00000100,
1309 0x3c27c, 0xffffffff, 0x00000100,
1310 0x3c278, 0xffffffff, 0x00000100,
1311 0x3c210, 0xffffffff, 0x06000100,
1312 0x3c290, 0xffffffff, 0x00000100,
1313 0x3c274, 0xffffffff, 0x00000100,
1314 0x3c2b4, 0xffffffff, 0x00000100,
1315 0x3c2b0, 0xffffffff, 0x00000100,
1316 0x3c270, 0xffffffff, 0x00000100,
1317 0x30800, 0xffffffff, 0xe0000000,
1318 0x3c020, 0xffffffff, 0x00010000,
1319 0x3c024, 0xffffffff, 0x00030002,
1320 0x3c028, 0xffffffff, 0x00040007,
1321 0x3c02c, 0xffffffff, 0x00060005,
1322 0x3c030, 0xffffffff, 0x00090008,
1323 0x3c034, 0xffffffff, 0x00010000,
1324 0x3c038, 0xffffffff, 0x00030002,
1325 0x3c03c, 0xffffffff, 0x00040007,
1326 0x3c040, 0xffffffff, 0x00060005,
1327 0x3c044, 0xffffffff, 0x00090008,
1328 0x3c000, 0xffffffff, 0x96e00200,
1329 0x8708, 0xffffffff, 0x00900100,
1330 0xc424, 0xffffffff, 0x0020003f,
1331 0x38, 0xffffffff, 0x0140001c,
1332 0x3c, 0x000f0000, 0x000f0000,
1333 0x220, 0xffffffff, 0xC060000C,
1334 0x224, 0xc0000fff, 0x00000100,
1335 0x20a8, 0xffffffff, 0x00000104,
1336 0x55e4, 0xff000fff, 0x00000100,
1337 0x30cc, 0xc0000fff, 0x00000104,
1338 0xc1e4, 0x00000001, 0x00000001,
1339 0xd00c, 0xff000ff0, 0x00000100,
1340 0xd80c, 0xff000ff0, 0x00000100
1343 static const u32 hawaii_golden_spm_registers[] =
1345 0x30800, 0xe0ffffff, 0xe0000000
1348 static const u32 hawaii_golden_common_registers[] =
1350 0x30800, 0xffffffff, 0xe0000000,
1351 0x28350, 0xffffffff, 0x3a00161a,
1352 0x28354, 0xffffffff, 0x0000002e,
1353 0x9a10, 0xffffffff, 0x00018208,
1354 0x98f8, 0xffffffff, 0x12011003
1357 static const u32 hawaii_golden_registers[] =
1359 0x3354, 0x00000333, 0x00000333,
1360 0x9a10, 0x00010000, 0x00058208,
1361 0x9830, 0xffffffff, 0x00000000,
1362 0x9834, 0xf00fffff, 0x00000400,
1363 0x9838, 0x0002021c, 0x00020200,
1364 0xc78, 0x00000080, 0x00000000,
1365 0x5bb0, 0x000000f0, 0x00000070,
1366 0x5bc0, 0xf0311fff, 0x80300000,
1367 0x350c, 0x00810000, 0x408af000,
1368 0x7030, 0x31000111, 0x00000011,
1369 0x2f48, 0x73773777, 0x12010001,
1370 0x2120, 0x0000007f, 0x0000001b,
1371 0x21dc, 0x00007fb6, 0x00002191,
1372 0x3628, 0x0000003f, 0x0000000a,
1373 0x362c, 0x0000003f, 0x0000000a,
1374 0x2ae4, 0x00073ffe, 0x000022a2,
1375 0x240c, 0x000007ff, 0x00000000,
1376 0x8bf0, 0x00002001, 0x00000001,
1377 0x8b24, 0xffffffff, 0x00ffffff,
1378 0x30a04, 0x0000ff0f, 0x00000000,
1379 0x28a4c, 0x07ffffff, 0x06000000,
1380 0x3e78, 0x00000001, 0x00000002,
1381 0xc768, 0x00000008, 0x00000008,
1382 0xc770, 0x00000f00, 0x00000800,
1383 0xc774, 0x00000f00, 0x00000800,
1384 0xc798, 0x00ffffff, 0x00ff7fbf,
1385 0xc79c, 0x00ffffff, 0x00ff7faf,
1386 0x8c00, 0x000000ff, 0x00000800,
1387 0xe40, 0x00001fff, 0x00001fff,
1388 0x9060, 0x0000007f, 0x00000020,
1389 0x9508, 0x00010000, 0x00010000,
1390 0xae00, 0x00100000, 0x000ff07c,
1391 0xac14, 0x000003ff, 0x0000000f,
1392 0xac10, 0xffffffff, 0x7564fdec,
1393 0xac0c, 0xffffffff, 0x3120b9a8,
1394 0xac08, 0x20000000, 0x0f9c0000
1397 static const u32 hawaii_mgcg_cgcg_init[] =
1399 0xc420, 0xffffffff, 0xfffffffd,
1400 0x30800, 0xffffffff, 0xe0000000,
1401 0x3c2a0, 0xffffffff, 0x00000100,
1402 0x3c208, 0xffffffff, 0x00000100,
1403 0x3c2c0, 0xffffffff, 0x00000100,
1404 0x3c2c8, 0xffffffff, 0x00000100,
1405 0x3c2c4, 0xffffffff, 0x00000100,
1406 0x55e4, 0xffffffff, 0x00200100,
1407 0x3c280, 0xffffffff, 0x00000100,
1408 0x3c214, 0xffffffff, 0x06000100,
1409 0x3c220, 0xffffffff, 0x00000100,
1410 0x3c218, 0xffffffff, 0x06000100,
1411 0x3c204, 0xffffffff, 0x00000100,
1412 0x3c2e0, 0xffffffff, 0x00000100,
1413 0x3c224, 0xffffffff, 0x00000100,
1414 0x3c200, 0xffffffff, 0x00000100,
1415 0x3c230, 0xffffffff, 0x00000100,
1416 0x3c234, 0xffffffff, 0x00000100,
1417 0x3c250, 0xffffffff, 0x00000100,
1418 0x3c254, 0xffffffff, 0x00000100,
1419 0x3c258, 0xffffffff, 0x00000100,
1420 0x3c25c, 0xffffffff, 0x00000100,
1421 0x3c260, 0xffffffff, 0x00000100,
1422 0x3c27c, 0xffffffff, 0x00000100,
1423 0x3c278, 0xffffffff, 0x00000100,
1424 0x3c210, 0xffffffff, 0x06000100,
1425 0x3c290, 0xffffffff, 0x00000100,
1426 0x3c274, 0xffffffff, 0x00000100,
1427 0x3c2b4, 0xffffffff, 0x00000100,
1428 0x3c2b0, 0xffffffff, 0x00000100,
1429 0x3c270, 0xffffffff, 0x00000100,
1430 0x30800, 0xffffffff, 0xe0000000,
1431 0x3c020, 0xffffffff, 0x00010000,
1432 0x3c024, 0xffffffff, 0x00030002,
1433 0x3c028, 0xffffffff, 0x00040007,
1434 0x3c02c, 0xffffffff, 0x00060005,
1435 0x3c030, 0xffffffff, 0x00090008,
1436 0x3c034, 0xffffffff, 0x00010000,
1437 0x3c038, 0xffffffff, 0x00030002,
1438 0x3c03c, 0xffffffff, 0x00040007,
1439 0x3c040, 0xffffffff, 0x00060005,
1440 0x3c044, 0xffffffff, 0x00090008,
1441 0x3c048, 0xffffffff, 0x00010000,
1442 0x3c04c, 0xffffffff, 0x00030002,
1443 0x3c050, 0xffffffff, 0x00040007,
1444 0x3c054, 0xffffffff, 0x00060005,
1445 0x3c058, 0xffffffff, 0x00090008,
1446 0x3c05c, 0xffffffff, 0x00010000,
1447 0x3c060, 0xffffffff, 0x00030002,
1448 0x3c064, 0xffffffff, 0x00040007,
1449 0x3c068, 0xffffffff, 0x00060005,
1450 0x3c06c, 0xffffffff, 0x00090008,
1451 0x3c070, 0xffffffff, 0x00010000,
1452 0x3c074, 0xffffffff, 0x00030002,
1453 0x3c078, 0xffffffff, 0x00040007,
1454 0x3c07c, 0xffffffff, 0x00060005,
1455 0x3c080, 0xffffffff, 0x00090008,
1456 0x3c084, 0xffffffff, 0x00010000,
1457 0x3c088, 0xffffffff, 0x00030002,
1458 0x3c08c, 0xffffffff, 0x00040007,
1459 0x3c090, 0xffffffff, 0x00060005,
1460 0x3c094, 0xffffffff, 0x00090008,
1461 0x3c098, 0xffffffff, 0x00010000,
1462 0x3c09c, 0xffffffff, 0x00030002,
1463 0x3c0a0, 0xffffffff, 0x00040007,
1464 0x3c0a4, 0xffffffff, 0x00060005,
1465 0x3c0a8, 0xffffffff, 0x00090008,
1466 0x3c0ac, 0xffffffff, 0x00010000,
1467 0x3c0b0, 0xffffffff, 0x00030002,
1468 0x3c0b4, 0xffffffff, 0x00040007,
1469 0x3c0b8, 0xffffffff, 0x00060005,
1470 0x3c0bc, 0xffffffff, 0x00090008,
1471 0x3c0c0, 0xffffffff, 0x00010000,
1472 0x3c0c4, 0xffffffff, 0x00030002,
1473 0x3c0c8, 0xffffffff, 0x00040007,
1474 0x3c0cc, 0xffffffff, 0x00060005,
1475 0x3c0d0, 0xffffffff, 0x00090008,
1476 0x3c0d4, 0xffffffff, 0x00010000,
1477 0x3c0d8, 0xffffffff, 0x00030002,
1478 0x3c0dc, 0xffffffff, 0x00040007,
1479 0x3c0e0, 0xffffffff, 0x00060005,
1480 0x3c0e4, 0xffffffff, 0x00090008,
1481 0x3c0e8, 0xffffffff, 0x00010000,
1482 0x3c0ec, 0xffffffff, 0x00030002,
1483 0x3c0f0, 0xffffffff, 0x00040007,
1484 0x3c0f4, 0xffffffff, 0x00060005,
1485 0x3c0f8, 0xffffffff, 0x00090008,
1486 0xc318, 0xffffffff, 0x00020200,
1487 0x3350, 0xffffffff, 0x00000200,
1488 0x15c0, 0xffffffff, 0x00000400,
1489 0x55e8, 0xffffffff, 0x00000000,
1490 0x2f50, 0xffffffff, 0x00000902,
1491 0x3c000, 0xffffffff, 0x96940200,
1492 0x8708, 0xffffffff, 0x00900100,
1493 0xc424, 0xffffffff, 0x0020003f,
1494 0x38, 0xffffffff, 0x0140001c,
1495 0x3c, 0x000f0000, 0x000f0000,
1496 0x220, 0xffffffff, 0xc060000c,
1497 0x224, 0xc0000fff, 0x00000100,
1498 0xf90, 0xffffffff, 0x00000100,
1499 0xf98, 0x00000101, 0x00000000,
1500 0x20a8, 0xffffffff, 0x00000104,
1501 0x55e4, 0xff000fff, 0x00000100,
1502 0x30cc, 0xc0000fff, 0x00000104,
1503 0xc1e4, 0x00000001, 0x00000001,
1504 0xd00c, 0xff000ff0, 0x00000100,
1505 0xd80c, 0xff000ff0, 0x00000100
1508 static const u32 godavari_golden_registers[] =
1510 0x55e4, 0xff607fff, 0xfc000100,
1511 0x6ed8, 0x00010101, 0x00010000,
1512 0x9830, 0xffffffff, 0x00000000,
1513 0x98302, 0xf00fffff, 0x00000400,
1514 0x6130, 0xffffffff, 0x00010000,
1515 0x5bb0, 0x000000f0, 0x00000070,
1516 0x5bc0, 0xf0311fff, 0x80300000,
1517 0x98f8, 0x73773777, 0x12010001,
1518 0x98fc, 0xffffffff, 0x00000010,
1519 0x8030, 0x00001f0f, 0x0000100a,
1520 0x2f48, 0x73773777, 0x12010001,
1521 0x2408, 0x000fffff, 0x000c007f,
1522 0x8a14, 0xf000003f, 0x00000007,
1523 0x8b24, 0xffffffff, 0x00ff0fff,
1524 0x30a04, 0x0000ff0f, 0x00000000,
1525 0x28a4c, 0x07ffffff, 0x06000000,
1526 0x4d8, 0x00000fff, 0x00000100,
1527 0xd014, 0x00010000, 0x00810001,
1528 0xd814, 0x00010000, 0x00810001,
1529 0x3e78, 0x00000001, 0x00000002,
1530 0xc768, 0x00000008, 0x00000008,
1531 0xc770, 0x00000f00, 0x00000800,
1532 0xc774, 0x00000f00, 0x00000800,
1533 0xc798, 0x00ffffff, 0x00ff7fbf,
1534 0xc79c, 0x00ffffff, 0x00ff7faf,
1535 0x8c00, 0x000000ff, 0x00000001,
1536 0x214f8, 0x01ff01ff, 0x00000002,
1537 0x21498, 0x007ff800, 0x00200000,
1538 0x2015c, 0xffffffff, 0x00000f40,
1539 0x88c4, 0x001f3ae3, 0x00000082,
1540 0x88d4, 0x0000001f, 0x00000010,
1541 0x30934, 0xffffffff, 0x00000000
1545 static void cik_init_golden_registers(struct radeon_device *rdev)
1547 /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1548 mutex_lock(&rdev->grbm_idx_mutex);
1549 switch (rdev->family) {
1551 radeon_program_register_sequence(rdev,
1552 bonaire_mgcg_cgcg_init,
1553 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554 radeon_program_register_sequence(rdev,
1555 bonaire_golden_registers,
1556 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557 radeon_program_register_sequence(rdev,
1558 bonaire_golden_common_registers,
1559 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560 radeon_program_register_sequence(rdev,
1561 bonaire_golden_spm_registers,
1562 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1565 radeon_program_register_sequence(rdev,
1566 kalindi_mgcg_cgcg_init,
1567 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568 radeon_program_register_sequence(rdev,
1569 kalindi_golden_registers,
1570 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571 radeon_program_register_sequence(rdev,
1572 kalindi_golden_common_registers,
1573 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574 radeon_program_register_sequence(rdev,
1575 kalindi_golden_spm_registers,
1576 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1579 radeon_program_register_sequence(rdev,
1580 kalindi_mgcg_cgcg_init,
1581 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582 radeon_program_register_sequence(rdev,
1583 godavari_golden_registers,
1584 (const u32)ARRAY_SIZE(godavari_golden_registers));
1585 radeon_program_register_sequence(rdev,
1586 kalindi_golden_common_registers,
1587 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588 radeon_program_register_sequence(rdev,
1589 kalindi_golden_spm_registers,
1590 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1593 radeon_program_register_sequence(rdev,
1594 spectre_mgcg_cgcg_init,
1595 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596 radeon_program_register_sequence(rdev,
1597 spectre_golden_registers,
1598 (const u32)ARRAY_SIZE(spectre_golden_registers));
1599 radeon_program_register_sequence(rdev,
1600 spectre_golden_common_registers,
1601 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602 radeon_program_register_sequence(rdev,
1603 spectre_golden_spm_registers,
1604 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1607 radeon_program_register_sequence(rdev,
1608 hawaii_mgcg_cgcg_init,
1609 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610 radeon_program_register_sequence(rdev,
1611 hawaii_golden_registers,
1612 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613 radeon_program_register_sequence(rdev,
1614 hawaii_golden_common_registers,
1615 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616 radeon_program_register_sequence(rdev,
1617 hawaii_golden_spm_registers,
1618 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1623 mutex_unlock(&rdev->grbm_idx_mutex);
1627 * cik_get_xclk - get the xclk
1629 * @rdev: radeon_device pointer
1631 * Returns the reference clock used by the gfx engine
1634 u32 cik_get_xclk(struct radeon_device *rdev)
1636 u32 reference_clock = rdev->clock.spll.reference_freq;
1638 if (rdev->flags & RADEON_IS_IGP) {
1639 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1640 return reference_clock / 2;
1642 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1643 return reference_clock / 4;
1645 return reference_clock;
1649 * cik_mm_rdoorbell - read a doorbell dword
1651 * @rdev: radeon_device pointer
1652 * @index: doorbell index
1654 * Returns the value in the doorbell aperture at the
1655 * requested doorbell index (CIK).
1657 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1659 if (index < rdev->doorbell.num_doorbells) {
1660 return readl(rdev->doorbell.ptr + index);
1662 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1668 * cik_mm_wdoorbell - write a doorbell dword
1670 * @rdev: radeon_device pointer
1671 * @index: doorbell index
1672 * @v: value to write
1674 * Writes @v to the doorbell aperture at the
1675 * requested doorbell index (CIK).
1677 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1679 if (index < rdev->doorbell.num_doorbells) {
1680 writel(v, rdev->doorbell.ptr + index);
1682 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1686 #define BONAIRE_IO_MC_REGS_SIZE 36
1688 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1690 {0x00000070, 0x04400000},
1691 {0x00000071, 0x80c01803},
1692 {0x00000072, 0x00004004},
1693 {0x00000073, 0x00000100},
1694 {0x00000074, 0x00ff0000},
1695 {0x00000075, 0x34000000},
1696 {0x00000076, 0x08000014},
1697 {0x00000077, 0x00cc08ec},
1698 {0x00000078, 0x00000400},
1699 {0x00000079, 0x00000000},
1700 {0x0000007a, 0x04090000},
1701 {0x0000007c, 0x00000000},
1702 {0x0000007e, 0x4408a8e8},
1703 {0x0000007f, 0x00000304},
1704 {0x00000080, 0x00000000},
1705 {0x00000082, 0x00000001},
1706 {0x00000083, 0x00000002},
1707 {0x00000084, 0xf3e4f400},
1708 {0x00000085, 0x052024e3},
1709 {0x00000087, 0x00000000},
1710 {0x00000088, 0x01000000},
1711 {0x0000008a, 0x1c0a0000},
1712 {0x0000008b, 0xff010000},
1713 {0x0000008d, 0xffffefff},
1714 {0x0000008e, 0xfff3efff},
1715 {0x0000008f, 0xfff3efbf},
1716 {0x00000092, 0xf7ffffff},
1717 {0x00000093, 0xffffff7f},
1718 {0x00000095, 0x00101101},
1719 {0x00000096, 0x00000fff},
1720 {0x00000097, 0x00116fff},
1721 {0x00000098, 0x60010000},
1722 {0x00000099, 0x10010000},
1723 {0x0000009a, 0x00006000},
1724 {0x0000009b, 0x00001000},
1725 {0x0000009f, 0x00b48000}
1728 #define HAWAII_IO_MC_REGS_SIZE 22
1730 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1732 {0x0000007d, 0x40000000},
1733 {0x0000007e, 0x40180304},
1734 {0x0000007f, 0x0000ff00},
1735 {0x00000081, 0x00000000},
1736 {0x00000083, 0x00000800},
1737 {0x00000086, 0x00000000},
1738 {0x00000087, 0x00000100},
1739 {0x00000088, 0x00020100},
1740 {0x00000089, 0x00000000},
1741 {0x0000008b, 0x00040000},
1742 {0x0000008c, 0x00000100},
1743 {0x0000008e, 0xff010000},
1744 {0x00000090, 0xffffefff},
1745 {0x00000091, 0xfff3efff},
1746 {0x00000092, 0xfff3efbf},
1747 {0x00000093, 0xf7ffffff},
1748 {0x00000094, 0xffffff7f},
1749 {0x00000095, 0x00000fff},
1750 {0x00000096, 0x00116fff},
1751 {0x00000097, 0x60010000},
1752 {0x00000098, 0x10010000},
1753 {0x0000009f, 0x00c79000}
1758 * cik_srbm_select - select specific register instances
1760 * @rdev: radeon_device pointer
1761 * @me: selected ME (micro engine)
1766 * Switches the currently active registers instances. Some
1767 * registers are instanced per VMID, others are instanced per
1768 * me/pipe/queue combination.
1770 static void cik_srbm_select(struct radeon_device *rdev,
1771 u32 me, u32 pipe, u32 queue, u32 vmid)
1773 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1776 QUEUEID(queue & 0x7));
1777 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1782 * ci_mc_load_microcode - load MC ucode into the hw
1784 * @rdev: radeon_device pointer
1786 * Load the GDDR MC ucode into the hw (CIK).
1787 * Returns 0 on success, error on failure.
1789 int ci_mc_load_microcode(struct radeon_device *rdev)
1791 const __be32 *fw_data = NULL;
1792 const __le32 *new_fw_data = NULL;
1794 u32 *io_mc_regs = NULL;
1795 const __le32 *new_io_mc_regs = NULL;
1796 int i, regs_size, ucode_size;
1802 const struct mc_firmware_header_v1_0 *hdr =
1803 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1805 radeon_ucode_print_mc_hdr(&hdr->header);
1807 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1808 new_io_mc_regs = (const __le32 *)
1809 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1810 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1811 new_fw_data = (const __le32 *)
1812 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1814 ucode_size = rdev->mc_fw->size / 4;
1816 switch (rdev->family) {
1818 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1819 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1822 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1823 regs_size = HAWAII_IO_MC_REGS_SIZE;
1828 fw_data = (const __be32 *)rdev->mc_fw->data;
1831 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1834 /* reset the engine and set to writable */
1835 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1836 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1838 /* load mc io regs */
1839 for (i = 0; i < regs_size; i++) {
1841 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1842 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1844 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1845 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1849 tmp = RREG32(MC_SEQ_MISC0);
1850 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1851 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1852 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1853 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1854 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1857 /* load the MC ucode */
1858 for (i = 0; i < ucode_size; i++) {
1860 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1862 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1865 /* put the engine back into the active state */
1866 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1867 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1868 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1870 /* wait for training to complete */
1871 for (i = 0; i < rdev->usec_timeout; i++) {
1872 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1876 for (i = 0; i < rdev->usec_timeout; i++) {
1877 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
/* NOTE(review): this block appears garbled by extraction — fused line
 * numbers and missing braces / error-handling lines (the `if (err)`
 * chains, `goto out`, the `out:` unwind label and the num_fw
 * bookkeeping are elided).  Comments below annotate the surviving
 * structure; restore the full body from the driver history before
 * building. */
1887 * cik_init_microcode - load ucode images from disk
1889 * @rdev: radeon_device pointer
1891 * Use the firmware interface to load the ucode images into
1892 * the driver (not loaded into hw).
1893 * Returns 0 on success, error on failure.
1895 static int cik_init_microcode(struct radeon_device *rdev)
1897 const char *chip_name;
1898 const char *new_chip_name;
1899 size_t pfp_req_size, me_req_size, ce_req_size,
1900 mec_req_size, rlc_req_size, mc_req_size = 0,
1901 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1906 bool new_smc = false;
/* Per-family table of expected firmware image sizes; legacy images are
 * validated against these exact lengths. */
1910 switch (rdev->family) {
1912 chip_name = "BONAIRE";
/* certain Bonaire revisions need the alternate ("new_smc") SMC image */
1913 if ((rdev->pdev->revision == 0x80) ||
1914 (rdev->pdev->revision == 0x81) ||
1915 (rdev->pdev->device == 0x665f))
1917 new_chip_name = "bonaire";
1918 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1919 me_req_size = CIK_ME_UCODE_SIZE * 4;
1920 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1921 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1922 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1923 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1924 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1925 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1926 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1930 chip_name = "HAWAII";
1931 if (rdev->pdev->revision == 0x80)
1933 new_chip_name = "hawaii";
1934 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1935 me_req_size = CIK_ME_UCODE_SIZE * 4;
1936 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1937 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1938 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1939 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1940 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1941 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1942 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
/* APU families (Kaveri/Kabini/Mullins) have no MC/SMC firmware */
1946 chip_name = "KAVERI";
1947 new_chip_name = "kaveri";
1948 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1949 me_req_size = CIK_ME_UCODE_SIZE * 4;
1950 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1951 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1952 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1953 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1957 chip_name = "KABINI";
1958 new_chip_name = "kabini";
1959 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1960 me_req_size = CIK_ME_UCODE_SIZE * 4;
1961 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1962 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1963 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1964 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1968 chip_name = "MULLINS";
1969 new_chip_name = "mullins";
1970 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1971 me_req_size = CIK_ME_UCODE_SIZE * 4;
1972 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1973 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1974 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1975 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1981 DRM_INFO("Loading %s Microcode\n", new_chip_name);
/* For each image: try the new-layout name first; on failure fall back
 * to the legacy name and size-check it, otherwise validate the new
 * image and count it in new_fw.  (This is a deblobbed tree: the
 * firmware name format strings were replaced and reject_firmware()
 * always fails, so only the error paths are reachable here.) */
1983 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1984 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1986 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1987 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1990 if (rdev->pfp_fw->size != pfp_req_size) {
1992 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1993 rdev->pfp_fw->size, fw_name);
1998 err = radeon_ucode_validate(rdev->pfp_fw);
2001 "cik_fw: validation failed for firmware \"%s\"\n",
2009 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2010 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2012 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2013 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2016 if (rdev->me_fw->size != me_req_size) {
2018 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2019 rdev->me_fw->size, fw_name);
2023 err = radeon_ucode_validate(rdev->me_fw);
2026 "cik_fw: validation failed for firmware \"%s\"\n",
2034 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2035 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2037 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2038 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2041 if (rdev->ce_fw->size != ce_req_size) {
2043 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2044 rdev->ce_fw->size, fw_name);
2048 err = radeon_ucode_validate(rdev->ce_fw);
2051 "cik_fw: validation failed for firmware \"%s\"\n",
2059 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2060 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2062 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2063 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2066 if (rdev->mec_fw->size != mec_req_size) {
2068 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2069 rdev->mec_fw->size, fw_name);
2073 err = radeon_ucode_validate(rdev->mec_fw);
2076 "cik_fw: validation failed for firmware \"%s\"\n",
/* second MEC image exists only for Kaveri; failure to load it is
 * non-fatal (elided error path presumably clears err) */
2084 if (rdev->family == CHIP_KAVERI) {
2085 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2086 err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2090 err = radeon_ucode_validate(rdev->mec2_fw);
2099 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2100 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2102 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2103 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2106 if (rdev->rlc_fw->size != rlc_req_size) {
2108 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2109 rdev->rlc_fw->size, fw_name);
2113 err = radeon_ucode_validate(rdev->rlc_fw);
2116 "cik_fw: validation failed for firmware \"%s\"\n",
2124 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2125 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2127 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2128 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2131 if (rdev->sdma_fw->size != sdma_req_size) {
2133 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2134 rdev->sdma_fw->size, fw_name);
2138 err = radeon_ucode_validate(rdev->sdma_fw);
2141 "cik_fw: validation failed for firmware \"%s\"\n",
2149 /* No SMC, MC ucode on APUs */
2150 if (!(rdev->flags & RADEON_IS_IGP)) {
/* MC: new name first, then legacy mc2, then legacy mc; legacy images
 * may legitimately be either the mc or mc2 length */
2151 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2152 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2154 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2155 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2157 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2158 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2162 if ((rdev->mc_fw->size != mc_req_size) &&
2163 (rdev->mc_fw->size != mc2_req_size)){
2165 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2166 rdev->mc_fw->size, fw_name);
2169 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2171 err = radeon_ucode_validate(rdev->mc_fw);
2174 "cik_fw: validation failed for firmware \"%s\"\n",
/* SMC: the new_smc flag selects an alternate image name; SMC load
 * failure is non-fatal — DPM is simply disabled */
2183 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2185 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2186 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2188 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2189 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2192 "smc: error loading firmware \"%s\"\n",
2194 release_firmware(rdev->smc_fw);
2195 rdev->smc_fw = NULL;
2197 } else if (rdev->smc_fw->size != smc_req_size) {
2199 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2200 rdev->smc_fw->size, fw_name);
2204 err = radeon_ucode_validate(rdev->smc_fw);
2207 "cik_fw: validation failed for firmware \"%s\"\n",
/* all-or-nothing: either every image is new layout or every image is
 * legacy; a mix is rejected */
2217 rdev->new_fw = false;
2218 } else if (new_fw < num_fw) {
2219 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2222 rdev->new_fw = true;
/* error unwind (under the elided `out:` label): release every image
 * loaded so far and clear the pointers */
2229 "cik_cp: Failed to load firmware \"%s\"\n",
2231 release_firmware(rdev->pfp_fw);
2232 rdev->pfp_fw = NULL;
2233 release_firmware(rdev->me_fw);
2235 release_firmware(rdev->ce_fw);
2237 release_firmware(rdev->mec_fw);
2238 rdev->mec_fw = NULL;
2239 release_firmware(rdev->mec2_fw);
2240 rdev->mec2_fw = NULL;
2241 release_firmware(rdev->rlc_fw);
2242 rdev->rlc_fw = NULL;
2243 release_firmware(rdev->sdma_fw);
2244 rdev->sdma_fw = NULL;
2245 release_firmware(rdev->mc_fw);
2247 release_firmware(rdev->smc_fw);
2248 rdev->smc_fw = NULL;
2257 * cik_tiling_mode_table_init - init the hw tiling table
2259 * @rdev: radeon_device pointer
2261 * Starting with SI, the tiling setup is done globally in a
2262 * set of 32 tiling modes. Rather than selecting each set of
2263 * parameters per surface as on older asics, we just select
2264 * which index in the tiling table we want to use, and the
2265 * surface uses those parameters (CIK).
2267 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2269 u32 *tile = rdev->config.cik.tile_mode_array;
2270 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2271 const u32 num_tile_mode_states =
2272 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2273 const u32 num_secondary_tile_mode_states =
2274 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2275 u32 reg_offset, split_equal_to_row_size;
2276 u32 num_pipe_configs;
2277 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2278 rdev->config.cik.max_shader_engines;
2280 switch (rdev->config.cik.mem_row_size_in_kb) {
2282 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2286 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2289 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2293 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2294 if (num_pipe_configs > 8)
2295 num_pipe_configs = 16;
2297 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2298 tile[reg_offset] = 0;
2299 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300 macrotile[reg_offset] = 0;
2302 switch(num_pipe_configs) {
2304 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2306 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2308 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2312 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2316 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2320 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 TILE_SPLIT(split_equal_to_row_size));
2324 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 TILE_SPLIT(split_equal_to_row_size));
2335 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2337 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2340 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2355 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2368 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2370 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390 NUM_BANKS(ADDR_SURF_16_BANK));
2391 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2395 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2398 NUM_BANKS(ADDR_SURF_16_BANK));
2399 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402 NUM_BANKS(ADDR_SURF_8_BANK));
2403 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406 NUM_BANKS(ADDR_SURF_4_BANK));
2407 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410 NUM_BANKS(ADDR_SURF_2_BANK));
2411 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414 NUM_BANKS(ADDR_SURF_16_BANK));
2415 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418 NUM_BANKS(ADDR_SURF_16_BANK));
2419 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2422 NUM_BANKS(ADDR_SURF_16_BANK));
2423 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2426 NUM_BANKS(ADDR_SURF_8_BANK));
2427 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2430 NUM_BANKS(ADDR_SURF_4_BANK));
2431 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2434 NUM_BANKS(ADDR_SURF_2_BANK));
2435 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438 NUM_BANKS(ADDR_SURF_2_BANK));
2440 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2441 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2442 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2443 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2447 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2449 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2450 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2451 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2455 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2459 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2463 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2465 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466 TILE_SPLIT(split_equal_to_row_size));
2467 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2474 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477 TILE_SPLIT(split_equal_to_row_size));
2478 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2479 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2480 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2483 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2485 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2498 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2513 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2523 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533 NUM_BANKS(ADDR_SURF_16_BANK));
2534 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537 NUM_BANKS(ADDR_SURF_16_BANK));
2538 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_16_BANK));
2542 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545 NUM_BANKS(ADDR_SURF_8_BANK));
2546 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549 NUM_BANKS(ADDR_SURF_4_BANK));
2550 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 NUM_BANKS(ADDR_SURF_2_BANK));
2554 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2557 NUM_BANKS(ADDR_SURF_16_BANK));
2558 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2561 NUM_BANKS(ADDR_SURF_16_BANK));
2562 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2565 NUM_BANKS(ADDR_SURF_16_BANK));
2566 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2569 NUM_BANKS(ADDR_SURF_16_BANK));
2570 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2577 NUM_BANKS(ADDR_SURF_4_BANK));
2578 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2581 NUM_BANKS(ADDR_SURF_2_BANK));
2583 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2584 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2585 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2586 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2591 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2593 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2595 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2599 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2607 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2609 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610 TILE_SPLIT(split_equal_to_row_size));
2611 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2614 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2616 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2618 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2620 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2621 TILE_SPLIT(split_equal_to_row_size));
2622 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2623 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2624 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2627 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2642 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2644 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2651 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2657 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2658 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2661 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2662 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2663 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2665 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2670 } else if (num_rbs < 4) {
2671 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2675 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2677 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2679 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2687 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2689 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690 TILE_SPLIT(split_equal_to_row_size));
2691 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2692 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2693 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2694 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2695 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2696 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2698 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2700 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2701 TILE_SPLIT(split_equal_to_row_size));
2702 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2703 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2704 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2707 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2709 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2713 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2722 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2724 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2728 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2732 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2737 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2738 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2739 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2743 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2747 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2762 NUM_BANKS(ADDR_SURF_16_BANK));
2763 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2766 NUM_BANKS(ADDR_SURF_16_BANK));
2767 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770 NUM_BANKS(ADDR_SURF_16_BANK));
2771 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774 NUM_BANKS(ADDR_SURF_8_BANK));
2775 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2778 NUM_BANKS(ADDR_SURF_4_BANK));
2779 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2780 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2781 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2782 NUM_BANKS(ADDR_SURF_16_BANK));
2783 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2784 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786 NUM_BANKS(ADDR_SURF_16_BANK));
2787 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2789 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2790 NUM_BANKS(ADDR_SURF_16_BANK));
2791 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794 NUM_BANKS(ADDR_SURF_16_BANK));
2795 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798 NUM_BANKS(ADDR_SURF_16_BANK));
2799 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2802 NUM_BANKS(ADDR_SURF_8_BANK));
2803 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2805 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2806 NUM_BANKS(ADDR_SURF_4_BANK));
2808 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2809 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2810 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2811 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2815 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2817 PIPE_CONFIG(ADDR_SURF_P2) |
2818 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2819 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821 PIPE_CONFIG(ADDR_SURF_P2) |
2822 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2823 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825 PIPE_CONFIG(ADDR_SURF_P2) |
2826 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 PIPE_CONFIG(ADDR_SURF_P2) |
2830 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2831 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2833 PIPE_CONFIG(ADDR_SURF_P2) |
2834 TILE_SPLIT(split_equal_to_row_size));
2835 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836 PIPE_CONFIG(ADDR_SURF_P2) |
2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2838 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2840 PIPE_CONFIG(ADDR_SURF_P2) |
2841 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2842 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2844 PIPE_CONFIG(ADDR_SURF_P2) |
2845 TILE_SPLIT(split_equal_to_row_size));
2846 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2847 PIPE_CONFIG(ADDR_SURF_P2);
2848 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2850 PIPE_CONFIG(ADDR_SURF_P2));
2851 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2853 PIPE_CONFIG(ADDR_SURF_P2) |
2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2857 PIPE_CONFIG(ADDR_SURF_P2) |
2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2860 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2861 PIPE_CONFIG(ADDR_SURF_P2) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864 PIPE_CONFIG(ADDR_SURF_P2) |
2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2866 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868 PIPE_CONFIG(ADDR_SURF_P2) |
2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2872 PIPE_CONFIG(ADDR_SURF_P2) |
2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2876 PIPE_CONFIG(ADDR_SURF_P2) |
2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2880 PIPE_CONFIG(ADDR_SURF_P2));
2881 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 PIPE_CONFIG(ADDR_SURF_P2) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2887 PIPE_CONFIG(ADDR_SURF_P2) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2889 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891 PIPE_CONFIG(ADDR_SURF_P2) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2894 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2895 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2896 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2897 NUM_BANKS(ADDR_SURF_16_BANK));
2898 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2899 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2900 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2901 NUM_BANKS(ADDR_SURF_16_BANK));
2902 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2904 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2905 NUM_BANKS(ADDR_SURF_16_BANK));
2906 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909 NUM_BANKS(ADDR_SURF_16_BANK));
2910 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913 NUM_BANKS(ADDR_SURF_16_BANK));
2914 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 NUM_BANKS(ADDR_SURF_16_BANK));
2918 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2921 NUM_BANKS(ADDR_SURF_8_BANK));
2922 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2923 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2924 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2925 NUM_BANKS(ADDR_SURF_16_BANK));
2926 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2930 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2933 NUM_BANKS(ADDR_SURF_16_BANK));
2934 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2937 NUM_BANKS(ADDR_SURF_16_BANK));
2938 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 NUM_BANKS(ADDR_SURF_16_BANK));
2942 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2945 NUM_BANKS(ADDR_SURF_16_BANK));
2946 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2949 NUM_BANKS(ADDR_SURF_8_BANK));
2951 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2952 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2953 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2954 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2958 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2963 * cik_select_se_sh - select which SE, SH to address
2965 * @rdev: radeon_device pointer
2966 * @se_num: shader engine to address
2967 * @sh_num: sh block to address
2969 * Select which SE, SH combinations to address. Certain
2970 * registers are instanced per SE or SH. 0xffffffff means
2971 * broadcast to all SEs or SHs (CIK).
2973 static void cik_select_se_sh(struct radeon_device *rdev,
2974 u32 se_num, u32 sh_num)
/* Start from instance-broadcast; SE/SH selection bits are OR'd in below. */
2976 u32 data = INSTANCE_BROADCAST_WRITES;
/* 0xffffffff in either index means "broadcast to all" at that level. */
2978 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2979 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2980 else if (se_num == 0xffffffff)
2981 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2982 else if (sh_num == 0xffffffff)
2983 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
/* NOTE(review): the final "else" line (both indices specific) is not
 * visible in this excerpt; presumably this assignment is the else arm
 * taken when neither index is a broadcast value — confirm in full file. */
2985 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
/* Latch the selection so subsequent accesses to per-SE/per-SH instanced
 * registers target the chosen combination. */
2986 WREG32(GRBM_GFX_INDEX, data);
2990 * cik_create_bitmask - create a bitmask
2992 * @bit_width: length of the mask
2994 * create a variable length bit mask (CIK).
2995 * Returns the bitmask.
2997 static u32 cik_create_bitmask(u32 bit_width)
/* Accumulate one bit per iteration until bit_width low bits are set.
 * NOTE(review): loop body and return are elided from this excerpt. */
3001 for (i = 0; i < bit_width; i++) {
3009 * cik_get_rb_disabled - computes the mask of disabled RBs
3011 * @rdev: radeon_device pointer
3012 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3013 * @se_num: number of SEs (shader engines) for the asic
3014 * @sh_per_se: number of SH blocks per SE for the asic
3016 * Calculates the bitmask of disabled RBs (CIK).
3017 * Returns the disabled RB bitmask.
3019 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3020 u32 max_rb_num_per_se,
/* Hardware-disabled (fused-off) RBs for the currently selected SE/SH. */
3025 data = RREG32(CC_RB_BACKEND_DISABLE);
3027 data &= BACKEND_DISABLE_MASK;
/* Fold in any additionally user/driver-disabled RBs. */
3030 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
/* Normalize the disable field down to bit 0. */
3032 data >>= BACKEND_DISABLE_SHIFT;
/* Keep only bits for RBs that actually exist on one SH:
 * max_rb_num_per_se / sh_per_se RBs per SH. */
3034 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3040 * cik_setup_rb - setup the RBs on the asic
3042 * @rdev: radeon_device pointer
3043 * @se_num: number of SEs (shader engines) for the asic
3044 * @sh_per_se: number of SH blocks per SE for the asic
3045 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3047 * Configures per-SE/SH RB registers (CIK).
3049 static void cik_setup_rb(struct radeon_device *rdev,
3050 u32 se_num, u32 sh_per_se,
3051 u32 max_rb_num_per_se)
3055 u32 disabled_rbs = 0;
3056 u32 enabled_rbs = 0;
/* Phase 1: walk every SE/SH pair and build a global disabled-RB bitmap.
 * grbm_idx_mutex serializes use of the shared GRBM_GFX_INDEX selector. */
3058 mutex_lock(&rdev->grbm_idx_mutex);
3059 for (i = 0; i < se_num; i++) {
3060 for (j = 0; j < sh_per_se; j++) {
3061 cik_select_se_sh(rdev, i, j);
3062 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
/* Hawaii packs more RB bits per SH than the other CIK parts. */
3063 if (rdev->family == CHIP_HAWAII)
3064 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3066 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
/* Restore broadcast selection before releasing the selector. */
3069 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3070 mutex_unlock(&rdev->grbm_idx_mutex);
/* Phase 2: invert the disabled bitmap into an enabled-RB mask.
 * NOTE(review): the per-iteration update of "mask" is elided here. */
3073 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3074 if (!(disabled_rbs & mask))
3075 enabled_rbs |= mask;
/* Published so the CS checker/userspace queries see the live RB set. */
3079 rdev->config.cik.backend_enable_mask = enabled_rbs;
/* Phase 3: program PA_SC_RASTER_CONFIG per SE from the low bits of
 * enabled_rbs. NOTE(review): the switch's case labels, the consumption
 * of enabled_rbs between iterations, and several arms are elided from
 * this excerpt — verify mapping against the full file. */
3081 mutex_lock(&rdev->grbm_idx_mutex);
3082 for (i = 0; i < se_num; i++) {
3083 cik_select_se_sh(rdev, i, 0xffffffff);
3085 for (j = 0; j < sh_per_se; j++) {
3086 switch (enabled_rbs & 3) {
3089 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3091 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3094 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3097 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3101 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3106 WREG32(PA_SC_RASTER_CONFIG, data);
/* Back to broadcast before dropping the selector lock. */
3108 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109 mutex_unlock(&rdev->grbm_idx_mutex);
3113 * cik_gpu_init - setup the 3D engine
3115 * @rdev: radeon_device pointer
3117 * Configures the 3D engine and tiling configuration
3118 * registers so that the 3D engine is usable.
3120 static void cik_gpu_init(struct radeon_device *rdev)
3122 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3123 u32 mc_shared_chmap, mc_arb_ramcfg;
3124 u32 hdp_host_path_cntl;
3128 switch (rdev->family) {
3130 rdev->config.cik.max_shader_engines = 2;
3131 rdev->config.cik.max_tile_pipes = 4;
3132 rdev->config.cik.max_cu_per_sh = 7;
3133 rdev->config.cik.max_sh_per_se = 1;
3134 rdev->config.cik.max_backends_per_se = 2;
3135 rdev->config.cik.max_texture_channel_caches = 4;
3136 rdev->config.cik.max_gprs = 256;
3137 rdev->config.cik.max_gs_threads = 32;
3138 rdev->config.cik.max_hw_contexts = 8;
3140 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3141 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3142 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3143 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3144 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3147 rdev->config.cik.max_shader_engines = 4;
3148 rdev->config.cik.max_tile_pipes = 16;
3149 rdev->config.cik.max_cu_per_sh = 11;
3150 rdev->config.cik.max_sh_per_se = 1;
3151 rdev->config.cik.max_backends_per_se = 4;
3152 rdev->config.cik.max_texture_channel_caches = 16;
3153 rdev->config.cik.max_gprs = 256;
3154 rdev->config.cik.max_gs_threads = 32;
3155 rdev->config.cik.max_hw_contexts = 8;
3157 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3158 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3159 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3160 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3161 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3164 rdev->config.cik.max_shader_engines = 1;
3165 rdev->config.cik.max_tile_pipes = 4;
3166 rdev->config.cik.max_cu_per_sh = 8;
3167 rdev->config.cik.max_backends_per_se = 2;
3168 rdev->config.cik.max_sh_per_se = 1;
3169 rdev->config.cik.max_texture_channel_caches = 4;
3170 rdev->config.cik.max_gprs = 256;
3171 rdev->config.cik.max_gs_threads = 16;
3172 rdev->config.cik.max_hw_contexts = 8;
3174 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3175 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3176 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3177 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3178 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3183 rdev->config.cik.max_shader_engines = 1;
3184 rdev->config.cik.max_tile_pipes = 2;
3185 rdev->config.cik.max_cu_per_sh = 2;
3186 rdev->config.cik.max_sh_per_se = 1;
3187 rdev->config.cik.max_backends_per_se = 1;
3188 rdev->config.cik.max_texture_channel_caches = 2;
3189 rdev->config.cik.max_gprs = 256;
3190 rdev->config.cik.max_gs_threads = 16;
3191 rdev->config.cik.max_hw_contexts = 8;
3193 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3201 /* Initialize HDP */
3202 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3203 WREG32((0x2c14 + j), 0x00000000);
3204 WREG32((0x2c18 + j), 0x00000000);
3205 WREG32((0x2c1c + j), 0x00000000);
3206 WREG32((0x2c20 + j), 0x00000000);
3207 WREG32((0x2c24 + j), 0x00000000);
3210 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3211 WREG32(SRBM_INT_CNTL, 0x1);
3212 WREG32(SRBM_INT_ACK, 0x1);
3214 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3216 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3217 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3219 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3220 rdev->config.cik.mem_max_burst_length_bytes = 256;
3221 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3222 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3223 if (rdev->config.cik.mem_row_size_in_kb > 4)
3224 rdev->config.cik.mem_row_size_in_kb = 4;
3225 /* XXX use MC settings? */
3226 rdev->config.cik.shader_engine_tile_size = 32;
3227 rdev->config.cik.num_gpus = 1;
3228 rdev->config.cik.multi_gpu_tile_size = 64;
3230 /* fix up row size */
3231 gb_addr_config &= ~ROW_SIZE_MASK;
3232 switch (rdev->config.cik.mem_row_size_in_kb) {
3235 gb_addr_config |= ROW_SIZE(0);
3238 gb_addr_config |= ROW_SIZE(1);
3241 gb_addr_config |= ROW_SIZE(2);
3245 /* setup tiling info dword. gb_addr_config is not adequate since it does
3246 * not have bank info, so create a custom tiling dword.
3247 * bits 3:0 num_pipes
3248 * bits 7:4 num_banks
3249 * bits 11:8 group_size
3250 * bits 15:12 row_size
3252 rdev->config.cik.tile_config = 0;
3253 switch (rdev->config.cik.num_tile_pipes) {
3255 rdev->config.cik.tile_config |= (0 << 0);
3258 rdev->config.cik.tile_config |= (1 << 0);
3261 rdev->config.cik.tile_config |= (2 << 0);
3265 /* XXX what about 12? */
3266 rdev->config.cik.tile_config |= (3 << 0);
3269 rdev->config.cik.tile_config |=
3270 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3271 rdev->config.cik.tile_config |=
3272 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3273 rdev->config.cik.tile_config |=
3274 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3276 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3277 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3278 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3279 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3280 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3281 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3282 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3283 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3285 cik_tiling_mode_table_init(rdev);
3287 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3288 rdev->config.cik.max_sh_per_se,
3289 rdev->config.cik.max_backends_per_se);
3291 rdev->config.cik.active_cus = 0;
3292 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3293 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3294 rdev->config.cik.active_cus +=
3295 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3299 /* set HW defaults for 3D engine */
3300 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3302 mutex_lock(&rdev->grbm_idx_mutex);
3304 * making sure that the following register writes will be broadcasted
3305 * to all the shaders
3307 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3308 WREG32(SX_DEBUG_1, 0x20);
3310 WREG32(TA_CNTL_AUX, 0x00010000);
3312 tmp = RREG32(SPI_CONFIG_CNTL);
3314 WREG32(SPI_CONFIG_CNTL, tmp);
3316 WREG32(SQ_CONFIG, 1);
3318 WREG32(DB_DEBUG, 0);
3320 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3322 WREG32(DB_DEBUG2, tmp);
3324 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3326 WREG32(DB_DEBUG3, tmp);
3328 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3330 WREG32(CB_HW_CONTROL, tmp);
3332 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3334 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3335 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3336 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3337 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3339 WREG32(VGT_NUM_INSTANCES, 1);
3341 WREG32(CP_PERFMON_CNTL, 0);
3343 WREG32(SQ_CONFIG, 0);
3345 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3346 FORCE_EOV_MAX_REZ_CNT(255)));
3348 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3349 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3351 WREG32(VGT_GS_VERTEX_REUSE, 16);
3352 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3354 tmp = RREG32(HDP_MISC_CNTL);
3355 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3356 WREG32(HDP_MISC_CNTL, tmp);
3358 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3359 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3361 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3362 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3363 mutex_unlock(&rdev->grbm_idx_mutex);
3369 * GPU scratch registers helpers function.
3372 * cik_scratch_init - setup driver info for CP scratch regs
3374 * @rdev: radeon_device pointer
3376 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3378 * is not used by default on newer asics (r6xx+). On newer asics,
3379 * memory buffers are used for fences rather than scratch regs.
static void cik_scratch_init(struct radeon_device *rdev)
	/* Reserve 7 legacy CP scratch registers starting at SCRATCH_REG0;
	 * mark each free and record its MMIO offset (regs are 4 bytes apart).
	 */
	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3394 * cik_ring_test - basic gfx ring test
3396 * @rdev: radeon_device pointer
3397 * @ring: radeon_ring structure holding ring information
3399 * Allocate a scratch register and write to it using the gfx ring (CIK).
3400 * Provides a basic gfx ring test to verify that the ring is working.
3401 * Used by cik_cp_gfx_resume();
3402 * Returns 0 on success, error on failure.
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
	/* grab a scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
	/* seed the scratch reg with a sentinel value */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
	/* 3-dword packet: have the CP overwrite the scratch reg with 0xDEADBEEF */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);
	/* poll (bounded by usec_timeout) until the CP write lands */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
	radeon_scratch_free(rdev, scratch);
3446 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3448 * @rdev: radeon_device pointer
3449 * @ridx: radeon ring index
3451 * Emits an hdp flush on the cp.
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
	struct radeon_ring *ring = &rdev->ring[ridx];

	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this CP.  Compute rings use
	 * a per-pipe CP bit (CP2 vs CP6 presumably selected on ring->me —
	 * the selecting branch is not visible here); the gfx ring has its own.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
			ref_and_mask = CP2 << ring->pipe;
			ref_and_mask = CP6 << ring->pipe;
	case RADEON_RING_TYPE_GFX_INDEX:

	/* WAIT_REG_MEM: write the flush request, then spin until DONE matches */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) | /* == */
				 WAIT_REG_MEM_ENGINE(1))); /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
3491 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3493 * @rdev: radeon_device pointer
3494 * @fence: radeon fence object
 * Emits a fence sequence number on the gfx ring and flushes
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	/* fence GPU address; low two bits dropped (must be 4-byte aligned) */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	/* dummy event writes seq - 1, no interrupt selected */
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): this one raises the fence interrupt when seq is written */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
3532 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
 * Emits a fence sequence number on the compute ring and flushes
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	/* write the 32-bit seq value and raise an interrupt when done */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
3560 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3562 * @rdev: radeon_device pointer
3563 * @ring: radeon ring buffer object
3564 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3567 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3568 * from running ahead of semaphore waits.
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	/* MEM_SEMAPHORE packet: 64-bit semaphore address + signal/wait select */
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
3592 * cik_copy_cpdma - copy pages using the CP DMA engine
3594 * @rdev: radeon_device pointer
3595 * @src_offset: src GPU address
3596 * @dst_offset: dst GPU address
3597 * @num_gpu_pages: number of GPU pages to xfer
3598 * @resv: reservation object to sync to
3600 * Copy GPU paging using the CP DMA engine (CIK+).
3601 * Used by the radeon ttm implementation to move pages if
3602 * registered as the asic copy callback.
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;

	radeon_sync_create(&sync);

	/* DMA_DATA moves at most 0x1fffff bytes per packet, so split the copy
	 * into num_loops chunks; each chunk emits a 7-dword packet (hence the
	 * num_loops * 7 + 18 ring allocation below).
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);

	/* sync to the reservation object and any cross-ring dependencies */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		/* last chunk gets CP_SYNC — presumably so the CP stalls until
		 * the DMA completes; confirm against the CIK PM4 spec */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;

	r = radeon_fence_emit(rdev, &fence, ring->idx);
		/* on failure: undo the ring lock and drop the sync object */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);
3667 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3669 * @rdev: radeon_device pointer
3670 * @ib: radeon indirect buffer object
3672 * Emits a DE (drawing engine) or CE (constant engine) IB
3673 * on the gfx ring. IBs are usually generated by userspace
3674 * acceleration drivers and submitted to the kernel for
3675 * scheduling on the ring. This function schedules the IB
3676 * on the gfx ring for execution by the GPU.
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);

		if (ring->rptr_save_reg) {
			/* record next_rptr in the save register; the IB packet
			 * below is 3 + 4 dwords ahead of the current wptr */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* no save reg: write next_rptr into the writeback buffer */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
3719 * cik_ib_test - basic gfx ring IB test
3721 * @rdev: radeon_device pointer
3722 * @ring: radeon_ring structure holding ring information
3724 * Allocate an IB and execute it on the gfx ring (CIK).
3725 * Provides a basic gfx ring test to verify that IBs are working.
3726 * Returns 0 on success, error on failure.
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
	struct radeon_ib ib;

	/* scratch register is the IB's write target */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
	/* IB payload: single packet writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	/* wait for the IB's fence, bounded by RADEON_USEC_IB_TEST_TIMEOUT;
	 * r < 0 is an error, r == 0 a timeout */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
	/* fence has signaled; poll until the scratch write is visible */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
 * On CIK, gfx and compute now have independent command processors.
3796 * Gfx consists of a single ring and can process both gfx jobs and
3797 * compute jobs. The gfx CP consists of three microengines (ME):
3798 * PFP - Pre-Fetch Parser
3800 * CE - Constant Engine
3801 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3803 * used by the DE so that they can be loaded into cache in parallel
3804 * while the DE is processing state update packets.
3807 * The compute CP consists of two microengines (ME):
3808 * MEC1 - Compute MicroEngine 1
3809 * MEC2 - Compute MicroEngine 2
3810 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3811 * The queues are exposed to userspace and are programmed directly
3812 * by the compute runtime.
3815 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3817 * @rdev: radeon_device pointer
3818 * @enable: enable or disable the MEs
3820 * Halts or unhalts the gfx MEs.
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
		/* clear all halt bits: PFP/ME/CE start running */
		WREG32(CP_ME_CNTL, 0);

		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		/* halt all three gfx micro engines and mark the ring not ready */
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3836 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3838 * @rdev: radeon_device pointer
3840 * Loads the gfx PFP, ME, and CE ucode.
3841 * Returns 0 for success, -EINVAL if the ucode is not available.
3843 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3847 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3850 cik_cp_gfx_enable(rdev, false);
3853 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3854 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3855 const struct gfx_firmware_header_v1_0 *ce_hdr =
3856 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3857 const struct gfx_firmware_header_v1_0 *me_hdr =
3858 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3859 const __le32 *fw_data;
3862 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3863 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3864 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3867 fw_data = (const __le32 *)
3868 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3869 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3870 WREG32(CP_PFP_UCODE_ADDR, 0);
3871 for (i = 0; i < fw_size; i++)
3872 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3873 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3876 fw_data = (const __le32 *)
3877 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3878 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3879 WREG32(CP_CE_UCODE_ADDR, 0);
3880 for (i = 0; i < fw_size; i++)
3881 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3882 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3885 fw_data = (const __be32 *)
3886 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3887 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3888 WREG32(CP_ME_RAM_WADDR, 0);
3889 for (i = 0; i < fw_size; i++)
3890 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3891 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3892 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3894 const __be32 *fw_data;
3897 fw_data = (const __be32 *)rdev->pfp_fw->data;
3898 WREG32(CP_PFP_UCODE_ADDR, 0);
3899 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3900 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3901 WREG32(CP_PFP_UCODE_ADDR, 0);
3904 fw_data = (const __be32 *)rdev->ce_fw->data;
3905 WREG32(CP_CE_UCODE_ADDR, 0);
3906 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3907 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3908 WREG32(CP_CE_UCODE_ADDR, 0);
3911 fw_data = (const __be32 *)rdev->me_fw->data;
3912 WREG32(CP_ME_RAM_WADDR, 0);
3913 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3914 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3915 WREG32(CP_ME_RAM_WADDR, 0);
3922 * cik_cp_gfx_start - start the gfx ring
3924 * @rdev: radeon_device pointer
3926 * Enables the ring and loads the clear state context and other
3927 * packets required to init the ring.
3928 * Returns 0 for success, error for failure.
static int cik_cp_gfx_start(struct radeon_device *rdev)
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	/* basic CP setup: contexts, endian swap off, device id */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the default state dwords plus the fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);

	/* init the CE partitions. CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default golden state dword array */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);
3983 * cik_cp_gfx_fini - stop the gfx ring
3985 * @rdev: radeon_device pointer
3987 * Stop the gfx ring and tear down the driver ring
static void cik_cp_gfx_fini(struct radeon_device *rdev)
	/* halt the gfx MEs, then tear down the gfx ring object */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3997 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3999 * @rdev: radeon_device pointer
4001 * Program the location and size of the gfx ring buffer
4002 * and test it to make sure it's working.
4003 * Returns 0 for success, error for failure.
static int cik_cp_gfx_resume(struct radeon_device *rdev)
	struct radeon_ring *ring;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
		tmp |= BUF_SWAP_32BIT;
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is stored in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
	/* prefer the writeback copy of the read pointer; fall back to MMIO */
	if (rdev->wb.enabled)
		rptr = rdev->wb.wb[ring->rptr_offs/4];
		rptr = RREG32(CP_RB0_RPTR);
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
	/* gfx write pointer lives in the CP_RB0_WPTR register */
	return RREG32(CP_RB0_WPTR);
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back — presumably to flush the posted MMIO write; confirm */
	(void)RREG32(CP_RB0_WPTR);
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
	/* writeback copy if available ... */
	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
		/* ... otherwise select this ring's HQD under srbm_mutex and
		 * read the hardware queue read pointer directly */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
	/* writeback copy if available ... */
	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
		/* ... otherwise read the HQD write pointer under srbm_mutex */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
	/* XXX check if swapping is necessary on BE */
	/* update the writeback copy, then ring the queue's doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
	/* NOTE: callers hold rdev->srbm_mutex around this (see
	 * cik_cp_compute_enable) since the SRBM selector is reprogrammed */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* if the HQD is active, request a dequeue and wait (bounded) for idle */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
		/* clear the dequeue request and reset the queue pointers */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
4169 * cik_cp_compute_enable - enable/disable the compute CP MEs
4171 * @rdev: radeon_device pointer
4172 * @enable: enable or disable the MEs
4174 * Halts or unhalts the compute MEs.
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
		/* clear the halt bits: MEC1/MEC2 start running */
		WREG32(CP_MEC_CNTL, 0);

		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both compute micro engines, mark their rings not ready */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4198 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4200 * @rdev: radeon_device pointer
4202 * Loads the compute MEC1&2 ucode.
4203 * Returns 0 for success, -EINVAL if the ucode is not available.
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
	/* halt the compute MEs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	/* new-style firmware: little-endian words behind a parsed header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2: only KAVERI has a second compute ME (separate mec2 image) */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));

		/* legacy firmware: headerless big-endian words, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2: legacy path reuses the single mec image */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4269 * cik_cp_compute_start - start the compute queues
4271 * @rdev: radeon_device pointer
4273 * Enable the compute queues.
4274 * Returns 0 for success, error for failure.
static int cik_cp_compute_start(struct radeon_device *rdev)
	/* unhalt the compute micro engines */
	cik_cp_compute_enable(rdev, true);
4284 * cik_cp_compute_fini - stop the compute queues
4286 * @rdev: radeon_device pointer
4288 * Stop the compute queues and tear down the driver queue
static void cik_cp_compute_fini(struct radeon_device *rdev)
	/* halt the compute MEs, then free each compute ring's MQD BO */
	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			/* unpin, unreserve, and drop the last reference */
			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
static void cik_mec_fini(struct radeon_device *rdev)
	/* tear down the HPD EOP buffer object allocated by cik_mec_init */
	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
4333 #define MEC_HPD_SIZE 2048
static int cik_mec_init(struct radeon_device *rdev)
	/*
	 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate one HPD EOP buffer per pipe (x2) in GTT, pinned */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);

	/* clear memory. Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* CPU-side shadow of the per-queue HQD register block; the fields mirror
 * the CP_HQD_* / CP_MQD_* registers programmed in cik_cp_compute_resume().
 * NOTE(review): several field lines are missing from this extraction. */
4390 struct hqd_registers
4392 u32 cp_mqd_base_addr;
4393 u32 cp_mqd_base_addr_hi;
4396 u32 cp_hqd_persistent_state;
4397 u32 cp_hqd_pipe_priority;
4398 u32 cp_hqd_queue_priority;
4401 u32 cp_hqd_pq_base_hi;
4403 u32 cp_hqd_pq_rptr_report_addr;
4404 u32 cp_hqd_pq_rptr_report_addr_hi;
4405 u32 cp_hqd_pq_wptr_poll_addr;
4406 u32 cp_hqd_pq_wptr_poll_addr_hi;
4407 u32 cp_hqd_pq_doorbell_control;
4409 u32 cp_hqd_pq_control;
4410 u32 cp_hqd_ib_base_addr;
4411 u32 cp_hqd_ib_base_addr_hi;
4413 u32 cp_hqd_ib_control;
4414 u32 cp_hqd_iq_timer;
4416 u32 cp_hqd_dequeue_request;
4417 u32 cp_hqd_dma_offload;
4418 u32 cp_hqd_sema_cmd;
4419 u32 cp_hqd_msg_type;
4420 u32 cp_hqd_atomic0_preop_lo;
4421 u32 cp_hqd_atomic0_preop_hi;
4422 u32 cp_hqd_atomic1_preop_lo;
4423 u32 cp_hqd_atomic1_preop_hi;
4424 u32 cp_hqd_hq_scheduler0;
4425 u32 cp_hqd_hq_scheduler1;
/* NOTE(review): the `struct bonaire_mqd {` header and its first fields
 * fall in an extraction gap here; the fields below (dispatch state,
 * thread-management masks, embedded queue_state, interrupt queue)
 * appear to belong to the MQD layout used by cik_cp_compute_resume(). */
4432 u32 dispatch_initiator;
4436 u32 pipeline_stat_enable;
4437 u32 perf_counter_enable;
4443 u32 resource_limits;
4444 u32 static_thread_mgmt01[2];
4446 u32 static_thread_mgmt23[2];
4448 u32 thread_trace_enable;
4451 u32 vgtcs_invoke_count[2];
4452 struct hqd_registers queue_state;
4454 u32 interrupt_queue[64];
4458 * cik_cp_compute_resume - setup the compute queue registers
4460 * @rdev: radeon_device pointer
4462 * Program the compute queues and test them to make sure they
4464 * Returns 0 for success, error for failure.
/* NOTE(review): many lines (declarations, error returns, if/else heads)
 * are missing from this extraction; comments describe visible code. */
4466 static int cik_cp_compute_resume(struct radeon_device *rdev)
4470 bool use_doorbell = true;
4476 struct bonaire_mqd *mqd;
4478 r = cik_cp_compute_start(rdev);
4482 /* fix up chicken bits */
4483 tmp = RREG32(CP_CPF_DEBUG);
4485 WREG32(CP_CPF_DEBUG, tmp);
/* Pipe-global EOP setup; done under srbm_mutex because cik_srbm_select()
 * changes the shared SRBM register routing. */
4487 /* init the pipes */
4488 mutex_lock(&rdev->srbm_mutex);
4490 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4492 cik_srbm_select(rdev, 0, 0, 0, 0);
4494 /* write the EOP addr */
4495 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4496 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4498 /* set the VMID assigned */
4499 WREG32(CP_HPD_EOP_VMID, 0);
4501 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4502 tmp = RREG32(CP_HPD_EOP_CONTROL);
4503 tmp &= ~EOP_SIZE_MASK;
4504 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4505 WREG32(CP_HPD_EOP_CONTROL, tmp);
4507 mutex_unlock(&rdev->srbm_mutex);
4509 /* init the queues. Just two for now. */
4510 for (i = 0; i < 2; i++) {
4512 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4514 idx = CAYMAN_RING_TYPE_CP2_INDEX;
/* Lazily allocate the MQD buffer object for this ring. */
4516 if (rdev->ring[idx].mqd_obj == NULL) {
4517 r = radeon_bo_create(rdev,
4518 sizeof(struct bonaire_mqd),
4520 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4521 NULL, &rdev->ring[idx].mqd_obj);
4523 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4528 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4529 if (unlikely(r != 0)) {
4530 cik_cp_compute_fini(rdev);
4533 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4536 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4537 cik_cp_compute_fini(rdev);
4540 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4542 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4543 cik_cp_compute_fini(rdev);
4547 /* init the mqd struct */
4548 memset(buf, 0, sizeof(struct bonaire_mqd));
4550 mqd = (struct bonaire_mqd *)buf;
4551 mqd->header = 0xC0310800;
/* Enable all CUs for this queue (all-ones static thread management). */
4552 mqd->static_thread_mgmt01[0] = 0xffffffff;
4553 mqd->static_thread_mgmt01[1] = 0xffffffff;
4554 mqd->static_thread_mgmt23[0] = 0xffffffff;
4555 mqd->static_thread_mgmt23[1] = 0xffffffff;
/* Route SRBM access to this ring's me/pipe/queue before touching HQD regs. */
4557 mutex_lock(&rdev->srbm_mutex);
4558 cik_srbm_select(rdev, rdev->ring[idx].me,
4559 rdev->ring[idx].pipe,
4560 rdev->ring[idx].queue, 0);
4562 /* disable wptr polling */
4563 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4564 tmp &= ~WPTR_POLL_EN;
4565 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4567 /* enable doorbell? */
4568 mqd->queue_state.cp_hqd_pq_doorbell_control =
4569 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4571 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4573 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4574 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4575 mqd->queue_state.cp_hqd_pq_doorbell_control);
4577 /* disable the queue if it's active */
4578 mqd->queue_state.cp_hqd_dequeue_request = 0;
4579 mqd->queue_state.cp_hqd_pq_rptr = 0;
4580 mqd->queue_state.cp_hqd_pq_wptr= 0;
4581 if (RREG32(CP_HQD_ACTIVE) & 1) {
4582 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
/* Poll for the HQD to go idle, bounded by usec_timeout. */
4583 for (j = 0; j < rdev->usec_timeout; j++) {
4584 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4588 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4589 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4590 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4593 /* set the pointer to the MQD */
4594 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4595 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4596 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4597 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4598 /* set MQD vmid to 0 */
4599 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4600 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4601 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4603 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4604 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4605 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4606 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4607 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4608 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4610 /* set up the HQD, this is similar to CP_RB0_CNTL */
4611 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4612 mqd->queue_state.cp_hqd_pq_control &=
4613 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4615 mqd->queue_state.cp_hqd_pq_control |=
4616 order_base_2(rdev->ring[idx].ring_size / 8);
4617 mqd->queue_state.cp_hqd_pq_control |=
4618 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4620 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4622 mqd->queue_state.cp_hqd_pq_control &=
4623 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4624 mqd->queue_state.cp_hqd_pq_control |=
4625 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4626 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4628 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4630 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4632 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4633 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4634 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4635 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4636 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4637 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4639 /* set the wb address wether it's enabled or not */
4641 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4643 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4644 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4645 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4646 upper_32_bits(wb_gpu_addr) & 0xffff;
4647 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4648 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4649 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4650 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4652 /* enable the doorbell if requested */
4654 mqd->queue_state.cp_hqd_pq_doorbell_control =
4655 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4656 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4657 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4658 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4659 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4660 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4661 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4664 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4666 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4667 mqd->queue_state.cp_hqd_pq_doorbell_control);
4669 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4670 rdev->ring[idx].wptr = 0;
4671 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4672 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4673 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4675 /* set the vmid for the queue */
4676 mqd->queue_state.cp_hqd_vmid = 0;
4677 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4679 /* activate the queue */
4680 mqd->queue_state.cp_hqd_active = 1;
4681 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
/* Restore default SRBM routing before dropping the lock. */
4683 cik_srbm_select(rdev, 0, 0, 0, 0);
4684 mutex_unlock(&rdev->srbm_mutex);
4686 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4687 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* Mark the ring ready, then verify it with a ring test. */
4689 rdev->ring[idx].ready = true;
4690 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4692 rdev->ring[idx].ready = false;
/* Enable/disable both the GFX and compute command processors together. */
4698 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4700 cik_cp_gfx_enable(rdev, enable);
4701 cik_cp_compute_enable(rdev, enable);
/* Load GFX then compute CP microcode.  NOTE(review): the error-check
 * lines between the two calls are missing from this extraction. */
4704 static int cik_cp_load_microcode(struct radeon_device *rdev)
4708 r = cik_cp_gfx_load_microcode(rdev);
4711 r = cik_cp_compute_load_microcode(rdev);
/* Tear down both GFX and compute command processor state. */
4718 static void cik_cp_fini(struct radeon_device *rdev)
4720 cik_cp_gfx_fini(rdev);
4721 cik_cp_compute_fini(rdev);
/* Bring up the CP: load microcode, resume GFX and compute rings, and
 * re-enable the GUI idle interrupt (disabled during the sequence). */
4724 static int cik_cp_resume(struct radeon_device *rdev)
4728 cik_enable_gui_idle_interrupt(rdev, false);
4730 r = cik_cp_load_microcode(rdev);
4734 r = cik_cp_gfx_resume(rdev);
4737 r = cik_cp_compute_resume(rdev);
4741 cik_enable_gui_idle_interrupt(rdev, true);
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used by the reset paths for post-mortem diagnosis. */
4746 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4748 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4749 RREG32(GRBM_STATUS));
4750 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4751 RREG32(GRBM_STATUS2));
4752 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4753 RREG32(GRBM_STATUS_SE0));
4754 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4755 RREG32(GRBM_STATUS_SE1));
4756 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4757 RREG32(GRBM_STATUS_SE2));
4758 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4759 RREG32(GRBM_STATUS_SE3));
4760 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4761 RREG32(SRBM_STATUS));
4762 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4763 RREG32(SRBM_STATUS2));
/* Both SDMA engines share the SDMA0 register layout at different offsets. */
4764 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4765 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4766 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4767 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4768 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4769 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4770 RREG32(CP_STALLED_STAT1));
4771 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4772 RREG32(CP_STALLED_STAT2));
4773 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4774 RREG32(CP_STALLED_STAT3));
4775 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4776 RREG32(CP_CPF_BUSY_STAT));
4777 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4778 RREG32(CP_CPF_STALLED_STAT1));
4779 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4780 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4781 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4782 RREG32(CP_CPC_STALLED_STAT1));
4783 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4787 * cik_gpu_check_soft_reset - check which blocks are busy
4789 * @rdev: radeon_device pointer
4791 * Check which blocks are busy and return the relevant reset
4792 * mask to be used by cik_gpu_soft_reset().
4793 * Returns a mask of the blocks to be reset.
/* NOTE(review): several condition lines are missing from this
 * extraction; comments describe visible checks only. */
4795 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: graphics pipeline / CP busy bits. */
4801 tmp = RREG32(GRBM_STATUS);
4802 if (tmp & (PA_BUSY | SC_BUSY |
4803 BCI_BUSY | SX_BUSY |
4804 TA_BUSY | VGT_BUSY |
4806 GDS_BUSY | SPI_BUSY |
4807 IA_BUSY | IA_BUSY_NO_DMA))
4808 reset_mask |= RADEON_RESET_GFX;
4810 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4811 reset_mask |= RADEON_RESET_CP;
4814 tmp = RREG32(GRBM_STATUS2);
4816 reset_mask |= RADEON_RESET_RLC;
4818 /* SDMA0_STATUS_REG */
4819 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4820 if (!(tmp & SDMA_IDLE))
4821 reset_mask |= RADEON_RESET_DMA;
4823 /* SDMA1_STATUS_REG */
4824 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4825 if (!(tmp & SDMA_IDLE))
4826 reset_mask |= RADEON_RESET_DMA1;
4829 tmp = RREG32(SRBM_STATUS2);
4830 if (tmp & SDMA_BUSY)
4831 reset_mask |= RADEON_RESET_DMA;
4833 if (tmp & SDMA1_BUSY)
4834 reset_mask |= RADEON_RESET_DMA1;
4837 tmp = RREG32(SRBM_STATUS);
4840 reset_mask |= RADEON_RESET_IH;
4843 reset_mask |= RADEON_RESET_SEM;
4845 if (tmp & GRBM_RQ_PENDING)
4846 reset_mask |= RADEON_RESET_GRBM;
4849 reset_mask |= RADEON_RESET_VMC;
4851 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4852 MCC_BUSY | MCD_BUSY))
4853 reset_mask |= RADEON_RESET_MC;
4855 if (evergreen_is_display_hung(rdev))
4856 reset_mask |= RADEON_RESET_DISPLAY;
4858 /* Skip MC reset as it's mostly likely not hung, just busy */
4859 if (reset_mask & RADEON_RESET_MC) {
4860 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4861 reset_mask &= ~RADEON_RESET_MC;
4868 * cik_gpu_soft_reset - soft reset GPU
4870 * @rdev: radeon_device pointer
4871 * @reset_mask: mask of which blocks to reset
4873 * Soft reset the blocks specified in @reset_mask.
/* NOTE(review): extraction gaps hide some lines (declarations, delays,
 * a few conditions); comments describe the visible sequence. */
4875 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4877 struct evergreen_mc_save save;
4878 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4881 if (reset_mask == 0)
4884 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4886 cik_print_gpu_status_regs(rdev);
4887 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4888 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4889 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4890 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4899 /* Disable GFX parsing/prefetching */
4900 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4902 /* Disable MEC parsing/prefetching */
4903 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* Halt each SDMA engine that is about to be reset. */
4905 if (reset_mask & RADEON_RESET_DMA) {
4907 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4909 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4911 if (reset_mask & RADEON_RESET_DMA1) {
4913 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4915 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* Quiesce the memory controller before touching reset registers. */
4918 evergreen_mc_stop(rdev, &save);
4919 if (evergreen_mc_wait_for_idle(rdev)) {
4920 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* Translate the block mask into GRBM/SRBM soft-reset bits. */
4923 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4924 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4926 if (reset_mask & RADEON_RESET_CP) {
4927 grbm_soft_reset |= SOFT_RESET_CP;
4929 srbm_soft_reset |= SOFT_RESET_GRBM;
4932 if (reset_mask & RADEON_RESET_DMA)
4933 srbm_soft_reset |= SOFT_RESET_SDMA;
4935 if (reset_mask & RADEON_RESET_DMA1)
4936 srbm_soft_reset |= SOFT_RESET_SDMA1;
4938 if (reset_mask & RADEON_RESET_DISPLAY)
4939 srbm_soft_reset |= SOFT_RESET_DC;
4941 if (reset_mask & RADEON_RESET_RLC)
4942 grbm_soft_reset |= SOFT_RESET_RLC;
4944 if (reset_mask & RADEON_RESET_SEM)
4945 srbm_soft_reset |= SOFT_RESET_SEM;
4947 if (reset_mask & RADEON_RESET_IH)
4948 srbm_soft_reset |= SOFT_RESET_IH;
4950 if (reset_mask & RADEON_RESET_GRBM)
4951 srbm_soft_reset |= SOFT_RESET_GRBM;
4953 if (reset_mask & RADEON_RESET_VMC)
4954 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC reset is only valid on discrete parts, not IGPs. */
4956 if (!(rdev->flags & RADEON_IS_IGP)) {
4957 if (reset_mask & RADEON_RESET_MC)
4958 srbm_soft_reset |= SOFT_RESET_MC;
/* Pulse the reset bits: set, read back, clear, read back. */
4961 if (grbm_soft_reset) {
4962 tmp = RREG32(GRBM_SOFT_RESET);
4963 tmp |= grbm_soft_reset;
4964 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4965 WREG32(GRBM_SOFT_RESET, tmp);
4966 tmp = RREG32(GRBM_SOFT_RESET);
4970 tmp &= ~grbm_soft_reset;
4971 WREG32(GRBM_SOFT_RESET, tmp);
4972 tmp = RREG32(GRBM_SOFT_RESET);
4975 if (srbm_soft_reset) {
4976 tmp = RREG32(SRBM_SOFT_RESET);
4977 tmp |= srbm_soft_reset;
4978 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4979 WREG32(SRBM_SOFT_RESET, tmp);
4980 tmp = RREG32(SRBM_SOFT_RESET);
4984 tmp &= ~srbm_soft_reset;
4985 WREG32(SRBM_SOFT_RESET, tmp);
4986 tmp = RREG32(SRBM_SOFT_RESET);
4989 /* Wait a little for things to settle down */
4992 evergreen_mc_resume(rdev, &save);
4995 cik_print_gpu_status_regs(rdev);
/* GMCON register state saved across a KV (Kaveri) pci config reset.
 * NOTE(review): the gmcon_misc/gmcon_misc3 fields referenced by
 * kv_save_regs_for_reset() fall in an extraction gap here. */
4998 struct kv_reset_save_regs {
4999 u32 gmcon_reng_execute;
/* Save the GMCON registers and disable the restore engine / stutter
 * mode before a reset, so kv_restore_regs_for_reset() can replay them. */
5004 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5005 struct kv_reset_save_regs *save)
5007 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5008 save->gmcon_misc = RREG32(GMCON_MISC);
5009 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5011 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5012 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5013 STCTRL_STUTTER_EN));
/* Replay the GMCON power-gating FSM programming sequence after a reset
 * and restore the registers saved by kv_save_regs_for_reset().  The
 * CONFIG/WRITE value pairs are an opaque hw-defined sequence. */
5016 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5017 struct kv_reset_save_regs *save)
5021 WREG32(GMCON_PGFSM_WRITE, 0);
5022 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5024 for (i = 0; i < 5; i++)
5025 WREG32(GMCON_PGFSM_WRITE, 0);
5027 WREG32(GMCON_PGFSM_WRITE, 0);
5028 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5030 for (i = 0; i < 5; i++)
5031 WREG32(GMCON_PGFSM_WRITE, 0);
5033 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5034 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5036 for (i = 0; i < 5; i++)
5037 WREG32(GMCON_PGFSM_WRITE, 0);
5039 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5040 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5042 for (i = 0; i < 5; i++)
5043 WREG32(GMCON_PGFSM_WRITE, 0);
5045 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5046 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5048 for (i = 0; i < 5; i++)
5049 WREG32(GMCON_PGFSM_WRITE, 0);
5051 WREG32(GMCON_PGFSM_WRITE, 0);
5052 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5054 for (i = 0; i < 5; i++)
5055 WREG32(GMCON_PGFSM_WRITE, 0);
5057 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5058 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5060 for (i = 0; i < 5; i++)
5061 WREG32(GMCON_PGFSM_WRITE, 0);
5063 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5064 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5066 for (i = 0; i < 5; i++)
5067 WREG32(GMCON_PGFSM_WRITE, 0);
5069 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5070 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5072 for (i = 0; i < 5; i++)
5073 WREG32(GMCON_PGFSM_WRITE, 0);
5075 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5076 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5078 for (i = 0; i < 5; i++)
5079 WREG32(GMCON_PGFSM_WRITE, 0);
5081 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5082 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
/* Restore the saved GMCON register values last. */
5084 WREG32(GMCON_MISC3, save->gmcon_misc3);
5085 WREG32(GMCON_MISC, save->gmcon_misc);
5086 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
/* Full-device reset via PCI config space: halt engines, quiesce the MC,
 * save IGP GMCON state, trigger the reset, then wait for the ASIC to
 * come back.  NOTE(review): several lines are missing from this
 * extraction (declarations, halt bit writes, delays). */
5089 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5091 struct evergreen_mc_save save;
5092 struct kv_reset_save_regs kv_save = { 0 };
5095 dev_info(rdev->dev, "GPU pci config reset\n");
5103 /* Disable GFX parsing/prefetching */
5104 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5106 /* Disable MEC parsing/prefetching */
5107 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* Halt both SDMA engines. */
5110 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5112 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5114 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5116 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5117 /* XXX other engines? */
5119 /* halt the rlc, disable cp internal ints */
5124 /* disable mem access */
5125 evergreen_mc_stop(rdev, &save);
5126 if (evergreen_mc_wait_for_idle(rdev)) {
5127 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
/* IGPs (Kaveri) need GMCON state preserved across the reset. */
5130 if (rdev->flags & RADEON_IS_IGP)
5131 kv_save_regs_for_reset(rdev, &kv_save);
5134 pci_clear_master(rdev->pdev);
5136 radeon_pci_config_reset(rdev);
5140 /* wait for asic to come out of reset */
/* CONFIG_MEMSIZE reads 0xffffffff while the ASIC is still in reset. */
5141 for (i = 0; i < rdev->usec_timeout; i++) {
5142 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5147 /* does asic init need to be run first??? */
5148 if (rdev->flags & RADEON_IS_IGP)
5149 kv_restore_regs_for_reset(rdev, &kv_save);
5153 * cik_asic_reset - soft reset GPU
5155 * @rdev: radeon_device pointer
5156 * @hard: force hard reset
5158 * Look up which blocks are hung and attempt
5160 * Returns 0 for success.
/* NOTE(review): the hard-reset branch head and final return lines are
 * missing from this extraction. */
5162 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5167 cik_gpu_pci_config_reset(rdev);
5171 reset_mask = cik_gpu_check_soft_reset(rdev);
/* Tell the BIOS scratch regs the engine is hung while we reset. */
5174 r600_set_bios_scratch_engine_hung(rdev, true);
5176 /* try soft reset */
5177 cik_gpu_soft_reset(rdev, reset_mask);
5179 reset_mask = cik_gpu_check_soft_reset(rdev);
5181 /* try pci config reset */
5182 if (reset_mask && radeon_hard_reset)
5183 cik_gpu_pci_config_reset(rdev);
5185 reset_mask = cik_gpu_check_soft_reset(rdev);
5188 r600_set_bios_scratch_engine_hung(rdev, false);
5194 * cik_gfx_is_lockup - check if the 3D engine is locked up
5196 * @rdev: radeon_device pointer
5197 * @ring: radeon_ring structure holding ring information
5199 * Check if the 3D engine is locked up (CIK).
5200 * Returns true if the engine is locked, false if not.
5202 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5204 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* If no GFX/compute/CP block is busy, refresh the lockup tracker and
 * report healthy (early-return path; return line not visible here). */
5206 if (!(reset_mask & (RADEON_RESET_GFX |
5207 RADEON_RESET_COMPUTE |
5208 RADEON_RESET_CP))) {
5209 radeon_ring_lockup_update(rdev, ring);
5212 return radeon_ring_test_lockup(rdev, ring);
5217 * cik_mc_program - program the GPU memory controller
5219 * @rdev: radeon_device pointer
5221 * Set the location of vram, gart, and AGP in the GPU's
5222 * physical address space (CIK).
5224 static void cik_mc_program(struct radeon_device *rdev)
5226 struct evergreen_mc_save save;
5230 /* Initialize HDP */
/* Zero the 32 HDP tiling registers (5 regs per slot, 0x18 stride). */
5231 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5232 WREG32((0x2c14 + j), 0x00000000);
5233 WREG32((0x2c18 + j), 0x00000000);
5234 WREG32((0x2c1c + j), 0x00000000);
5235 WREG32((0x2c20 + j), 0x00000000);
5236 WREG32((0x2c24 + j), 0x00000000);
5238 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5240 evergreen_mc_stop(rdev, &save);
5241 if (radeon_mc_wait_for_idle(rdev)) {
5242 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5244 /* Lockout access through VGA aperture*/
5245 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5246 /* Update configuration */
5247 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5248 rdev->mc.vram_start >> 12);
5249 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5250 rdev->mc.vram_end >> 12);
5251 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5252 rdev->vram_scratch.gpu_addr >> 12);
/* FB location: vram end in the high 16 bits, start in the low 16. */
5253 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5254 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5255 WREG32(MC_VM_FB_LOCATION, tmp);
5256 /* XXX double check these! */
5257 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5258 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5259 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP aperture disabled: BOT above TOP. */
5260 WREG32(MC_VM_AGP_BASE, 0);
5261 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5262 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5263 if (radeon_mc_wait_for_idle(rdev)) {
5264 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5266 evergreen_mc_resume(rdev, &save);
5267 /* we need to own VRAM, so turn off the VGA renderer here
5268 * to stop it overwriting our objects */
5269 rv515_vga_render_disable(rdev);
5273 * cik_mc_init - initialize the memory controller driver params
5275 * @rdev: radeon_device pointer
5277 * Look up the amount of vram, vram width, and decide how to place
5278 * vram and gart within the GPU's physical address space (CIK).
5279 * Returns 0 for success.
/* NOTE(review): the chansize else-branch and the NOOFCHAN switch cases
 * are missing from this extraction. */
5281 static int cik_mc_init(struct radeon_device *rdev)
5284 int chansize, numchan;
5286 /* Get VRAM informations */
5287 rdev->mc.vram_is_ddr = true;
5288 tmp = RREG32(MC_ARB_RAMCFG);
5289 if (tmp & CHANSIZE_MASK) {
/* Channel count comes from MC_SHARED_CHMAP (cases not visible here). */
5294 tmp = RREG32(MC_SHARED_CHMAP);
5295 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5325 rdev->mc.vram_width = numchan * chansize;
5326 /* Could aper size report 0 ? */
5327 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5328 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5329 /* size in MB on si */
5330 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5331 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5332 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* Placement logic is shared with SI. */
5333 si_vram_gtt_location(rdev, &rdev->mc);
5334 radeon_update_bandwidth_info(rdev);
5341 * VMID 0 is the physical GPU addresses as used by the kernel.
5342 * VMIDs 1-15 are used for userspace clients and are handled
5343 * by the radeon vm/hsa code.
5346 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5348 * @rdev: radeon_device pointer
5350 * Flush the TLB for the VMID 0 page table (CIK).
5352 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5354 /* flush hdp cache */
5355 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5357 /* bits 0-15 are the VM contexts0-15 */
/* Bit 0 set: invalidate only VM context 0 (the kernel's page table). */
5358 WREG32(VM_INVALIDATE_REQUEST, 0x1);
/* Initialize SH_MEM state for the compute VMIDs (8-15) used by amdkfd:
 * unaligned access mode, non-cached default mtype, shared APE bases. */
5361 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5364 uint32_t sh_mem_bases, sh_mem_config;
5366 sh_mem_bases = 0x6000 | 0x6000 << 16;
5367 sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5368 sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
/* srbm_mutex serializes cik_srbm_select() users. */
5370 mutex_lock(&rdev->srbm_mutex);
5371 for (i = 8; i < 16; i++) {
5372 cik_srbm_select(rdev, 0, 0, 0, i);
5373 /* CP and shaders */
5374 WREG32(SH_MEM_CONFIG, sh_mem_config);
5375 WREG32(SH_MEM_APE1_BASE, 1);
5376 WREG32(SH_MEM_APE1_LIMIT, 0);
5377 WREG32(SH_MEM_BASES, sh_mem_bases);
5379 cik_srbm_select(rdev, 0, 0, 0, 0);
5380 mutex_unlock(&rdev->srbm_mutex);
5384 * cik_pcie_gart_enable - gart enable
5386 * @rdev: radeon_device pointer
5388 * This sets up the TLBs, programs the page tables for VMID0,
5389 * sets up the hw for VMIDs 1-15 which are allocated on
5390 * demand, and sets up the global locations for the LDS, GDS,
5391 * and GPUVM for FSA64 clients (CIK).
5392 * Returns 0 for success, errors for failure.
/* NOTE(review): error returns and a few branch heads are missing from
 * this extraction; comments describe the visible programming sequence. */
5394 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5398 if (rdev->gart.robj == NULL) {
5399 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5402 r = radeon_gart_table_vram_pin(rdev);
5405 /* Setup TLB control */
5406 WREG32(MC_VM_MX_L1_TLB_CNTL,
5409 ENABLE_L1_FRAGMENT_PROCESSING |
5410 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5411 ENABLE_ADVANCED_DRIVER_MODEL |
5412 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5413 /* Setup L2 cache */
5414 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5415 ENABLE_L2_FRAGMENT_PROCESSING |
5416 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5417 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5418 EFFECTIVE_L2_QUEUE_SIZE(7) |
5419 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5420 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5421 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5423 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
/* Context 0 is the kernel GART mapping. */
5424 /* setup context0 */
5425 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5426 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5427 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5428 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5429 (u32)(rdev->dummy_page.addr >> 12));
5430 WREG32(VM_CONTEXT0_CNTL2, 0);
5431 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5432 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5438 /* restore context1-15 */
5439 /* set vm size, must be a multiple of 4 */
5440 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5441 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
/* Contexts 1-7 use the CONTEXT0 register bank; 8-15 use CONTEXT8's. */
5442 for (i = 1; i < 16; i++) {
5444 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5445 rdev->vm_manager.saved_table_addr[i]);
5447 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5448 rdev->vm_manager.saved_table_addr[i]);
5451 /* enable context1-15 */
5452 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5453 (u32)(rdev->dummy_page.addr >> 12));
5454 WREG32(VM_CONTEXT1_CNTL2, 4);
5455 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5456 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5457 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5458 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5459 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5460 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5461 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5462 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5463 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5464 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5465 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5466 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5467 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5468 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5470 if (rdev->family == CHIP_KAVERI) {
5471 u32 tmp = RREG32(CHUB_CONTROL);
5473 WREG32(CHUB_CONTROL, tmp);
5476 /* XXX SH_MEM regs */
5477 /* where to put LDS, scratch, GPUVM in FSA64 space */
5478 mutex_lock(&rdev->srbm_mutex);
5479 for (i = 0; i < 16; i++) {
5480 cik_srbm_select(rdev, 0, 0, 0, i);
5481 /* CP and shaders */
5482 WREG32(SH_MEM_CONFIG, 0);
5483 WREG32(SH_MEM_APE1_BASE, 1);
5484 WREG32(SH_MEM_APE1_LIMIT, 0);
5485 WREG32(SH_MEM_BASES, 0);
5487 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5488 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5489 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5490 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5491 /* XXX SDMA RLC - todo */
5493 cik_srbm_select(rdev, 0, 0, 0, 0);
5494 mutex_unlock(&rdev->srbm_mutex);
5496 cik_pcie_init_compute_vmid(rdev);
5498 cik_pcie_gart_tlb_flush(rdev);
5499 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5500 (unsigned)(rdev->mc.gtt_size >> 20),
5501 (unsigned long long)rdev->gart.table_addr);
5502 rdev->gart.ready = true;
5507 * cik_pcie_gart_disable - gart disable
5509 * @rdev: radeon_device pointer
5511 * This disables all VM page table (CIK).
5513 static void cik_pcie_gart_disable(struct radeon_device *rdev)
/* Save each user context's page table base so gart_enable can restore it. */
5517 for (i = 1; i < 16; ++i) {
5520 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5522 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5523 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5526 /* Disable all tables */
5527 WREG32(VM_CONTEXT0_CNTL, 0);
5528 WREG32(VM_CONTEXT1_CNTL, 0);
5529 /* Setup TLB control */
5530 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5531 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5532 /* Setup L2 cache */
5534 ENABLE_L2_FRAGMENT_PROCESSING |
5535 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5536 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5537 EFFECTIVE_L2_QUEUE_SIZE(7) |
5538 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5539 WREG32(VM_L2_CNTL2, 0);
5540 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5541 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5542 radeon_gart_table_vram_unpin(rdev);
5546 * cik_pcie_gart_fini - vm fini callback
5548 * @rdev: radeon_device pointer
5550 * Tears down the driver GART/VM setup (CIK).
5552 static void cik_pcie_gart_fini(struct radeon_device *rdev)
/* Disable the hw first, then free the table and the gart bookkeeping. */
5554 cik_pcie_gart_disable(rdev);
5555 radeon_gart_table_vram_free(rdev);
5556 radeon_gart_fini(rdev);
5561 * cik_ib_parse - vm ib_parse callback
5563 * @rdev: radeon_device pointer
5564 * @ib: indirect buffer pointer
5566 * CIK uses hw IB checking so this is a nop (CIK).
/* CIK validates IBs in hardware, so SW parsing is a nop.
 * NOTE(review): the body (presumably just "return 0;") was elided in
 * this extract. */
5568 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5575 * VMID 0 is the physical GPU addresses as used by the kernel.
5576 * VMIDs 1-15 are used for userspace clients and are handled
5577 * by the radeon vm/hsa code.
5580 * cik_vm_init - cik vm init callback
5582 * @rdev: radeon_device pointer
5584 * Inits cik specific vm parameters (number of VMs, base of vram for
5585 * VMIDs 1-15) (CIK).
5586 * Returns 0 for success.
5588 int cik_vm_init(struct radeon_device *rdev)
/* NOTE(review): comment delimiters and the IGP shift line are elided in
 * this extract; lines 5592-5594 are the interior of a block comment. */
5592 * VMID 0 is reserved for System
5593 * radeon graphics/compute will use VMIDs 1-7
5594 * amdkfd will use VMIDs 8-15
5596 rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5597 /* base offset of vram pages */
5598 if (rdev->flags & RADEON_IS_IGP) {
5599 u64 tmp = RREG32(MC_VM_FB_OFFSET);
/* NOTE(review): a scaling of tmp (shift from register units to a byte
 * address) appears to be elided between these two lines -- confirm
 * against upstream before relying on this value. */
5601 rdev->vm_manager.vram_base_offset = tmp;
/* discrete cards: VRAM base offset is simply 0 */
5603 rdev->vm_manager.vram_base_offset = 0;
5609 * cik_vm_fini - cik vm fini callback
5611 * @rdev: radeon_device pointer
5613 * Tear down any asic specific VM setup (CIK).
/* No asic-specific VM teardown is needed on CIK; body elided/empty. */
5615 void cik_vm_fini(struct radeon_device *rdev)
5620 * cik_vm_decode_fault - print human readable fault info
5622 * @rdev: radeon_device pointer
5623 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5624 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5626 * Print human readable fault information (CIK).
5628 static void cik_vm_decode_fault(struct radeon_device *rdev,
5629 u32 status, u32 addr, u32 mc_client)
5632 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5633 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* mc_client packs a 4-character ASCII tag, big-endian; unpack it into a
 * NUL-terminated string for the log message. */
5634 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5635 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
/* Hawaii uses a wider memory-client-id field than the other CIK parts.
 * NOTE(review): the "else" line between these branches was elided. */
5637 if (rdev->family == CHIP_HAWAII)
5638 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5640 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5642 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5643 protections, vmid, addr,
5644 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5645 block, mc_client, mc_id);
5649 * cik_vm_flush - cik vm flush using the CP
5651 * @rdev: radeon_device pointer
5653 * Update the page table base and flush the VM TLB
5654 * using the CP (CIK).
5656 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5657 unsigned vm_id, uint64_t pd_addr)
/* NOTE(review): several control-flow lines (if/else on vm_id < 8, the
 * usepfp guard near the end, braces) were elided in this extract. */
/* Only the GFX ring has a PFP; engine select differs for compute. */
5659 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
/* Write the new page directory base into this VMID's context register;
 * VMIDs <8 and >=8 live in two separate register banks. */
5661 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5663 WRITE_DATA_DST_SEL(0)));
5665 radeon_ring_write(ring,
5666 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5668 radeon_ring_write(ring,
5669 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5671 radeon_ring_write(ring, 0);
5672 radeon_ring_write(ring, pd_addr >> 12);
5674 /* update SH_MEM_* regs */
/* Select the target VMID via SRBM_GFX_CNTL before touching SH_MEM_*. */
5675 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5676 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5677 WRITE_DATA_DST_SEL(0)));
5678 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5679 radeon_ring_write(ring, 0);
5680 radeon_ring_write(ring, VMID(vm_id));
5682 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5683 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684 WRITE_DATA_DST_SEL(0)));
5685 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5686 radeon_ring_write(ring, 0);
5688 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5689 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5690 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5691 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* Restore SRBM_GFX_CNTL back to VMID 0 when done. */
5693 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5694 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5695 WRITE_DATA_DST_SEL(0)));
5696 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5697 radeon_ring_write(ring, 0);
5698 radeon_ring_write(ring, VMID(0));
/* Flush HDP so host writes are visible before the TLB invalidate. */
5701 cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5703 /* bits 0-15 are the VM contexts0-15 */
5704 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5705 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5706 WRITE_DATA_DST_SEL(0)));
5707 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5708 radeon_ring_write(ring, 0);
5709 radeon_ring_write(ring, 1 << vm_id);
5711 /* wait for the invalidate to complete */
5712 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5713 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5714 WAIT_REG_MEM_FUNCTION(0) | /* always */
5715 WAIT_REG_MEM_ENGINE(0))); /* me */
5716 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5717 radeon_ring_write(ring, 0);
5718 radeon_ring_write(ring, 0); /* ref */
5719 radeon_ring_write(ring, 0); /* mask */
5720 radeon_ring_write(ring, 0x20); /* poll interval */
5722 /* compute doesn't have PFP */
/* NOTE(review): an "if (usepfp)" guard around the PFP_SYNC_ME packet
 * appears to be elided here. */
5724 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5725 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5726 radeon_ring_write(ring, 0x0);
5732 * The RLC is a multi-purpose microengine that handles a
5733 * variety of functions, the most important of which is
5734 * the interrupt controller.
/* Enable/disable the context busy/empty ("gui idle") interrupts in
 * CP_INT_CNTL_RING0 via read-modify-write.
 * NOTE(review): the "enable" parameter and if/else lines were elided. */
5736 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5739 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5742 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5744 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5745 WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load-balance-per-watt in RLC_LB_CNTL via read-modify-write.
 * NOTE(review): the if/else around the set/clear lines was elided. */
5748 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5752 tmp = RREG32(RLC_LB_CNTL);
5754 tmp |= LOAD_BALANCE_ENABLE;
5756 tmp &= ~LOAD_BALANCE_ENABLE;
5757 WREG32(RLC_LB_CNTL, tmp);
/* Poll until the RLC serdes masters (per-CU and non-CU) report idle,
 * iterating every SE/SH under grbm_idx_mutex. Each poll is bounded by
 * rdev->usec_timeout.
 * NOTE(review): the break/udelay lines inside the poll loops and some
 * braces were elided in this extract. */
5760 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5765 mutex_lock(&rdev->grbm_idx_mutex);
5766 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5767 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5768 cik_select_se_sh(rdev, i, j);
5769 for (k = 0; k < rdev->usec_timeout; k++) {
5770 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* restore broadcast (all SE/SH) before dropping the mutex */
5776 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5777 mutex_unlock(&rdev->grbm_idx_mutex);
5779 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5780 for (k = 0; k < rdev->usec_timeout; k++) {
5781 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write a new value into RLC_CNTL.
 * NOTE(review): upstream guards the write with "if (tmp != rlc)"; that
 * conditional line appears to be elided here -- confirm before editing. */
5787 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5791 tmp = RREG32(RLC_CNTL);
5793 WREG32(RLC_CNTL, rlc);
/* Halt the RLC if it is running and wait for RLC_GPM and serdes idle.
 * Returns the original RLC_CNTL value so the caller can restore it via
 * cik_update_rlc().
 * NOTE(review): break/udelay lines in the poll loop were elided. */
5796 static u32 cik_halt_rlc(struct radeon_device *rdev)
5800 orig = data = RREG32(RLC_CNTL);
5802 if (data & RLC_ENABLE) {
5805 data &= ~RLC_ENABLE;
5806 WREG32(RLC_CNTL, data);
5808 for (i = 0; i < rdev->usec_timeout; i++) {
5809 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5814 cik_wait_for_rlc_serdes(rdev);
/* Ask the RLC to enter safe mode via RLC_GPR_REG2 and poll (bounded by
 * usec_timeout) until GFX power/clock status report ready and the REQ
 * bit clears.
 * NOTE(review): break lines in both poll loops were elided. */
5820 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5824 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5825 WREG32(RLC_GPR_REG2, tmp);
5827 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5828 for (i = 0; i < rdev->usec_timeout; i++) {
5829 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5834 for (i = 0; i < rdev->usec_timeout; i++) {
5835 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Ask the RLC to leave safe mode; fire-and-forget (no completion poll
 * is visible in this extract). */
5841 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5845 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5846 WREG32(RLC_GPR_REG2, tmp);
5850 * cik_rlc_stop - stop the RLC ME
5852 * @rdev: radeon_device pointer
5854 * Halt the RLC ME (MicroEngine) (CIK).
5856 static void cik_rlc_stop(struct radeon_device *rdev)
/* Clear RLC_CNTL (halts the RLC), mask gui-idle interrupts, then wait
 * for the serdes masters to drain. */
5858 WREG32(RLC_CNTL, 0);
5860 cik_enable_gui_idle_interrupt(rdev, false);
5862 cik_wait_for_rlc_serdes(rdev);
5866 * cik_rlc_start - start the RLC ME
5868 * @rdev: radeon_device pointer
5870 * Unhalt the RLC ME (MicroEngine) (CIK).
5872 static void cik_rlc_start(struct radeon_device *rdev)
/* Unhalt the RLC and re-enable the gui-idle interrupts. */
5874 WREG32(RLC_CNTL, RLC_ENABLE);
5876 cik_enable_gui_idle_interrupt(rdev, true);
5882 * cik_rlc_resume - setup the RLC hw
5884 * @rdev: radeon_device pointer
5886 * Initialize the RLC registers, load the ucode,
5887 * and start the RLC (CIK).
5888 * Returns 0 for success, -EINVAL if the ucode is not available.
5890 static int cik_rlc_resume(struct radeon_device *rdev)
/* NOTE(review): this extract elides the ucode-availability check, the
 * rlc-stop/reset preamble, braces and several switch-case labels in the
 * family switch below; verify against upstream before editing. */
/* Disable CGCG/CGLS while (re)loading the RLC. */
5900 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5901 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
/* Load-balancing counters/params, programmed under broadcast SE/SH. */
5909 WREG32(RLC_LB_CNTR_INIT, 0);
5910 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5912 mutex_lock(&rdev->grbm_idx_mutex);
5913 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5914 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5915 WREG32(RLC_LB_PARAMS, 0x00600408);
5916 WREG32(RLC_LB_CNTL, 0x80000004);
5917 mutex_unlock(&rdev->grbm_idx_mutex);
5919 WREG32(RLC_MC_CNTL, 0);
5920 WREG32(RLC_UCODE_CNTL, 0);
/* New-style firmware: header-described little-endian ucode image. */
5923 const struct rlc_firmware_header_v1_0 *hdr =
5924 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5925 const __le32 *fw_data = (const __le32 *)
5926 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5928 radeon_ucode_print_rlc_hdr(&hdr->header);
5930 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5931 WREG32(RLC_GPM_UCODE_ADDR, 0);
5932 for (i = 0; i < size; i++)
5933 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5934 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
/* Legacy firmware: raw big-endian image, fixed size per family. */
5936 const __be32 *fw_data;
5938 switch (rdev->family) {
5942 size = BONAIRE_RLC_UCODE_SIZE;
5945 size = KV_RLC_UCODE_SIZE;
5948 size = KB_RLC_UCODE_SIZE;
5951 size = ML_RLC_UCODE_SIZE;
5955 fw_data = (const __be32 *)rdev->rlc_fw->data;
5956 WREG32(RLC_GPM_UCODE_ADDR, 0);
5957 for (i = 0; i < size; i++)
5958 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5959 WREG32(RLC_GPM_UCODE_ADDR, 0);
5962 /* XXX - find out what chips support lbpw */
5963 cik_enable_lbpw(rdev, false);
5965 if (rdev->family == CHIP_BONAIRE)
5966 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5968 cik_rlc_start(rdev);
/* Toggle coarse-grain clock gating (CGCG/CGLS) for GFX. On enable the
 * RLC is halted, serdes write masks broadcast the override, and the RLC
 * is restored; on disable the CB clock register is read back several
 * times as a flush.
 * NOTE(review): braces/else lines and the final "if (orig != data)"
 * guard appear elided in this extract. */
5973 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5975 u32 data, orig, tmp, tmp2;
5977 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5979 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5980 cik_enable_gui_idle_interrupt(rdev, true);
5982 tmp = cik_halt_rlc(rdev);
5984 mutex_lock(&rdev->grbm_idx_mutex);
5985 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5986 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5987 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5988 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5989 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5990 mutex_unlock(&rdev->grbm_idx_mutex);
5992 cik_update_rlc(rdev, tmp);
5994 data |= CGCG_EN | CGLS_EN;
5996 cik_enable_gui_idle_interrupt(rdev, false);
/* dummy reads to flush the CB clock-gating state */
5998 RREG32(CB_CGTT_SCLK_CTRL);
5999 RREG32(CB_CGTT_SCLK_CTRL);
6000 RREG32(CB_CGTT_SCLK_CTRL);
6001 RREG32(CB_CGTT_SCLK_CTRL);
6003 data &= ~(CGCG_EN | CGLS_EN);
6007 WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Toggle medium-grain clock gating (MGCG) and the related CP/RLC memory
 * light-sleep and CGTS shader-memory gating, driven by rdev->cg_flags.
 * NOTE(review): many if/else lines, "if (orig != data)" guards and
 * braces were elided in this extract. */
6011 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6013 u32 data, orig, tmp = 0;
6015 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6016 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6017 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6018 orig = data = RREG32(CP_MEM_SLP_CNTL);
6019 data |= CP_MEM_LS_EN;
6021 WREG32(CP_MEM_SLP_CNTL, data);
6025 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6029 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
/* push the MGCG override out through the RLC serdes (broadcast) */
6031 tmp = cik_halt_rlc(rdev);
6033 mutex_lock(&rdev->grbm_idx_mutex);
6034 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6035 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6036 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6037 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6038 WREG32(RLC_SERDES_WR_CTRL, data);
6039 mutex_unlock(&rdev->grbm_idx_mutex);
6041 cik_update_rlc(rdev, tmp);
6043 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6044 orig = data = RREG32(CGTS_SM_CTRL_REG);
6045 data &= ~SM_MODE_MASK;
6046 data |= SM_MODE(0x2);
6047 data |= SM_MODE_ENABLE;
6048 data &= ~CGTS_OVERRIDE;
6049 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6050 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6051 data &= ~CGTS_LS_OVERRIDE;
6052 data &= ~ON_MONITOR_ADD_MASK;
6053 data |= ON_MONITOR_ADD_EN;
6054 data |= ON_MONITOR_ADD(0x96);
6056 WREG32(CGTS_SM_CTRL_REG, data);
/* disable path: force overrides on, turn memory light-sleep off */
6059 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6062 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6064 data = RREG32(RLC_MEM_SLP_CNTL);
6065 if (data & RLC_MEM_LS_EN) {
6066 data &= ~RLC_MEM_LS_EN;
6067 WREG32(RLC_MEM_SLP_CNTL, data);
6070 data = RREG32(CP_MEM_SLP_CNTL);
6071 if (data & CP_MEM_LS_EN) {
6072 data &= ~CP_MEM_LS_EN;
6073 WREG32(CP_MEM_SLP_CNTL, data);
6076 orig = data = RREG32(CGTS_SM_CTRL_REG);
6077 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6079 WREG32(CGTS_SM_CTRL_REG, data);
6081 tmp = cik_halt_rlc(rdev);
6083 mutex_lock(&rdev->grbm_idx_mutex);
6084 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6085 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6086 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6087 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6088 WREG32(RLC_SERDES_WR_CTRL, data);
6089 mutex_unlock(&rdev->grbm_idx_mutex);
6091 cik_update_rlc(rdev, tmp);
/* Table of MC clock-gating control registers walked by the
 * cik_enable_mc_ls()/cik_enable_mc_mgcg() helpers below.
 * NOTE(review): the initializer list was elided in this extract. */
6095 static const u32 mc_cg_registers[] =
/* Set/clear MC_LS_ENABLE in every MC clock-gating register, writing a
 * register back only when its value changed (the "if (data != orig)"
 * guard line is elided in this extract). */
6108 static void cik_enable_mc_ls(struct radeon_device *rdev,
6114 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6115 orig = data = RREG32(mc_cg_registers[i]);
6116 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6117 data |= MC_LS_ENABLE;
6119 data &= ~MC_LS_ENABLE;
6121 WREG32(mc_cg_registers[i], data);
/* Same pattern as cik_enable_mc_ls(), but toggling MC_CG_ENABLE
 * (medium-grain clock gating) instead of light sleep. */
6125 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6131 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6132 orig = data = RREG32(mc_cg_registers[i]);
6133 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6134 data |= MC_CG_ENABLE;
6136 data &= ~MC_CG_ENABLE;
6138 WREG32(mc_cg_registers[i], data);
/* Toggle SDMA medium-grain clock gating for both SDMA engines. The
 * enable path writes a fixed gating value; the disable path RMWs each
 * engine's clock control register.
 * NOTE(review): the else branch line, the values ORed into data and the
 * changed-value write guards were elided in this extract. */
6142 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6147 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6148 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6149 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6151 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6154 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6156 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6159 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* Toggle SDMA memory light sleep in SDMA0_POWER_CNTL for both engines.
 * NOTE(review): the bit set/clear lines and the changed-value write
 * guards were elided in this extract. */
6163 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6168 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6169 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6172 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6174 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6177 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* disable path */
6179 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6182 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6187 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Toggle UVD medium-grain clock gating via the indirect UVD context
 * registers (UVD_CGC_MEM_CTRL) plus the direct UVD_CGC_CTRL register.
 * NOTE(review): the bit manipulations between read and write-back and
 * the changed-value guards were elided in this extract. */
6191 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6196 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6197 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6199 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6201 orig = data = RREG32(UVD_CGC_CTRL);
6204 WREG32(UVD_CGC_CTRL, data);
/* disable path */
6206 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6208 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6210 orig = data = RREG32(UVD_CGC_CTRL);
6213 WREG32(UVD_CGC_CTRL, data);
/* Toggle the BIF (bus interface) memory light-sleep bits in PCIE_CNTL2.
 * NOTE(review): the else line and changed-value guard were elided. */
6217 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6222 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6224 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6225 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6226 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6228 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6229 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6232 WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Toggle HDP medium-grain clock gating; note the bit is a *disable*
 * bit, so enable clears CLOCK_GATING_DIS and disable sets it. */
6235 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6240 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6242 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6243 data &= ~CLOCK_GATING_DIS;
6245 data |= CLOCK_GATING_DIS;
6248 WREG32(HDP_HOST_PATH_CNTL, data);
/* Toggle HDP memory light sleep in HDP_MEM_POWER_LS. */
6251 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6256 orig = data = RREG32(HDP_MEM_POWER_LS);
6258 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6259 data |= HDP_LS_ENABLE;
6261 data &= ~HDP_LS_ENABLE;
6264 WREG32(HDP_MEM_POWER_LS, data);
/* Central clock-gating dispatcher: applies enable/disable per block
 * bitmask. For GFX the MGCG/CGCG ordering is asymmetric (MGCG before
 * CGCG on enable, reversed on disable) -- see "order matters!" below. */
6267 void cik_update_cg(struct radeon_device *rdev,
6268 u32 block, bool enable)
6271 if (block & RADEON_CG_BLOCK_GFX) {
6272 cik_enable_gui_idle_interrupt(rdev, false);
6273 /* order matters! */
6275 cik_enable_mgcg(rdev, true);
6276 cik_enable_cgcg(rdev, true);
6278 cik_enable_cgcg(rdev, false);
6279 cik_enable_mgcg(rdev, false);
6281 cik_enable_gui_idle_interrupt(rdev, true);
/* MC gating is only touched on discrete parts (not IGP). */
6284 if (block & RADEON_CG_BLOCK_MC) {
6285 if (!(rdev->flags & RADEON_IS_IGP)) {
6286 cik_enable_mc_mgcg(rdev, enable);
6287 cik_enable_mc_ls(rdev, enable);
6291 if (block & RADEON_CG_BLOCK_SDMA) {
6292 cik_enable_sdma_mgcg(rdev, enable);
6293 cik_enable_sdma_mgls(rdev, enable);
6296 if (block & RADEON_CG_BLOCK_BIF) {
6297 cik_enable_bif_mgls(rdev, enable);
6300 if (block & RADEON_CG_BLOCK_UVD) {
/* NOTE(review): a has-UVD guard line appears to be elided here. */
6302 cik_enable_uvd_mgcg(rdev, enable);
6305 if (block & RADEON_CG_BLOCK_HDP) {
6306 cik_enable_hdp_mgcg(rdev, enable);
6307 cik_enable_hdp_ls(rdev, enable);
6310 if (block & RADEON_CG_BLOCK_VCE) {
6311 vce_v2_0_enable_mgcg(rdev, enable);
/* Enable clock gating at init: GFX first, then UVD internal CG, then
 * the remaining blocks in one call. */
6315 static void cik_init_cg(struct radeon_device *rdev)
6318 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
/* NOTE(review): a has-UVD guard line appears to be elided here. */
6321 si_init_uvd_internal_cg(rdev);
6323 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6324 RADEON_CG_BLOCK_SDMA |
6325 RADEON_CG_BLOCK_BIF |
6326 RADEON_CG_BLOCK_UVD |
6327 RADEON_CG_BLOCK_HDP), true);
/* Disable clock gating at teardown; mirror of cik_init_cg() with the
 * GFX block done last. */
6330 static void cik_fini_cg(struct radeon_device *rdev)
6332 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6333 RADEON_CG_BLOCK_SDMA |
6334 RADEON_CG_BLOCK_BIF |
6335 RADEON_CG_BLOCK_UVD |
6336 RADEON_CG_BLOCK_HDP), false);
6338 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL. */
6341 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6346 orig = data = RREG32(RLC_PG_CNTL);
6347 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6348 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6350 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6352 WREG32(RLC_PG_CNTL, data);
/* Toggle SMU clock slowdown on power-down in RLC_PG_CNTL. */
6355 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6360 orig = data = RREG32(RLC_PG_CNTL);
6361 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6362 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6364 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6366 WREG32(RLC_PG_CNTL, data);
/* Toggle CP power gating; the hw bit is a *disable* bit, so enable
 * clears DISABLE_CP_PG and disable sets it. */
6369 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6373 orig = data = RREG32(RLC_PG_CNTL);
6374 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6375 data &= ~DISABLE_CP_PG;
6377 data |= DISABLE_CP_PG;
6379 WREG32(RLC_PG_CNTL, data);
/* Toggle GDS power gating; same inverted-bit convention as CP PG. */
6382 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6386 orig = data = RREG32(RLC_PG_CNTL);
6387 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6388 data &= ~DISABLE_GDS_PG;
6390 data |= DISABLE_GDS_PG;
6392 WREG32(RLC_PG_CNTL, data);
/* Layout constants for the legacy (non-header) CP jump-table images. */
6395 #define CP_ME_TABLE_SIZE 96
6396 #define CP_ME_TABLE_OFFSET 2048
6397 #define CP_MEC_TABLE_OFFSET 4096
/* Copy the CP jump tables (CE, PFP, ME, MEC[, MEC2]) from the loaded
 * firmware images into the RLC's CP table BO so the RLC can restore CP
 * state after power gating.
 * NOTE(review): the new-firmware/legacy branch line, the max_me bump
 * for Kaveri and several braces were elided in this extract. */
6399 void cik_init_cp_pg_table(struct radeon_device *rdev)
6401 volatile u32 *dst_ptr;
6402 int me, i, max_me = 4;
6404 u32 table_offset, table_size;
6406 if (rdev->family == CHIP_KAVERI)
6409 if (rdev->rlc.cp_table_ptr == NULL)
6412 /* write the cp table buffer */
6413 dst_ptr = rdev->rlc.cp_table_ptr;
6414 for (me = 0; me < max_me; me++) {
/* New-style firmware: per-image header gives jump table offset/size. */
6416 const __le32 *fw_data;
6417 const struct gfx_firmware_header_v1_0 *hdr;
6420 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6421 fw_data = (const __le32 *)
6422 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6423 table_offset = le32_to_cpu(hdr->jt_offset);
6424 table_size = le32_to_cpu(hdr->jt_size);
6425 } else if (me == 1) {
6426 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6427 fw_data = (const __le32 *)
6428 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6429 table_offset = le32_to_cpu(hdr->jt_offset);
6430 table_size = le32_to_cpu(hdr->jt_size);
6431 } else if (me == 2) {
6432 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6433 fw_data = (const __le32 *)
6434 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6435 table_offset = le32_to_cpu(hdr->jt_offset);
6436 table_size = le32_to_cpu(hdr->jt_size);
6437 } else if (me == 3) {
6438 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6439 fw_data = (const __le32 *)
6440 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6441 table_offset = le32_to_cpu(hdr->jt_offset);
6442 table_size = le32_to_cpu(hdr->jt_size);
/* me == 4: second MEC (Kaveri only, per max_me bump above) */
6444 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6445 fw_data = (const __le32 *)
6446 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6447 table_offset = le32_to_cpu(hdr->jt_offset);
6448 table_size = le32_to_cpu(hdr->jt_size);
6451 for (i = 0; i < table_size; i ++) {
6452 dst_ptr[bo_offset + i] =
6453 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6455 bo_offset += table_size;
/* Legacy firmware: big-endian images at fixed offsets. */
6457 const __be32 *fw_data;
6458 table_size = CP_ME_TABLE_SIZE;
6461 fw_data = (const __be32 *)rdev->ce_fw->data;
6462 table_offset = CP_ME_TABLE_OFFSET;
6463 } else if (me == 1) {
6464 fw_data = (const __be32 *)rdev->pfp_fw->data;
6465 table_offset = CP_ME_TABLE_OFFSET;
6466 } else if (me == 2) {
6467 fw_data = (const __be32 *)rdev->me_fw->data;
6468 table_offset = CP_ME_TABLE_OFFSET;
6470 fw_data = (const __be32 *)rdev->mec_fw->data;
6471 table_offset = CP_MEC_TABLE_OFFSET;
6474 for (i = 0; i < table_size; i ++) {
6475 dst_ptr[bo_offset + i] =
6476 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6478 bo_offset += table_size;
/* Toggle GFX coarse-grain power gating plus RLC auto power gating.
 * NOTE(review): the AUTO_PG_EN set line on the enable path and the
 * trailing use of DB_RENDER_CONTROL were elided in this extract. */
6483 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6488 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6489 orig = data = RREG32(RLC_PG_CNTL);
6490 data |= GFX_PG_ENABLE;
6492 WREG32(RLC_PG_CNTL, data);
6494 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6497 WREG32(RLC_AUTO_PG_CTRL, data);
/* disable path: clear both PG enable and auto-PG */
6499 orig = data = RREG32(RLC_PG_CNTL);
6500 data &= ~GFX_PG_ENABLE;
6502 WREG32(RLC_PG_CNTL, data);
6504 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6505 data &= ~AUTO_PG_EN;
6507 WREG32(RLC_AUTO_PG_CTRL, data);
6509 data = RREG32(DB_RENDER_CONTROL);
/* Return a bitmap of active CUs for the given SE/SH: inactive bits come
 * from the config/user shader-array registers, masked to the number of
 * CUs per SH, then inverted.
 * NOTE(review): the tmp combination (OR of the two reads, shifted) and
 * the per-CU mask-building loop body were elided in this extract. */
6513 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6515 u32 mask = 0, tmp, tmp1;
6518 mutex_lock(&rdev->grbm_idx_mutex);
6519 cik_select_se_sh(rdev, se, sh);
6520 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6521 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6522 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6523 mutex_unlock(&rdev->grbm_idx_mutex);
6530 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
/* invert: registers hold *inactive* CU bits */
6535 return (~tmp) & mask;
/* Build the always-on CU mask across all SE/SH, write it to
 * RLC_PG_AO_CU_MASK, and program RLC_MAX_PG_CU with the total count of
 * active CUs.
 * NOTE(review): counter/cu_bitmap accumulation lines and loop braces
 * were elided in this extract. */
6538 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6540 u32 i, j, k, active_cu_number = 0;
6541 u32 mask, counter, cu_bitmap;
6544 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6545 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6549 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6550 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6558 active_cu_number += counter;
/* pack per-SH bitmaps: 16 bits per SE, 8 bits per SH */
6559 tmp |= (cu_bitmap << (i * 16 + j * 8));
6563 WREG32(RLC_PG_AO_CU_MASK, tmp);
6565 tmp = RREG32(RLC_MAX_PG_CU);
6566 tmp &= ~MAX_PU_CU_MASK;
6567 tmp |= MAX_PU_CU(active_cu_number);
6568 WREG32(RLC_MAX_PG_CU, tmp);
/* Toggle static per-CU medium-grain power gating in RLC_PG_CNTL. */
6571 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6576 orig = data = RREG32(RLC_PG_CNTL);
6577 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6578 data |= STATIC_PER_CU_PG_ENABLE;
6580 data &= ~STATIC_PER_CU_PG_ENABLE;
6582 WREG32(RLC_PG_CNTL, data);
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
6585 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6590 orig = data = RREG32(RLC_PG_CNTL);
6591 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6592 data |= DYN_PER_CU_PG_ENABLE;
6594 data &= ~DYN_PER_CU_PG_ENABLE;
6596 WREG32(RLC_PG_CNTL, data);
/* RLC GPM scratch offsets for the save/restore list and the clear-state
 * descriptor. */
6599 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6600 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* Program the RLC for GFX power gating: publish the clear-state buffer
 * descriptor and save/restore register list into RLC GPM scratch, set
 * the save/restore and CP-table BO addresses, and tune PG delays.
 * NOTE(review): several RMW value lines (RLC_PG_CNTL bits, RLC_PG_DELAY
 * value, RLC_PG_DELAY_2 field) were elided in this extract. */
6602 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6607 if (rdev->rlc.cs_data) {
6608 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6609 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6610 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6611 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
/* no clear-state data: zero out the 3-dword descriptor */
6613 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6614 for (i = 0; i < 3; i++)
6615 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6617 if (rdev->rlc.reg_list) {
6618 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6619 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6620 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6623 orig = data = RREG32(RLC_PG_CNTL);
6626 WREG32(RLC_PG_CNTL, data);
6628 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6629 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6631 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6632 data &= ~IDLE_POLL_COUNT_MASK;
6633 data |= IDLE_POLL_COUNT(0x60);
6634 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6637 WREG32(RLC_PG_DELAY, data);
6639 data = RREG32(RLC_PG_DELAY_2);
6642 WREG32(RLC_PG_DELAY_2, data);
6644 data = RREG32(RLC_AUTO_PG_CTRL);
6645 data &= ~GRBM_REG_SGIT_MASK;
6646 data |= GRBM_REG_SGIT(0x700);
6647 WREG32(RLC_AUTO_PG_CTRL, data);
/* Apply the full GFX power-gating stack (CG PG + static/dynamic MGPG)
 * in one call. */
6651 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6653 cik_enable_gfx_cgpg(rdev, enable);
6654 cik_enable_gfx_static_mgpg(rdev, enable);
6655 cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Compute the dword size of the clear-state buffer (CSB): preamble,
 * context control, every SECT_CONTEXT extent (2 header dwords + regs),
 * the raster config pair, and the end-of-clear-state/clear-state
 * packets. Must stay in sync with cik_get_csb_buffer() below.
 * NOTE(review): the per-packet count increments between the visible
 * comments were elided in this extract. */
6658 u32 cik_get_csb_size(struct radeon_device *rdev)
6661 const struct cs_section_def *sect = NULL;
6662 const struct cs_extent_def *ext = NULL;
6664 if (rdev->rlc.cs_data == NULL)
6667 /* begin clear state */
6669 /* context control state */
6672 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6673 for (ext = sect->section; ext->extent != NULL; ++ext) {
6674 if (sect->id == SECT_CONTEXT)
6675 count += 2 + ext->reg_count;
6680 /* pa_sc_raster_config/pa_sc_raster_config1 */
6682 /* end clear state */
/* Fill "buffer" with the clear-state packet stream sized by
 * cik_get_csb_size(): preamble, context control, every SECT_CONTEXT
 * extent, a per-family PA_SC_RASTER_CONFIG pair, then the end-preamble
 * and CLEAR_STATE packets. Values are stored little-endian.
 * NOTE(review): braces and several switch-case labels in the family
 * switch were elided in this extract. */
6690 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6693 const struct cs_section_def *sect = NULL;
6694 const struct cs_extent_def *ext = NULL;
6696 if (rdev->rlc.cs_data == NULL)
6701 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6702 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6704 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6705 buffer[count++] = cpu_to_le32(0x80000000);
6706 buffer[count++] = cpu_to_le32(0x80000000);
6708 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6709 for (ext = sect->section; ext->extent != NULL; ++ext) {
6710 if (sect->id == SECT_CONTEXT) {
6712 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6713 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6714 for (i = 0; i < ext->reg_count; i++)
6715 buffer[count++] = cpu_to_le32(ext->extent[i]);
6722 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6723 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6724 switch (rdev->family) {
6726 buffer[count++] = cpu_to_le32(0x16000012);
6727 buffer[count++] = cpu_to_le32(0x00000000);
6730 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6731 buffer[count++] = cpu_to_le32(0x00000000);
6735 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6736 buffer[count++] = cpu_to_le32(0x00000000);
6739 buffer[count++] = cpu_to_le32(0x3a00161a);
6740 buffer[count++] = cpu_to_le32(0x0000002e);
6743 buffer[count++] = cpu_to_le32(0x00000000);
6744 buffer[count++] = cpu_to_le32(0x00000000);
6748 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6749 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6751 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6752 buffer[count++] = cpu_to_le32(0);
/* Enable power gating at init when any pg_flags are set: clock
 * slowdowns first, then (for GFX PG) the CG-PG tables and CP/GDS
 * gating, the always-on CU mask, and finally the GFX PG stack. */
6755 static void cik_init_pg(struct radeon_device *rdev)
6757 if (rdev->pg_flags) {
6758 cik_enable_sck_slowdown_on_pu(rdev, true);
6759 cik_enable_sck_slowdown_on_pd(rdev, true);
6760 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6761 cik_init_gfx_cgpg(rdev);
6762 cik_enable_cp_pg(rdev, true);
6763 cik_enable_gds_pg(rdev, true);
6765 cik_init_ao_cu_mask(rdev);
6766 cik_update_gfx_pg(rdev, true);
/* Disable power gating at teardown; mirror of cik_init_pg(). */
6770 static void cik_fini_pg(struct radeon_device *rdev)
6772 if (rdev->pg_flags) {
6773 cik_update_gfx_pg(rdev, false);
6774 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6775 cik_enable_cp_pg(rdev, false);
6776 cik_enable_gds_pg(rdev, false);
6783 * Starting with r6xx, interrupts are handled via a ring buffer.
6784 * Ring buffers are areas of GPU accessible memory that the GPU
6785 * writes interrupt vectors into and the host reads vectors out of.
6786 * There is a rptr (read pointer) that determines where the
6787 * host is currently reading, and a wptr (write pointer)
6788 * which determines where the GPU has written. When the
6789 * pointers are equal, the ring is idle. When the GPU
6790 * writes vectors to the ring buffer, it increments the
6791 * wptr. When there is an interrupt, the host then starts
6792 * fetching commands and processing them until the pointers are
6793 * equal again at which point it updates the rptr.
6797 * cik_enable_interrupts - Enable the interrupt ring buffer
6799 * @rdev: radeon_device pointer
6801 * Enable the interrupt ring buffer (CIK).
6803 static void cik_enable_interrupts(struct radeon_device *rdev)
/* Turn on both the IH engine (IH_CNTL) and its ring buffer
 * (IH_RB_CNTL), then mark the IH as enabled for the rest of the
 * driver. */
6805 u32 ih_cntl = RREG32(IH_CNTL);
6806 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6808 ih_cntl |= ENABLE_INTR;
6809 ih_rb_cntl |= IH_RB_ENABLE;
6810 WREG32(IH_CNTL, ih_cntl);
6811 WREG32(IH_RB_CNTL, ih_rb_cntl);
6812 rdev->ih.enabled = true;
6816 * cik_disable_interrupts - Disable the interrupt ring buffer
6818 * @rdev: radeon_device pointer
6820 * Disable the interrupt ring buffer (CIK).
6822 static void cik_disable_interrupts(struct radeon_device *rdev)
/* Disable the IH ring and engine, reset the ring pointers, and mark
 * the IH disabled. */
6824 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6825 u32 ih_cntl = RREG32(IH_CNTL);
6827 ih_rb_cntl &= ~IH_RB_ENABLE;
6828 ih_cntl &= ~ENABLE_INTR;
6829 WREG32(IH_RB_CNTL, ih_rb_cntl);
6830 WREG32(IH_CNTL, ih_cntl);
6831 /* set rptr, wptr to 0 */
6832 WREG32(IH_RB_RPTR, 0);
6833 WREG32(IH_RB_WPTR, 0);
6834 rdev->ih.enabled = false;
6839 * cik_disable_interrupt_state - Disable all interrupt sources
6841 * @rdev: radeon_device pointer
6843 * Clear all interrupt enable bits used by the driver (CIK).
6845 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* Mask every interrupt source the driver uses: CP ring 0 (preserving
 * only the gui-idle bits), both SDMA engines, all compute pipes, GRBM,
 * SRBM, per-CRTC vblank/vline and pageflip sources, and the hotplug
 * lines (preserving each HPD's polarity bit).
 * NOTE(review): a mask against the gui-idle bits on the first RMW and
 * some brace lines were elided in this extract. */
6850 tmp = RREG32(CP_INT_CNTL_RING0) &
6851 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6852 WREG32(CP_INT_CNTL_RING0, tmp);
/* sdma */
6854 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6855 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6856 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6857 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6858 /* compute queues */
6859 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6860 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6861 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6862 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6863 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6864 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6865 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6866 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6868 WREG32(GRBM_INT_CNTL, 0);
6870 WREG32(SRBM_INT_CNTL, 0);
6871 /* vline/vblank, etc. */
6872 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6873 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6874 if (rdev->num_crtc >= 4) {
6875 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6876 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6878 if (rdev->num_crtc >= 6) {
6879 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6880 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
/* pageflip sources */
6883 if (rdev->num_crtc >= 2) {
6884 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6885 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6887 if (rdev->num_crtc >= 4) {
6888 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6889 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6891 if (rdev->num_crtc >= 6) {
6892 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6893 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6897 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6899 /* digital hotplug */
6900 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6901 WREG32(DC_HPD1_INT_CONTROL, tmp);
6902 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6903 WREG32(DC_HPD2_INT_CONTROL, tmp);
6904 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6905 WREG32(DC_HPD3_INT_CONTROL, tmp);
6906 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6907 WREG32(DC_HPD4_INT_CONTROL, tmp);
6908 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6909 WREG32(DC_HPD5_INT_CONTROL, tmp);
6910 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6911 WREG32(DC_HPD6_INT_CONTROL, tmp);
6916 * cik_irq_init - init and enable the interrupt ring
6918 * @rdev: radeon_device pointer
6920 * Allocate a ring buffer for the interrupt controller,
6921 * enable the RLC, disable interrupts, enable the IH
6922 * ring buffer and enable it (CIK).
6923 * Called at device load and resume.
6924 * Returns 0 for success, errors for failure.
6926 static int cik_irq_init(struct radeon_device *rdev)
6930 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* Allocate (or reuse) the IH ring buffer before touching any hw state. */
6933 ret = r600_ih_ring_alloc(rdev);
6938 cik_disable_interrupts(rdev);
/* The RLC must be running for the IH to deliver interrupts. */
6941 ret = cik_rlc_resume(rdev);
/* On RLC failure, tear the ring back down before returning the error. */
6943 r600_ih_ring_fini(rdev);
6947 /* setup interrupt control */
6948 /* set dummy read address to dummy page address */
6949 WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6950 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6951 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6952 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6954 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6955 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6956 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6957 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6959 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* Ring size field is log2 of the entry count (ring_size is in bytes). */
6960 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6962 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6963 IH_WPTR_OVERFLOW_CLEAR |
6966 if (rdev->wb.enabled)
6967 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6969 /* set the writeback address whether it's enabled or not */
6970 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6971 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6973 WREG32(IH_RB_CNTL, ih_rb_cntl);
6975 /* set rptr, wptr to 0 */
6976 WREG32(IH_RB_RPTR, 0);
6977 WREG32(IH_RB_WPTR, 0);
6979 /* Default settings for IH_CNTL (disabled at first) */
6980 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6981 /* RPTR_REARM only works if msi's are enabled */
6982 if (rdev->msi_enabled)
6983 ih_cntl |= RPTR_REARM;
6984 WREG32(IH_CNTL, ih_cntl);
6986 /* force the active interrupt state to all disabled */
6987 cik_disable_interrupt_state(rdev);
6989 pci_set_master(rdev->pdev);
/* Everything is programmed; flip the master enable last. */
6992 cik_enable_interrupts(rdev);
6998 * cik_irq_set - enable/disable interrupt sources
7000 * @rdev: radeon_device pointer
7002 * Enable interrupt sources on the GPU (vblanks, hpd,
7004 * Returns 0 for success, errors for failure.
7006 int cik_irq_set(struct radeon_device *rdev)
7010 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7011 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7012 u32 grbm_int_cntl = 0;
7013 u32 dma_cntl, dma_cntl1;
7015 if (!rdev->irq.installed) {
7016 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7019 /* don't enable anything if the ih is disabled */
7020 if (!rdev->ih.enabled) {
7021 cik_disable_interrupts(rdev);
7022 /* force the active interrupt state to all disabled */
7023 cik_disable_interrupt_state(rdev);
/* Build each control value in a local first, then commit with one write
 * per register below, so a partially-built mask is never live on the hw.
 */
7027 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7028 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7029 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
/* Start each HPD value with its enable bits cleared; re-add them below. */
7031 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7032 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7033 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7034 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7035 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7036 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7038 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7039 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7041 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7043 /* enable CP interrupts on all rings */
7044 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7045 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7046 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7048 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7049 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
/* NOTE(review): debug strings below say "si_irq_set" — presumably a
 * copy/paste leftover from the SI code; message text only, no functional
 * effect. Confirm before "fixing" to keep dmesg grep history consistent.
 */
7050 DRM_DEBUG("si_irq_set: sw int cp1\n");
7051 if (ring->me == 1) {
7052 switch (ring->pipe) {
7054 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7057 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7061 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7064 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7065 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7066 DRM_DEBUG("si_irq_set: sw int cp2\n");
7067 if (ring->me == 1) {
7068 switch (ring->pipe) {
7070 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7073 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7077 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7081 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7082 DRM_DEBUG("cik_irq_set: sw int dma\n");
7083 dma_cntl |= TRAP_ENABLE;
7086 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7087 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7088 dma_cntl1 |= TRAP_ENABLE;
/* vblank requested either by drm vblank machinery or a pending pageflip. */
7091 if (rdev->irq.crtc_vblank_int[0] ||
7092 atomic_read(&rdev->irq.pflip[0])) {
7093 DRM_DEBUG("cik_irq_set: vblank 0\n");
7094 crtc1 |= VBLANK_INTERRUPT_MASK;
7096 if (rdev->irq.crtc_vblank_int[1] ||
7097 atomic_read(&rdev->irq.pflip[1])) {
7098 DRM_DEBUG("cik_irq_set: vblank 1\n");
7099 crtc2 |= VBLANK_INTERRUPT_MASK;
7101 if (rdev->irq.crtc_vblank_int[2] ||
7102 atomic_read(&rdev->irq.pflip[2])) {
7103 DRM_DEBUG("cik_irq_set: vblank 2\n");
7104 crtc3 |= VBLANK_INTERRUPT_MASK;
7106 if (rdev->irq.crtc_vblank_int[3] ||
7107 atomic_read(&rdev->irq.pflip[3])) {
7108 DRM_DEBUG("cik_irq_set: vblank 3\n");
7109 crtc4 |= VBLANK_INTERRUPT_MASK;
7111 if (rdev->irq.crtc_vblank_int[4] ||
7112 atomic_read(&rdev->irq.pflip[4])) {
7113 DRM_DEBUG("cik_irq_set: vblank 4\n");
7114 crtc5 |= VBLANK_INTERRUPT_MASK;
7116 if (rdev->irq.crtc_vblank_int[5] ||
7117 atomic_read(&rdev->irq.pflip[5])) {
7118 DRM_DEBUG("cik_irq_set: vblank 5\n");
7119 crtc6 |= VBLANK_INTERRUPT_MASK;
/* Hotplug: enable both the connect/disconnect and the RX (DP aux) irq. */
7121 if (rdev->irq.hpd[0]) {
7122 DRM_DEBUG("cik_irq_set: hpd 1\n");
7123 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7125 if (rdev->irq.hpd[1]) {
7126 DRM_DEBUG("cik_irq_set: hpd 2\n");
7127 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7129 if (rdev->irq.hpd[2]) {
7130 DRM_DEBUG("cik_irq_set: hpd 3\n");
7131 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7133 if (rdev->irq.hpd[3]) {
7134 DRM_DEBUG("cik_irq_set: hpd 4\n");
7135 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7137 if (rdev->irq.hpd[4]) {
7138 DRM_DEBUG("cik_irq_set: hpd 5\n");
7139 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7141 if (rdev->irq.hpd[5]) {
7142 DRM_DEBUG("cik_irq_set: hpd 6\n");
7143 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
/* Commit all the assembled masks to the hardware. */
7146 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7148 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7149 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7151 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7153 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7155 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7156 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7157 if (rdev->num_crtc >= 4) {
7158 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7159 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7161 if (rdev->num_crtc >= 6) {
7162 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7163 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
/* Page-flip interrupts are left unconditionally unmasked per CRTC. */
7166 if (rdev->num_crtc >= 2) {
7167 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7168 GRPH_PFLIP_INT_MASK);
7169 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7170 GRPH_PFLIP_INT_MASK);
7172 if (rdev->num_crtc >= 4) {
7173 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7174 GRPH_PFLIP_INT_MASK);
7175 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7176 GRPH_PFLIP_INT_MASK);
7178 if (rdev->num_crtc >= 6) {
7179 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7180 GRPH_PFLIP_INT_MASK);
7181 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7182 GRPH_PFLIP_INT_MASK);
7185 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7186 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7187 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7188 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7189 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7190 WREG32(DC_HPD6_INT_CONTROL, hpd6);
/* Posting read to flush the register writes before returning. */
7193 RREG32(SRBM_STATUS);
7199 * cik_irq_ack - ack interrupt sources
7201 * @rdev: radeon_device pointer
7203 * Ack interrupt sources on the GPU (vblanks, hpd,
7204 * etc.) (CIK). Certain interrupt sources are sw
7205 * generated and do not require an explicit ack.
7207 static inline void cik_irq_ack(struct radeon_device *rdev)
/* Latch all display interrupt status registers into the per-ASIC cache;
 * cik_irq_process() consumes and clears these cached bits as it walks
 * the IH ring.
 */
7211 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7212 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7213 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7214 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7215 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7216 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7217 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7219 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7220 EVERGREEN_CRTC0_REGISTER_OFFSET);
7221 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7222 EVERGREEN_CRTC1_REGISTER_OFFSET);
7223 if (rdev->num_crtc >= 4) {
7224 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7225 EVERGREEN_CRTC2_REGISTER_OFFSET);
7226 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7227 EVERGREEN_CRTC3_REGISTER_OFFSET);
7229 if (rdev->num_crtc >= 6) {
7230 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7231 EVERGREEN_CRTC4_REGISTER_OFFSET);
7232 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7233 EVERGREEN_CRTC5_REGISTER_OFFSET);
/* Ack page-flip, vblank and vline events for CRTC0/1 based on the cache. */
7236 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7237 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7238 GRPH_PFLIP_INT_CLEAR);
7239 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7240 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7241 GRPH_PFLIP_INT_CLEAR);
7242 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7243 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7244 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7245 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7246 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7247 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7248 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7249 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7251 if (rdev->num_crtc >= 4) {
7252 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7253 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7254 GRPH_PFLIP_INT_CLEAR);
7255 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7256 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7257 GRPH_PFLIP_INT_CLEAR);
7258 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7259 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7260 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7261 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7262 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7263 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7264 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7265 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7268 if (rdev->num_crtc >= 6) {
7269 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7270 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7271 GRPH_PFLIP_INT_CLEAR);
7272 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7273 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7274 GRPH_PFLIP_INT_CLEAR);
7275 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7276 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7277 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7278 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7279 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7280 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7281 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7282 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* HPD acks are read-modify-write so the enable/polarity bits survive. */
7285 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7286 tmp = RREG32(DC_HPD1_INT_CONTROL);
7287 tmp |= DC_HPDx_INT_ACK;
7288 WREG32(DC_HPD1_INT_CONTROL, tmp);
7290 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7291 tmp = RREG32(DC_HPD2_INT_CONTROL);
7292 tmp |= DC_HPDx_INT_ACK;
7293 WREG32(DC_HPD2_INT_CONTROL, tmp);
7295 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7296 tmp = RREG32(DC_HPD3_INT_CONTROL);
7297 tmp |= DC_HPDx_INT_ACK;
7298 WREG32(DC_HPD3_INT_CONTROL, tmp);
7300 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7301 tmp = RREG32(DC_HPD4_INT_CONTROL);
7302 tmp |= DC_HPDx_INT_ACK;
7303 WREG32(DC_HPD4_INT_CONTROL, tmp);
7305 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7306 tmp = RREG32(DC_HPD5_INT_CONTROL);
7307 tmp |= DC_HPDx_INT_ACK;
7308 WREG32(DC_HPD5_INT_CONTROL, tmp);
7310 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7311 tmp = RREG32(DC_HPD6_INT_CONTROL);
7312 tmp |= DC_HPDx_INT_ACK;
7313 WREG32(DC_HPD6_INT_CONTROL, tmp);
/* RX (DP short-pulse/aux) interrupts are acked separately per pad. */
7315 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7316 tmp = RREG32(DC_HPD1_INT_CONTROL);
7317 tmp |= DC_HPDx_RX_INT_ACK;
7318 WREG32(DC_HPD1_INT_CONTROL, tmp);
7320 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7321 tmp = RREG32(DC_HPD2_INT_CONTROL);
7322 tmp |= DC_HPDx_RX_INT_ACK;
7323 WREG32(DC_HPD2_INT_CONTROL, tmp);
7325 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7326 tmp = RREG32(DC_HPD3_INT_CONTROL);
7327 tmp |= DC_HPDx_RX_INT_ACK;
7328 WREG32(DC_HPD3_INT_CONTROL, tmp);
7330 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7331 tmp = RREG32(DC_HPD4_INT_CONTROL);
7332 tmp |= DC_HPDx_RX_INT_ACK;
7333 WREG32(DC_HPD4_INT_CONTROL, tmp);
7335 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7336 tmp = RREG32(DC_HPD5_INT_CONTROL);
7337 tmp |= DC_HPDx_RX_INT_ACK;
7338 WREG32(DC_HPD5_INT_CONTROL, tmp);
7340 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7341 tmp = RREG32(DC_HPD6_INT_CONTROL);
7342 tmp |= DC_HPDx_RX_INT_ACK;
7343 WREG32(DC_HPD6_INT_CONTROL, tmp);
7348 * cik_irq_disable - disable interrupts
7350 * @rdev: radeon_device pointer
7352 * Disable interrupts on the hw (CIK).
7354 static void cik_irq_disable(struct radeon_device *rdev)
7356 cik_disable_interrupts(rdev);
7357 /* Wait and acknowledge irq */
/* With the master enable off, clear every per-source enable as well. */
7360 cik_disable_interrupt_state(rdev);
7364 * cik_irq_suspend - disable interrupts for suspend
7366 * @rdev: radeon_device pointer
7368 * Disable interrupts and stop the RLC (CIK).
7371 static void cik_irq_suspend(struct radeon_device *rdev)
/* Fully quiesce interrupt delivery before the RLC is stopped for suspend. */
7373 cik_irq_disable(rdev);
7378 * cik_irq_fini - tear down interrupt support
7380 * @rdev: radeon_device pointer
7382 * Disable interrupts on the hw and free the IH ring
7384 * Used for driver unload.
7386 static void cik_irq_fini(struct radeon_device *rdev)
/* Suspend first so the hw can no longer write into the ring we free. */
7388 cik_irq_suspend(rdev);
7389 r600_ih_ring_fini(rdev);
7393 * cik_get_ih_wptr - get the IH ring buffer wptr
7395 * @rdev: radeon_device pointer
7397 * Get the IH ring buffer wptr from either the register
7398 * or the writeback memory buffer (CIK). Also check for
7399 * ring buffer overflow and deal with it.
7400 * Used by cik_irq_process().
7401 * Returns the value of the wptr.
7403 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* Prefer the writeback copy (no MMIO read) when writeback is enabled. */
7407 if (rdev->wb.enabled)
7408 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7410 wptr = RREG32(IH_RB_WPTR);
7412 if (wptr & RB_OVERFLOW) {
7413 wptr &= ~RB_OVERFLOW;
7414 /* When a ring buffer overflow happen start parsing interrupt
7415 * from the last not overwritten vector (wptr + 16). Hopefully
7416 * this should allow us to catchup.
7418 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7419 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7420 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* Clear the sticky overflow bit so the next overflow is detectable. */
7421 tmp = RREG32(IH_RB_CNTL);
7422 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7423 WREG32(IH_RB_CNTL, tmp);
/* wptr is a byte offset; mask it to the ring size. */
7425 return (wptr & rdev->ih.ptr_mask);
7429 * Each IV ring entry is 128 bits:
7430 * [7:0] - interrupt source id
7432 * [59:32] - interrupt source data
7433 * [63:60] - reserved
7436 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7437 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7438 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7439 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7440 * PIPE_ID - ME0 0=3D
7441 * - ME1&2 compute dispatcher (4 pipes each)
7443 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7444 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7445 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7448 * [127:96] - reserved
7451 * cik_irq_process - interrupt handler
7453 * @rdev: radeon_device pointer
7455 * Interrupt handler (CIK). Walk the IH ring,
7456 * ack interrupts and schedule work to handle
7458 * Returns irq process return code.
7460 int cik_irq_process(struct radeon_device *rdev)
7462 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7463 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7466 u32 src_id, src_data, ring_id;
7467 u8 me_id, pipe_id, queue_id;
/* Deferred-work flags: set inside the loop, acted on once after it. */
7469 bool queue_hotplug = false;
7470 bool queue_dp = false;
7471 bool queue_reset = false;
7472 u32 addr, status, mc_client;
7473 bool queue_thermal = false;
7475 if (!rdev->ih.enabled || rdev->shutdown)
7478 wptr = cik_get_ih_wptr(rdev);
7481 /* is somebody else already processing irqs? */
7482 if (atomic_xchg(&rdev->ih.lock, 1))
7485 rptr = rdev->ih.rptr;
7486 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7488 /* Order reading of wptr vs. reading of IH ring data */
7491 /* display interrupts */
7494 while (rptr != wptr) {
7495 /* wptr/rptr are in bytes! */
7496 ring_index = rptr / 4;
/* Give the KFD (compute) stack a chance to consume this IV first. */
7498 radeon_kfd_interrupt(rdev,
7499 (const void *) &rdev->ih.ring[ring_index]);
/* Decode the 128-bit IV entry; layout documented above this function. */
7501 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7502 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7503 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7506 case 1: /* D1 vblank/vline */
7508 case 0: /* D1 vblank */
7509 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7510 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7512 if (rdev->irq.crtc_vblank_int[0]) {
7513 drm_handle_vblank(rdev->ddev, 0);
7514 rdev->pm.vblank_sync = true;
7515 wake_up(&rdev->irq.vblank_queue);
7517 if (atomic_read(&rdev->irq.pflip[0]))
7518 radeon_crtc_handle_vblank(rdev, 0);
7519 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7520 DRM_DEBUG("IH: D1 vblank\n");
7523 case 1: /* D1 vline */
7524 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7525 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7527 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7528 DRM_DEBUG("IH: D1 vline\n");
7532 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7536 case 2: /* D2 vblank/vline */
7538 case 0: /* D2 vblank */
7539 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7540 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7542 if (rdev->irq.crtc_vblank_int[1]) {
7543 drm_handle_vblank(rdev->ddev, 1);
7544 rdev->pm.vblank_sync = true;
7545 wake_up(&rdev->irq.vblank_queue);
7547 if (atomic_read(&rdev->irq.pflip[1]))
7548 radeon_crtc_handle_vblank(rdev, 1);
7549 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7550 DRM_DEBUG("IH: D2 vblank\n");
7553 case 1: /* D2 vline */
7554 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7555 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7557 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7558 DRM_DEBUG("IH: D2 vline\n");
7562 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7566 case 3: /* D3 vblank/vline */
7568 case 0: /* D3 vblank */
7569 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7570 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7572 if (rdev->irq.crtc_vblank_int[2]) {
7573 drm_handle_vblank(rdev->ddev, 2);
7574 rdev->pm.vblank_sync = true;
7575 wake_up(&rdev->irq.vblank_queue);
7577 if (atomic_read(&rdev->irq.pflip[2]))
7578 radeon_crtc_handle_vblank(rdev, 2);
7579 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7580 DRM_DEBUG("IH: D3 vblank\n");
7583 case 1: /* D3 vline */
7584 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7585 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7587 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7588 DRM_DEBUG("IH: D3 vline\n");
7592 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7596 case 4: /* D4 vblank/vline */
7598 case 0: /* D4 vblank */
7599 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7600 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7602 if (rdev->irq.crtc_vblank_int[3]) {
7603 drm_handle_vblank(rdev->ddev, 3);
7604 rdev->pm.vblank_sync = true;
7605 wake_up(&rdev->irq.vblank_queue);
7607 if (atomic_read(&rdev->irq.pflip[3]))
7608 radeon_crtc_handle_vblank(rdev, 3);
7609 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7610 DRM_DEBUG("IH: D4 vblank\n");
7613 case 1: /* D4 vline */
7614 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7615 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7617 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7618 DRM_DEBUG("IH: D4 vline\n");
7622 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7626 case 5: /* D5 vblank/vline */
7628 case 0: /* D5 vblank */
7629 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7630 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7632 if (rdev->irq.crtc_vblank_int[4]) {
7633 drm_handle_vblank(rdev->ddev, 4);
7634 rdev->pm.vblank_sync = true;
7635 wake_up(&rdev->irq.vblank_queue);
7637 if (atomic_read(&rdev->irq.pflip[4]))
7638 radeon_crtc_handle_vblank(rdev, 4);
7639 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7640 DRM_DEBUG("IH: D5 vblank\n");
7643 case 1: /* D5 vline */
7644 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7645 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7647 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7648 DRM_DEBUG("IH: D5 vline\n");
7652 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7656 case 6: /* D6 vblank/vline */
7658 case 0: /* D6 vblank */
7659 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7660 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7662 if (rdev->irq.crtc_vblank_int[5]) {
7663 drm_handle_vblank(rdev->ddev, 5);
7664 rdev->pm.vblank_sync = true;
7665 wake_up(&rdev->irq.vblank_queue);
7667 if (atomic_read(&rdev->irq.pflip[5]))
7668 radeon_crtc_handle_vblank(rdev, 5);
7669 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7670 DRM_DEBUG("IH: D6 vblank\n");
7673 case 1: /* D6 vline */
7674 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7675 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7677 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7678 DRM_DEBUG("IH: D6 vline\n");
7682 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* Page-flip src_ids are even numbers 8..18; map back to CRTC index. */
7686 case 8: /* D1 page flip */
7687 case 10: /* D2 page flip */
7688 case 12: /* D3 page flip */
7689 case 14: /* D4 page flip */
7690 case 16: /* D5 page flip */
7691 case 18: /* D6 page flip */
7692 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7693 if (radeon_use_pflipirq > 0)
7694 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7696 case 42: /* HPD hotplug */
7699 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7700 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7702 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7703 queue_hotplug = true;
7704 DRM_DEBUG("IH: HPD1\n");
7708 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7709 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7711 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7712 queue_hotplug = true;
7713 DRM_DEBUG("IH: HPD2\n");
7717 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7718 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7720 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7721 queue_hotplug = true;
7722 DRM_DEBUG("IH: HPD3\n");
7726 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7727 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7729 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7730 queue_hotplug = true;
7731 DRM_DEBUG("IH: HPD4\n");
7735 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7736 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7738 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7739 queue_hotplug = true;
7740 DRM_DEBUG("IH: HPD5\n");
7744 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7745 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7748 queue_hotplug = true;
7749 DRM_DEBUG("IH: HPD6\n");
/* HPD RX (DP short-pulse) events: schedule the DP work handler. */
7753 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7754 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7756 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7758 DRM_DEBUG("IH: HPD_RX 1\n");
7762 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7763 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7765 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7767 DRM_DEBUG("IH: HPD_RX 2\n");
7771 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7772 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7774 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7776 DRM_DEBUG("IH: HPD_RX 3\n");
7780 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7781 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7783 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7785 DRM_DEBUG("IH: HPD_RX 4\n");
7789 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7790 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7792 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7794 DRM_DEBUG("IH: HPD_RX 5\n");
7798 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7799 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7801 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7803 DRM_DEBUG("IH: HPD_RX 6\n");
7807 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7812 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7813 WREG32(SRBM_INT_ACK, 0x1);
7816 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7817 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* GPU VM fault: dump the fault address/status, then clear them. */
7821 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7822 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7823 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7824 /* reset addr and status */
7825 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7826 if (addr == 0x0 && status == 0x0)
7828 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7829 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7831 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7833 cik_vm_decode_fault(rdev, status, addr, mc_client);
7836 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7839 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7842 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7845 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7849 case 176: /* GFX RB CP_INT */
7850 case 177: /* GFX IB CP_INT */
7851 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7853 case 181: /* CP EOP event */
7854 DRM_DEBUG("IH: CP EOP\n");
7855 /* XXX check the bitfield order! */
7856 me_id = (ring_id & 0x60) >> 5;
7857 pipe_id = (ring_id & 0x18) >> 3;
7858 queue_id = (ring_id & 0x7) >> 0;
7861 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise '&' instead of logical '&&' below — harmless here
 * since both '==' operands yield 0/1, but '&&' is the conventional form.
 */
7865 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7866 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7867 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7868 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7872 case 184: /* CP Privileged reg access */
7873 DRM_ERROR("Illegal register access in command stream\n");
7874 /* XXX check the bitfield order! */
7875 me_id = (ring_id & 0x60) >> 5;
7876 pipe_id = (ring_id & 0x18) >> 3;
7877 queue_id = (ring_id & 0x7) >> 0;
7880 /* This results in a full GPU reset, but all we need to do is soft
7881 * reset the CP for gfx
7895 case 185: /* CP Privileged inst */
7896 DRM_ERROR("Illegal instruction in command stream\n");
7897 /* XXX check the bitfield order! */
7898 me_id = (ring_id & 0x60) >> 5;
7899 pipe_id = (ring_id & 0x18) >> 3;
7900 queue_id = (ring_id & 0x7) >> 0;
7903 /* This results in a full GPU reset, but all we need to do is soft
7904 * reset the CP for gfx
7918 case 224: /* SDMA trap event */
7919 /* XXX check the bitfield order! */
7920 me_id = (ring_id & 0x3) >> 0;
7921 queue_id = (ring_id & 0xc) >> 2;
7922 DRM_DEBUG("IH: SDMA trap\n");
7927 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7940 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7952 case 230: /* thermal low to high */
7953 DRM_DEBUG("IH: thermal low to high\n");
7954 rdev->pm.dpm.thermal.high_to_low = false;
7955 queue_thermal = true;
7957 case 231: /* thermal high to low */
7958 DRM_DEBUG("IH: thermal high to low\n");
7959 rdev->pm.dpm.thermal.high_to_low = true;
7960 queue_thermal = true;
7962 case 233: /* GUI IDLE */
7963 DRM_DEBUG("IH: GUI idle\n");
7965 case 241: /* SDMA Privileged inst */
7966 case 247: /* SDMA Privileged inst */
7967 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7968 /* XXX check the bitfield order! */
7969 me_id = (ring_id & 0x3) >> 0;
7970 queue_id = (ring_id & 0xc) >> 2;
8005 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8009 /* wptr/rptr are in bytes! */
8011 rptr &= rdev->ih.ptr_mask;
8012 WREG32(IH_RB_RPTR, rptr);
/* Loop done: dispatch the deferred work items collected above. */
8015 schedule_work(&rdev->dp_work);
8017 schedule_delayed_work(&rdev->hotplug_work, 0);
8019 rdev->needs_reset = true;
8020 wake_up_all(&rdev->fence_queue);
8023 schedule_work(&rdev->pm.dpm.thermal.work);
8024 rdev->ih.rptr = rptr;
8025 atomic_set(&rdev->ih.lock, 0);
8027 /* make sure wptr hasn't changed while processing */
8028 wptr = cik_get_ih_wptr(rdev);
8036 * startup/shutdown callbacks
/* cik_uvd_init - one-time UVD software setup.
 * Calls radeon_uvd_init(); on failure UVD is left disabled (vcpu_bo stays
 * NULL, so cik_uvd_start() bails out early per the comment below).
 * The ring is only sized here (4096); it is brought up later in
 * cik_uvd_resume().
 */
8038 static void cik_uvd_init(struct radeon_device *rdev)
8045 r = radeon_uvd_init(rdev);
8047 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8049 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8050 * to early fails cik_uvd_start() and thus nothing happens
8051 * there. So it is pointless to try to go through that code
8052 * hence why we disable uvd here.
/* Size the UVD ring; actual HW init happens in cik_uvd_resume(). */
8057 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8058 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
/* cik_uvd_start - resume UVD firmware/clock state and start its fence ring.
 * On any failure the UVD ring_size is zeroed (last line) so that
 * cik_uvd_resume() skips UVD entirely.
 */
8061 static void cik_uvd_start(struct radeon_device *rdev)
8068 r = radeon_uvd_resume(rdev);
8070 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
/* Program the UVD 4.2 hardware block after the generic resume. */
8073 r = uvd_v4_2_resume(rdev);
8075 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8078 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8080 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
/* Error path: mark the ring unusable so cik_uvd_resume() is a no-op. */
8086 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
/* cik_uvd_resume - initialize the UVD ring and bring the UVD block online.
 * Skipped when UVD is absent or cik_uvd_start() zeroed the ring size.
 */
8089 static void cik_uvd_resume(struct radeon_device *rdev)
8091 struct radeon_ring *ring;
/* Guard: UVD disabled earlier (no HW, or start failed) -> nothing to do. */
8094 if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8097 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8098 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8100 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8103 r = uvd_v1_0_init(rdev);
8105 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
/* cik_vce_init - one-time VCE software setup (mirror of cik_uvd_init).
 * Calls radeon_vce_init(); on failure VCE stays disabled (vcpu_bo NULL,
 * cik_vce_start() bails early). Both VCE rings are only sized here (4096);
 * they are brought up later in cik_vce_resume().
 */
8110 static void cik_vce_init(struct radeon_device *rdev)
8117 r = radeon_vce_init(rdev);
8119 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8121 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8122 * to early fails cik_vce_start() and thus nothing happens
8123 * there. So it is pointless to try to go through that code
8124 * hence why we disable vce here.
/* Size both VCE rings; actual HW init happens in cik_vce_resume(). */
8129 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8130 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8131 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8132 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
/* cik_vce_start - resume VCE firmware state and start both VCE fence rings.
 * On any failure both rings' ring_size is zeroed (last two lines) so that
 * cik_vce_resume() skips VCE entirely.
 */
8135 static void cik_vce_start(struct radeon_device *rdev)
8142 r = radeon_vce_resume(rdev);
8144 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
/* Program the VCE 2.0 hardware block after the generic resume. */
8147 r = vce_v2_0_resume(rdev);
8149 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8152 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8154 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8157 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8159 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
/* Error path: mark both rings unusable so cik_vce_resume() is a no-op. */
8165 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8166 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
/* cik_vce_resume - initialize both VCE rings and bring the VCE block online.
 * Skipped when VCE is absent or cik_vce_start() zeroed the ring sizes.
 */
8169 static void cik_vce_resume(struct radeon_device *rdev)
8171 struct radeon_ring *ring;
8174 if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8177 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8178 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8180 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8183 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8184 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
/* NOTE(review): this message says "VCE1" but the ring initialized just
 * above is TN_RING_TYPE_VCE2_INDEX — looks like a copy/paste error in the
 * error string; should presumably read "VCE2". Confirm and fix upstream. */
8186 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8189 r = vce_v1_0_init(rdev);
8191 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8197 * cik_startup - program the asic to a functional state
8199 * @rdev: radeon_device pointer
8201 * Programs the asic to a functional state (CIK).
8202 * Called by cik_init() and cik_resume().
8203 * Returns 0 for success, error for failure.
8205 static int cik_startup(struct radeon_device *rdev)
8207 struct radeon_ring *ring;
8211 /* enable pcie gen2/3 link */
8212 cik_pcie_gen3_enable(rdev);
/* ASPM (PCIe link power management) programming. */
8214 cik_program_aspm(rdev);
8216 /* scratch needs to be initialized before MC */
8217 r = r600_vram_scratch_init(rdev);
/* Program the memory controller aperture/registers. */
8221 cik_mc_program(rdev);
/* dGPU only: load MC microcode unless DPM already took care of it. */
8223 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8224 r = ci_mc_load_microcode(rdev);
8226 DRM_ERROR("Failed to load MC firmware!\n");
8231 r = cik_pcie_gart_enable(rdev);
8236 /* allocate rlc buffers */
/* IGP parts select the per-family RLC save/restore register list;
 * Kaveri uses the "spectre" list, other IGPs the "kalindi" list. */
8237 if (rdev->flags & RADEON_IS_IGP) {
8238 if (rdev->family == CHIP_KAVERI) {
8239 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8240 rdev->rlc.reg_list_size =
8241 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8243 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8244 rdev->rlc.reg_list_size =
8245 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8248 rdev->rlc.cs_data = ci_cs_data;
8249 rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8250 rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8251 r = sumo_rlc_init(rdev);
8253 DRM_ERROR("Failed to init rlc BOs!\n");
8257 /* allocate wb buffer */
8258 r = radeon_wb_init(rdev);
8262 /* allocate mec buffers */
8263 r = cik_mec_init(rdev);
8265 DRM_ERROR("Failed to init MEC BOs!\n");
/* Start fence processing for all five rings: GFX, the two compute
 * rings (CP1/CP2) and the two SDMA rings. */
8269 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8271 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8275 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8277 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8281 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8283 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8287 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8289 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8293 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8295 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD/VCE starts are best-effort; they disable themselves on failure. */
8299 cik_uvd_start(rdev);
8300 cik_vce_start(rdev);
/* Interrupt controller (IH) setup. */
8303 if (!rdev->irq.installed) {
8304 r = radeon_irq_kms_init(rdev);
8309 r = cik_irq_init(rdev);
8311 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8312 radeon_irq_kms_fini(rdev);
/* Hawaii uses a type-3 NOP for the GFX ring; others use CP_PACKET2. */
8317 if (rdev->family == CHIP_HAWAII) {
8319 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8321 nop = RADEON_CP_PACKET2;
8323 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8326 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8327 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8332 /* set up the compute queues */
8333 /* type-2 packets are deprecated on MEC, use type-3 instead */
8334 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8335 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8339 ring->me = 1; /* first MEC */
8340 ring->pipe = 0; /* first pipe */
8341 ring->queue = 0; /* first queue */
8342 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8344 /* type-2 packets are deprecated on MEC, use type-3 instead */
8345 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8346 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8350 /* dGPU only have 1 MEC */
8351 ring->me = 1; /* first MEC */
8352 ring->pipe = 0; /* first pipe */
8353 ring->queue = 1; /* second queue */
8354 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
/* SDMA rings use an SDMA NOP packet as filler. */
8356 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8357 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8358 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8362 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8363 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8364 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
/* Bring the CP and SDMA engines out of reset and running. */
8368 r = cik_cp_resume(rdev);
8372 r = cik_sdma_resume(rdev);
8376 cik_uvd_resume(rdev);
8377 cik_vce_resume(rdev);
8379 r = radeon_ib_pool_init(rdev);
8381 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8385 r = radeon_vm_manager_init(rdev);
8387 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8391 r = radeon_audio_init(rdev);
/* Let the amdkfd compute stack resume last, once everything is up. */
8395 r = radeon_kfd_resume(rdev);
8403 * cik_resume - resume the asic to a functional state
8405 * @rdev: radeon_device pointer
8407 * Programs the asic to a functional state (CIK).
8409 * Returns 0 for success, error for failure.
8411 int cik_resume(struct radeon_device *rdev)
/* Re-post the card via the atom BIOS init table. */
8416 atom_asic_init(rdev->mode_info.atom_context);
8418 /* init golden registers */
8419 cik_init_golden_registers(rdev);
8421 if (rdev->pm.pm_method == PM_METHOD_DPM)
8422 radeon_pm_resume(rdev);
/* Optimistically mark acceleration working; cleared below if
 * cik_startup() fails. */
8424 rdev->accel_working = true;
8425 r = cik_startup(rdev);
8427 DRM_ERROR("cik startup failed on resume\n");
8428 rdev->accel_working = false;
8437 * cik_suspend - suspend the asic
8439 * @rdev: radeon_device pointer
8441 * Bring the chip into a state suitable for suspend (CIK).
8442 * Called at suspend.
8443 * Returns 0 for success.
8445 int cik_suspend(struct radeon_device *rdev)
/* Tear down in roughly reverse startup order: KFD first, GART last. */
8447 radeon_kfd_suspend(rdev);
8448 radeon_pm_suspend(rdev);
8449 radeon_audio_fini(rdev);
8450 radeon_vm_manager_fini(rdev);
/* Halt the command processors and SDMA engines. */
8451 cik_cp_enable(rdev, false);
8452 cik_sdma_enable(rdev, false);
8453 if (rdev->has_uvd) {
8454 uvd_v1_0_fini(rdev);
8455 radeon_uvd_suspend(rdev);
8458 radeon_vce_suspend(rdev);
8461 cik_irq_suspend(rdev);
8462 radeon_wb_disable(rdev);
8463 cik_pcie_gart_disable(rdev);
8467 /* Plan is to move initialization in that function and use
8468 * helper function so that radeon_device_init pretty much
8469 * do nothing more than calling asic specific function. This
8470 * should also allow to remove a bunch of callback function
8474 * cik_init - asic specific driver and hw init
8476 * @rdev: radeon_device pointer
8478 * Setup asic specific driver variables and program the hw
8479 * to a functional state (CIK).
8480 * Called at driver startup.
8481 * Returns 0 for success, errors for failure.
8483 int cik_init(struct radeon_device *rdev)
8485 struct radeon_ring *ring;
/* BIOS is mandatory: we need the atom tables to post and program the GPU. */
8489 if (!radeon_get_bios(rdev)) {
8490 if (ASIC_IS_AVIVO(rdev))
8493 /* Must be an ATOMBIOS */
8494 if (!rdev->is_atom_bios) {
/* NOTE(review): message says "cayman" but this is the CIK init path —
 * presumably copied from ni.c; should likely read "CIK GPU". Confirm. */
8495 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8498 r = radeon_atombios_init(rdev);
8502 /* Post card if necessary */
8503 if (!radeon_card_posted(rdev)) {
8505 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8508 DRM_INFO("GPU not posted. posting now...\n");
8509 atom_asic_init(rdev->mode_info.atom_context);
8511 /* init golden registers */
8512 cik_init_golden_registers(rdev);
8513 /* Initialize scratch registers */
8514 cik_scratch_init(rdev);
8515 /* Initialize surface registers */
8516 radeon_surface_init(rdev);
8517 /* Initialize clocks */
8518 radeon_get_clock_info(rdev->ddev);
8521 r = radeon_fence_driver_init(rdev);
8525 /* initialize memory controller */
8526 r = cik_mc_init(rdev);
8529 /* Memory manager */
8530 r = radeon_bo_init(rdev);
/* Load microcode if not already cached. IGPs need fewer blobs than
 * dGPUs (dGPU check below includes additional firmware, e.g. MC). */
8534 if (rdev->flags & RADEON_IS_IGP) {
8535 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8536 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8537 r = cik_init_microcode(rdev);
8539 DRM_ERROR("Failed to load firmware!\n");
8544 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8545 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8547 r = cik_init_microcode(rdev);
8549 DRM_ERROR("Failed to load firmware!\n");
8555 /* Initialize power management */
8556 radeon_pm_init(rdev);
/* Size all rings up front: 1 MiB for GFX and the two compute rings,
 * 256 KiB for each SDMA ring. Compute rings also get a doorbell. */
8558 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8559 ring->ring_obj = NULL;
8560 r600_ring_init(rdev, ring, 1024 * 1024);
8562 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8563 ring->ring_obj = NULL;
8564 r600_ring_init(rdev, ring, 1024 * 1024);
8565 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8569 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8570 ring->ring_obj = NULL;
8571 r600_ring_init(rdev, ring, 1024 * 1024);
8572 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8576 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8577 ring->ring_obj = NULL;
8578 r600_ring_init(rdev, ring, 256 * 1024);
8580 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581 ring->ring_obj = NULL;
8582 r600_ring_init(rdev, ring, 256 * 1024);
/* 64 KiB interrupt handler ring. */
8587 rdev->ih.ring_obj = NULL;
8588 r600_ih_ring_init(rdev, 64 * 1024);
8590 r = r600_pcie_gart_init(rdev);
8594 rdev->accel_working = true;
8595 r = cik_startup(rdev);
/* On startup failure, tear down everything cik_startup() may have
 * created and continue without acceleration. */
8597 dev_err(rdev->dev, "disabling GPU acceleration\n");
8599 cik_sdma_fini(rdev);
8601 sumo_rlc_fini(rdev);
8603 radeon_wb_fini(rdev);
8604 radeon_ib_pool_fini(rdev);
8605 radeon_vm_manager_fini(rdev);
8606 radeon_irq_kms_fini(rdev);
8607 cik_pcie_gart_fini(rdev);
8608 rdev->accel_working = false;
8611 /* Don't start up if the MC ucode is missing.
8612 * The default clocks and voltages before the MC ucode
8613 * is loaded are not sufficient for advanced operations.
8615 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8616 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8624 * cik_fini - asic specific driver and hw fini
8626 * @rdev: radeon_device pointer
8628 * Tear down the asic specific driver variables and program the hw
8629 * to an idle state (CIK).
8630 * Called at driver unload.
/* cik_fini - full driver/hardware teardown at module unload.
 * Order matters: engines and helpers first, then memory/fence/BO
 * infrastructure, atombios last.
 */
8632 void cik_fini(struct radeon_device *rdev)
8634 radeon_pm_fini(rdev);
8636 cik_sdma_fini(rdev);
8640 sumo_rlc_fini(rdev);
8642 radeon_wb_fini(rdev);
8643 radeon_vm_manager_fini(rdev);
8644 radeon_ib_pool_fini(rdev);
8645 radeon_irq_kms_fini(rdev);
8646 uvd_v1_0_fini(rdev);
8647 radeon_uvd_fini(rdev);
8648 radeon_vce_fini(rdev);
8649 cik_pcie_gart_fini(rdev);
8650 r600_vram_scratch_fini(rdev);
8651 radeon_gem_fini(rdev);
8652 radeon_fence_driver_fini(rdev);
8653 radeon_bo_fini(rdev);
8654 radeon_atombios_fini(rdev);
/* dce8_program_fmt - program the FMT block (output bit depth / dithering)
 * for the CRTC driving this encoder. Dither mode and bpc come from the
 * attached connector; the accumulated bits are written to
 * FMT_BIT_DEPTH_CONTROL at the end.
 */
8659 void dce8_program_fmt(struct drm_encoder *encoder)
8661 struct drm_device *dev = encoder->dev;
8662 struct radeon_device *rdev = dev->dev_private;
8663 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8664 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8665 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8668 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8671 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8672 bpc = radeon_get_monitor_bpc(connector);
8673 dither = radeon_connector->dither;
8676 /* LVDS/eDP FMT is set up by atom */
8677 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8680 /* not needed for analog */
8681 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8682 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
/* 6 bpc case: spatial dither or truncate to depth 0. */
8690 if (dither == RADEON_FMT_DITHER_ENABLE)
8691 /* XXX sort out optimal dither settings */
8692 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8693 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8695 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
/* 8 bpc case: dither/truncate to depth 1, RGB random added. */
8698 if (dither == RADEON_FMT_DITHER_ENABLE)
8699 /* XXX sort out optimal dither settings */
8700 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8701 FMT_RGB_RANDOM_ENABLE |
8702 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8704 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
/* 10 bpc case: dither/truncate to depth 2. */
8707 if (dither == RADEON_FMT_DITHER_ENABLE)
8708 /* XXX sort out optimal dither settings */
8709 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8710 FMT_RGB_RANDOM_ENABLE |
8711 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8713 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8720 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8723 /* display watermark setup */
8725 * dce8_line_buffer_adjust - Set up the line buffer
8727 * @rdev: radeon_device pointer
8728 * @radeon_crtc: the selected display controller
8729 * @mode: the current display mode on the selected display
8732 * Setup up the line buffer allocation for
8733 * the selected display controller (CIK).
8734 * Returns the line buffer size in pixels.
8736 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8737 struct radeon_crtc *radeon_crtc,
8738 struct drm_display_mode *mode)
8740 u32 tmp, buffer_alloc, i;
8741 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8744 * There are 6 line buffers, one for each display controller.
8745 * There are 3 partitions per LB. Select the number of partitions
8746 * to enable based on the display width. For display widths larger
8747 * than 4096, you need to use 2 display controllers and combine
8748 * them using the stereo blender.
/* Pick partition count / DMIF buffer allocation by horizontal width;
 * IGPs get fewer DMIF buffers (2) than dGPUs (4) for the wide cases. */
8750 if (radeon_crtc->base.enabled && mode) {
8751 if (mode->crtc_hdisplay < 1920) {
8754 } else if (mode->crtc_hdisplay < 2560) {
8757 } else if (mode->crtc_hdisplay < 4096) {
8759 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8761 DRM_DEBUG_KMS("Mode too big for LB!\n");
8763 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8770 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8771 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8773 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8774 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Poll (bounded by usec_timeout) until the hardware acknowledges the
 * DMIF buffer allocation. */
8775 for (i = 0; i < rdev->usec_timeout; i++) {
8776 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8777 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8782 if (radeon_crtc->base.enabled && mode) {
8794 /* controller not enabled, so no lb used */
8799 * cik_get_number_of_dram_channels - get the number of dram channels
8801 * @rdev: radeon_device pointer
8803 * Look up the number of video ram channels (CIK).
8804 * Used for display watermark bandwidth calculations
8805 * Returns the number of dram channels
/* Decode the channel-count field of MC_SHARED_CHMAP into a number of
 * DRAM channels (used by the watermark bandwidth math below). */
8807 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8809 u32 tmp = RREG32(MC_SHARED_CHMAP);
8811 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* dce8_wm_params - per-head inputs to the DCE8 display watermark
 * calculations (filled in by dce8_program_watermarks()). */
8834 struct dce8_wm_params {
8835 u32 dram_channels; /* number of dram channels */
8836 u32 yclk; /* bandwidth per dram data pin in kHz */
8837 u32 sclk; /* engine clock in kHz */
8838 u32 disp_clk; /* display clock in kHz */
8839 u32 src_width; /* viewport width */
8840 u32 active_time; /* active display time in ns */
8841 u32 blank_time; /* blank time in ns */
8842 bool interlaced; /* mode is interlaced */
8843 fixed20_12 vsc; /* vertical scale ratio */
8844 u32 num_heads; /* number of active crtcs */
8845 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8846 u32 lb_size; /* line buffer allocated to pipe */
8847 u32 vtaps; /* vertical scaler taps */
8851 * dce8_dram_bandwidth - get the dram bandwidth
8853 * @wm: watermark calculation data
8855 * Calculate the raw dram bandwidth (CIK).
8856 * Used for display watermark bandwidth calculations
8857 * Returns the dram bandwidth in MBytes/s
8859 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8861 /* Calculate raw DRAM Bandwidth */
8862 fixed20_12 dram_efficiency; /* 0.7 */
8863 fixed20_12 yclk, dram_channels, bandwidth;
/* yclk is in kHz; divide by 1000 to work in MHz for MB/s results. */
8866 a.full = dfixed_const(1000);
8867 yclk.full = dfixed_const(wm->yclk);
8868 yclk.full = dfixed_div(yclk, a);
/* 4 bytes of data per channel per yclk edge. */
8869 dram_channels.full = dfixed_const(wm->dram_channels * 4);
/* 0.7 efficiency factor built as 7/10 in 20.12 fixed point. */
8870 a.full = dfixed_const(10);
8871 dram_efficiency.full = dfixed_const(7);
8872 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8873 bandwidth.full = dfixed_mul(dram_channels, yclk);
8874 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8876 return dfixed_trunc(bandwidth);
8880 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8882 * @wm: watermark calculation data
8884 * Calculate the dram bandwidth used for display (CIK).
8885 * Used for display watermark bandwidth calculations
8886 * Returns the dram bandwidth for display in MBytes/s
8888 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8890 /* Calculate DRAM Bandwidth and the part allocated to display. */
8891 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8892 fixed20_12 yclk, dram_channels, bandwidth;
/* Same raw-bandwidth math as dce8_dram_bandwidth(), but scaled by the
 * display's DRAM allocation share instead of the efficiency factor. */
8895 a.full = dfixed_const(1000);
8896 yclk.full = dfixed_const(wm->yclk);
8897 yclk.full = dfixed_div(yclk, a);
8898 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8899 a.full = dfixed_const(10);
8900 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8901 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8902 bandwidth.full = dfixed_mul(dram_channels, yclk);
8903 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8905 return dfixed_trunc(bandwidth);
8909 * dce8_data_return_bandwidth - get the data return bandwidth
8911 * @wm: watermark calculation data
8913 * Calculate the data return bandwidth used for display (CIK).
8914 * Used for display watermark bandwidth calculations
8915 * Returns the data return bandwidth in MBytes/s
8917 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8919 /* Calculate the display Data return Bandwidth */
8920 fixed20_12 return_efficiency; /* 0.8 */
8921 fixed20_12 sclk, bandwidth;
/* sclk in kHz -> MHz; 32 bytes per sclk times 0.8 efficiency (8/10). */
8924 a.full = dfixed_const(1000);
8925 sclk.full = dfixed_const(wm->sclk);
8926 sclk.full = dfixed_div(sclk, a);
8927 a.full = dfixed_const(10);
8928 return_efficiency.full = dfixed_const(8);
8929 return_efficiency.full = dfixed_div(return_efficiency, a);
8930 a.full = dfixed_const(32);
8931 bandwidth.full = dfixed_mul(a, sclk);
8932 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8934 return dfixed_trunc(bandwidth);
8938 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8940 * @wm: watermark calculation data
8942 * Calculate the dmif bandwidth used for display (CIK).
8943 * Used for display watermark bandwidth calculations
8944 * Returns the dmif bandwidth in MBytes/s
8946 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8948 /* Calculate the DMIF Request Bandwidth */
8949 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8950 fixed20_12 disp_clk, bandwidth;
/* disp_clk in kHz -> MHz; 32 bytes per disp_clk times 0.8 efficiency. */
8953 a.full = dfixed_const(1000);
8954 disp_clk.full = dfixed_const(wm->disp_clk);
8955 disp_clk.full = dfixed_div(disp_clk, a);
8956 a.full = dfixed_const(32);
8957 b.full = dfixed_mul(a, disp_clk);
8959 a.full = dfixed_const(10);
8960 disp_clk_request_efficiency.full = dfixed_const(8);
8961 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8963 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8965 return dfixed_trunc(bandwidth);
8969 * dce8_available_bandwidth - get the min available bandwidth
8971 * @wm: watermark calculation data
8973 * Calculate the min available bandwidth used for display (CIK).
8974 * Used for display watermark bandwidth calculations
8975 * Returns the min available bandwidth in MBytes/s
8977 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8979 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
/* The limiting factor is the smallest of the three bandwidths. */
8980 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8981 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8982 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8984 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8988 * dce8_average_bandwidth - get the average available bandwidth
8990 * @wm: watermark calculation data
8992 * Calculate the average available bandwidth used for display (CIK).
8993 * Used for display watermark bandwidth calculations
8994 * Returns the average available bandwidth in MBytes/s
8996 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8998 /* Calculate the display mode Average Bandwidth
8999 * DisplayMode should contain the source and destination dimensions,
9003 fixed20_12 line_time;
9004 fixed20_12 src_width;
9005 fixed20_12 bandwidth;
/* Average bandwidth = src_width * bytes_per_pixel * vsc / line_time,
 * with line_time = (active + blank) ns scaled by 1000. */
9008 a.full = dfixed_const(1000);
9009 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9010 line_time.full = dfixed_div(line_time, a);
9011 bpp.full = dfixed_const(wm->bytes_per_pixel);
9012 src_width.full = dfixed_const(wm->src_width);
9013 bandwidth.full = dfixed_mul(src_width, bpp);
9014 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9015 bandwidth.full = dfixed_div(bandwidth, line_time);
9017 return dfixed_trunc(bandwidth);
9021 * dce8_latency_watermark - get the latency watermark
9023 * @wm: watermark calculation data
9025 * Calculate the latency watermark (CIK).
9026 * Used for display watermark bandwidth calculations
9027 * Returns the latency watermark in ns
9029 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9031 /* First calculate the latency in ns */
9032 u32 mc_latency = 2000; /* 2000 ns. */
9033 u32 available_bandwidth = dce8_available_bandwidth(wm);
/* Time for the worst-case 512*8-byte chunk (and a 128*4-byte cursor
 * line pair) to come back at the available bandwidth, in ns. */
9034 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9035 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9036 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9037 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9038 (wm->num_heads * cursor_line_pair_return_time);
9039 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9040 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9041 u32 tmp, dmif_size = 12288;
/* No active heads -> nothing to fetch; the per-head divisions below
 * would also divide by zero. */
9044 if (wm->num_heads == 0)
/* Downscaling (vsc > 2, or > 1 with >= 3 taps, or >= 2 interlaced)
 * needs up to 4 source lines per destination line, else 2. */
9047 a.full = dfixed_const(2);
9048 b.full = dfixed_const(1);
9049 if ((wm->vsc.full > a.full) ||
9050 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9052 ((wm->vsc.full >= a.full) && wm->interlaced))
9053 max_src_lines_per_dst_line = 4;
9055 max_src_lines_per_dst_line = 2;
/* Line-buffer fill bandwidth: min of (available bw / heads),
 * (dmif_size over the mc+512 cycle window) and the disp_clk-limited
 * pixel rate. */
9057 a.full = dfixed_const(available_bandwidth);
9058 b.full = dfixed_const(wm->num_heads);
9059 a.full = dfixed_div(a, b);
9061 b.full = dfixed_const(mc_latency + 512);
9062 c.full = dfixed_const(wm->disp_clk);
9063 b.full = dfixed_div(b, c);
9065 c.full = dfixed_const(dmif_size);
9066 b.full = dfixed_div(c, b);
9068 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9070 b.full = dfixed_const(1000);
9071 c.full = dfixed_const(wm->disp_clk);
9072 b.full = dfixed_div(c, b);
9073 c.full = dfixed_const(wm->bytes_per_pixel);
9074 b.full = dfixed_mul(b, c);
9076 lb_fill_bw = min(tmp, dfixed_trunc(b));
/* Time to fill one destination line's worth of source data. */
9078 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9079 b.full = dfixed_const(1000);
9080 c.full = dfixed_const(lb_fill_bw);
9081 b.full = dfixed_div(c, b);
9082 a.full = dfixed_div(a, b);
9083 line_fill_time = dfixed_trunc(a);
/* If the line fills faster than it is displayed, plain latency is the
 * watermark; otherwise add the extra fill time. */
9085 if (line_fill_time < wm->active_time)
9088 return latency + (line_fill_time - wm->active_time);
9093 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9094 * average and available dram bandwidth
9096 * @wm: watermark calculation data
9098 * Check if the display average bandwidth fits in the display
9099 * dram bandwidth (CIK).
9100 * Used for display watermark bandwidth calculations
9101 * Returns true if the display fits, false if not.
/* True if this head's average bandwidth fits within its per-head share
 * of the display DRAM bandwidth. */
9103 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9105 if (dce8_average_bandwidth(wm) <=
9106 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9113 * dce8_average_bandwidth_vs_available_bandwidth - check
9114 * average and available bandwidth
9116 * @wm: watermark calculation data
9118 * Check if the display average bandwidth fits in the display
9119 * available bandwidth (CIK).
9120 * Used for display watermark bandwidth calculations
9121 * Returns true if the display fits, false if not.
/* True if this head's average bandwidth fits within its per-head share
 * of the minimum available bandwidth. */
9123 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9125 if (dce8_average_bandwidth(wm) <=
9126 (dce8_available_bandwidth(wm) / wm->num_heads))
9133 * dce8_check_latency_hiding - check latency hiding
9135 * @wm: watermark calculation data
9137 * Check latency hiding (CIK).
9138 * Used for display watermark bandwidth calculations
9139 * Returns true if the display fits, false if not.
9141 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
/* How many full source lines fit in the line buffer. */
9143 u32 lb_partitions = wm->lb_size / wm->src_width;
9144 u32 line_time = wm->active_time + wm->blank_time;
9145 u32 latency_tolerant_lines;
/* With vertical scaling (vsc > 1) or a nearly-full line buffer, only
 * one line of latency can be tolerated; otherwise two. */
9149 a.full = dfixed_const(1);
9150 if (wm->vsc.full > a.full)
9151 latency_tolerant_lines = 1;
9153 if (lb_partitions <= (wm->vtaps + 1))
9154 latency_tolerant_lines = 1;
9156 latency_tolerant_lines = 2;
9159 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
/* Fits if the latency watermark is covered by the hideable time. */
9161 if (dce8_latency_watermark(wm) <= latency_hiding)
9168 * dce8_program_watermarks - program display watermarks
9170 * @rdev: radeon_device pointer
9171 * @radeon_crtc: the selected display controller
9172 * @lb_size: line buffer size
9173 * @num_heads: number of display controllers in use
9175 * Calculate and program the display watermarks for the
9176 * selected display controller (CIK).
9178 static void dce8_program_watermarks(struct radeon_device *rdev,
9179 struct radeon_crtc *radeon_crtc,
9180 u32 lb_size, u32 num_heads)
9182 struct drm_display_mode *mode = &radeon_crtc->base.mode;
9183 struct dce8_wm_params wm_low, wm_high;
9186 u32 latency_watermark_a = 0, latency_watermark_b = 0;
/* Only compute watermarks for an enabled CRTC with a mode set. */
9189 if (radeon_crtc->base.enabled && num_heads && mode) {
9190 pixel_period = 1000000 / (u32)mode->clock;
/* Clamp line_time to the 16-bit field it is written into below. */
9191 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9193 /* watermark for high clocks */
/* With DPM, use the highest mclk/sclk levels; otherwise the current
 * clocks. Values are kHz*10. */
9194 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9195 rdev->pm.dpm_enabled) {
9197 radeon_dpm_get_mclk(rdev, false) * 10;
9199 radeon_dpm_get_sclk(rdev, false) * 10;
9201 wm_high.yclk = rdev->pm.current_mclk * 10;
9202 wm_high.sclk = rdev->pm.current_sclk * 10;
9205 wm_high.disp_clk = mode->clock;
9206 wm_high.src_width = mode->crtc_hdisplay;
9207 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9208 wm_high.blank_time = line_time - wm_high.active_time;
9209 wm_high.interlaced = false;
9210 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9211 wm_high.interlaced = true;
9212 wm_high.vsc = radeon_crtc->vsc;
9214 if (radeon_crtc->rmx_type != RMX_OFF)
9216 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9217 wm_high.lb_size = lb_size;
9218 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9219 wm_high.num_heads = num_heads;
9221 /* set for high clocks */
9222 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9224 /* possibly force display priority to high */
9225 /* should really do this at mode validation time... */
9226 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9227 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9228 !dce8_check_latency_hiding(&wm_high) ||
9229 (rdev->disp_priority == 2)) {
9230 DRM_DEBUG_KMS("force priority to high\n");
9233 /* watermark for low clocks */
/* Same computation again for the lowest DPM levels (mclk/sclk "true"
 * argument requests the low levels). */
9234 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9235 rdev->pm.dpm_enabled) {
9237 radeon_dpm_get_mclk(rdev, true) * 10;
9239 radeon_dpm_get_sclk(rdev, true) * 10;
9241 wm_low.yclk = rdev->pm.current_mclk * 10;
9242 wm_low.sclk = rdev->pm.current_sclk * 10;
9245 wm_low.disp_clk = mode->clock;
9246 wm_low.src_width = mode->crtc_hdisplay;
9247 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9248 wm_low.blank_time = line_time - wm_low.active_time;
9249 wm_low.interlaced = false;
9250 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9251 wm_low.interlaced = true;
9252 wm_low.vsc = radeon_crtc->vsc;
9254 if (radeon_crtc->rmx_type != RMX_OFF)
9256 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9257 wm_low.lb_size = lb_size;
9258 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9259 wm_low.num_heads = num_heads;
9261 /* set for low clocks */
9262 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9264 /* possibly force display priority to high */
9265 /* should really do this at mode validation time... */
9266 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9267 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9268 !dce8_check_latency_hiding(&wm_low) ||
9269 (rdev->disp_priority == 2)) {
9270 DRM_DEBUG_KMS("force priority to high\n");
9273 /* Save number of lines the linebuffer leads before the scanout */
9274 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
/* Write watermark A (set 1) then B (set 2) via the watermark mask
 * selector, restoring the original selection afterwards. */
9278 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9280 tmp &= ~LATENCY_WATERMARK_MASK(3);
9281 tmp |= LATENCY_WATERMARK_MASK(1);
9282 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9283 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9284 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9285 LATENCY_HIGH_WATERMARK(line_time)));
9287 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9288 tmp &= ~LATENCY_WATERMARK_MASK(3);
9289 tmp |= LATENCY_WATERMARK_MASK(2);
9290 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9291 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9292 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9293 LATENCY_HIGH_WATERMARK(line_time)));
9294 /* restore original selection */
9295 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9297 /* save values for DPM */
9298 radeon_crtc->line_time = line_time;
9299 radeon_crtc->wm_high = latency_watermark_a;
9300 radeon_crtc->wm_low = latency_watermark_b;
9304 * dce8_bandwidth_update - program display watermarks
9306 * @rdev: radeon_device pointer
9308 * Calculate and program the display watermarks and line
9309 * buffer allocation (CIK).
9311 void dce8_bandwidth_update(struct radeon_device *rdev)
9313 struct drm_display_mode *mode = NULL;
9314 u32 num_heads = 0, lb_size;
/* nothing to program before the mode config has been initialized */
9317 if (!rdev->mode_info.mode_config_initialized)
/* refresh rdev->disp_priority before the watermark calculations use it */
9320 radeon_update_display_priority(rdev);
/* first pass: count the enabled CRTCs (active display heads) */
9322 for (i = 0; i < rdev->num_crtc; i++) {
9323 if (rdev->mode_info.crtcs[i]->base.enabled)
/* second pass: size the line buffer and program watermarks per CRTC */
9326 for (i = 0; i < rdev->num_crtc; i++) {
9327 mode = &rdev->mode_info.crtcs[i]->base.mode;
9328 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9329 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9334 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9336 * @rdev: radeon_device pointer
9338 * Fetches a GPU clock counter snapshot (CIK).
9339 * Returns the 64 bit clock counter snapshot.
9341 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
/* gpu_clock_mutex serializes the capture write and the two-register readback */
9345 mutex_lock(&rdev->gpu_clock_mutex);
/* writing 1 captures the counter so LSB/MSB can be read as a coherent pair */
9346 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9347 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9348 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9349 mutex_unlock(&rdev->gpu_clock_mutex);
/* Program one UVD clock (VCLK or DCLK): look up the ATOM clock dividers
 * for the requested @clock, write the post divider to @cntl_reg, then poll
 * @status_reg for DCLK_STATUS to confirm the divider change took effect.
 * Returns 0 on success, negative error code on failure.
 */
9353 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9354 u32 cntl_reg, u32 status_reg)
9357 struct atom_clock_dividers dividers;
/* query the divider setup for the requested clock from the vbios tables
 * (fixed: "÷rs" was HTML-entity mojibake of "&dividers") */
9360 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9361 clock, false, &dividers);
/* swap in the new post divider without enabling direct control */
9365 tmp = RREG32_SMC(cntl_reg);
9366 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9367 tmp |= dividers.post_divider;
9368 WREG32_SMC(cntl_reg, tmp);
/* bounded poll (100 iterations) for the status bit */
9370 for (i = 0; i < 100; i++) {
9371 if (RREG32_SMC(status_reg) & DCLK_STATUS)
/* Set both UVD clocks: VCLK then DCLK, each via cik_set_uvd_clock(). */
9381 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9385 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9389 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
/* Program the VCE clocks. The dividers are looked up for @ecclk; the ECLK
 * status bit is polled both before and after writing the new post divider
 * to CG_ECLK_CNTL. (@evclk is accepted for interface symmetry with the UVD
 * path; this visible code only programs ECLK.)
 * Returns 0 on success, negative error code on failure.
 */
9393 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9396 struct atom_clock_dividers dividers;
/* query the divider setup for the requested eclk from the vbios tables
 * (fixed: "÷rs" was HTML-entity mojibake of "&dividers") */
9399 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9400 ecclk, false, &dividers);
/* wait for any in-flight eclk change to settle before reprogramming */
9404 for (i = 0; i < 100; i++) {
9405 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
/* swap in the new post divider without enabling direct control */
9412 tmp = RREG32_SMC(CG_ECLK_CNTL);
9413 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9414 tmp |= dividers.post_divider;
9415 WREG32_SMC(CG_ECLK_CNTL, tmp);
/* bounded poll (100 iterations) for the divider change to take effect */
9417 for (i = 0; i < 100; i++) {
9418 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
/* Bring the PCIe link up to gen2/gen3 speed where both the chip and the
 * upstream bridge support it, performing the gen3 equalization retraining
 * sequence when needed. No-op for IGPs, non-PCIE parts, root-bus devices,
 * or when disabled via radeon.pcie_gen2=0.
 */
9428 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9430 struct pci_dev *root = rdev->pdev->bus->self;
9431 int bridge_pos, gpu_pos;
9432 u32 speed_cntl, mask, current_data_rate;
/* early-out guards: nothing to do in these configurations */
9436 if (pci_is_root_bus(rdev->pdev->bus))
9439 if (radeon_pcie_gen2 == 0)
9442 if (rdev->flags & RADEON_IS_IGP)
9445 if (!(rdev->flags & RADEON_IS_PCIE))
/* query which link speeds the platform supports */
9448 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9452 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
/* read back the currently trained data rate (0=gen1, 1=gen2, 2=gen3) */
9455 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9456 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9457 LC_CURRENT_DATA_RATE_SHIFT;
9458 if (mask & DRM_PCIE_SPEED_80) {
9459 if (current_data_rate == 2) {
9460 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9463 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9464 } else if (mask & DRM_PCIE_SPEED_50) {
9465 if (current_data_rate == 1) {
9466 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9469 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
/* locate the PCIe capability blocks of the bridge and the GPU */
9472 bridge_pos = pci_pcie_cap(root);
9476 gpu_pos = pci_pcie_cap(rdev->pdev);
9480 if (mask & DRM_PCIE_SPEED_80) {
9481 /* re-try equalization if gen3 is not already enabled */
9482 if (current_data_rate != 2) {
9483 u16 bridge_cfg, gpu_cfg;
9484 u16 bridge_cfg2, gpu_cfg2;
9485 u32 max_lw, current_lw, tmp;
/* save LNKCTL on both ends, then force HAWD on for the retrain */
9487 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9488 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9490 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9491 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9493 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9494 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* if the link trained narrower than detected, request an up-configure */
9496 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9497 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9498 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9500 if (current_lw < max_lw) {
9501 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9502 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9503 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9504 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9505 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9506 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
/* bounded wait while GPU transactions are still pending */
9510 for (i = 0; i < 10; i++) {
9512 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9513 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
/* snapshot LNKCTL/LNKCTL2 of both ends so they can be restored below */
9516 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9517 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9519 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9520 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
/* quiesce the link controller during the equalization sequence */
9522 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9523 tmp |= LC_SET_QUIESCE;
9524 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9526 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9528 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
/* restore the saved HAWD bits in LNKCTL on both ends */
9533 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9534 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9535 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9536 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9538 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9539 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9540 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9541 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
/* restore LNKCTL2 fields: bit 4 (enter compliance) and bits 9-11 */
9544 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9545 tmp16 &= ~((1 << 4) | (7 << 9));
9546 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9547 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9549 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9550 tmp16 &= ~((1 << 4) | (7 << 9));
9551 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9552 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
/* release the quiesce */
9554 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9555 tmp &= ~LC_SET_QUIESCE;
9556 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9561 /* set the link speed */
9562 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9563 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9564 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
/* program the target link speed in the GPU's LNKCTL2 */
9566 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9568 if (mask & DRM_PCIE_SPEED_80)
9569 tmp16 |= 3; /* gen3 */
9570 else if (mask & DRM_PCIE_SPEED_50)
9571 tmp16 |= 2; /* gen2 */
9573 tmp16 |= 1; /* gen1 */
9574 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
/* kick off the speed change and wait for the hardware to ack it */
9576 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9577 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9578 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9580 for (i = 0; i < rdev->usec_timeout; i++) {
9581 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9582 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9588 static void cik_program_aspm(struct radeon_device *rdev)
9591 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9592 bool disable_clkreq = false;
9594 if (radeon_aspm == 0)
9597 /* XXX double check IGPs */
9598 if (rdev->flags & RADEON_IS_IGP)
9601 if (!(rdev->flags & RADEON_IS_PCIE))
9604 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9605 data &= ~LC_XMIT_N_FTS_MASK;
9606 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9608 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9610 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9611 data |= LC_GO_TO_RECOVERY;
9613 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9615 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9616 data |= P_IGNORE_EDB_ERR;
9618 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9620 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9621 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9622 data |= LC_PMI_TO_L1_DIS;
9624 data |= LC_L0S_INACTIVITY(7);
9627 data |= LC_L1_INACTIVITY(7);
9628 data &= ~LC_PMI_TO_L1_DIS;
9630 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9632 if (!disable_plloff_in_l1) {
9633 bool clk_req_support;
9635 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9636 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9637 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9639 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9641 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9642 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9643 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9645 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9647 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9648 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9649 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9651 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9653 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9654 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9655 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9657 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9659 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9660 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9661 data |= LC_DYN_LANES_PWR_STATE(3);
9663 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9665 if (!disable_clkreq &&
9666 !pci_is_root_bus(rdev->pdev->bus)) {
9667 struct pci_dev *root = rdev->pdev->bus->self;
9670 clk_req_support = false;
9671 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9672 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9673 clk_req_support = true;
9675 clk_req_support = false;
9678 if (clk_req_support) {
9679 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9680 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9682 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9684 orig = data = RREG32_SMC(THM_CLK_CNTL);
9685 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9686 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9688 WREG32_SMC(THM_CLK_CNTL, data);
9690 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9691 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9692 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9694 WREG32_SMC(MISC_CLK_CTRL, data);
9696 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9697 data &= ~BCLK_AS_XCLK;
9699 WREG32_SMC(CG_CLKPIN_CNTL, data);
9701 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9702 data &= ~FORCE_BIF_REFCLK_EN;
9704 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9706 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9707 data &= ~MPLL_CLKOUT_SEL_MASK;
9708 data |= MPLL_CLKOUT_SEL(4);
9710 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9715 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9718 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9719 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9721 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9724 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9725 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9726 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9727 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9728 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9729 data &= ~LC_L0S_INACTIVITY_MASK;
9731 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);