2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
/* Forward declarations: helpers shared with other ASIC generations
 * (r600/evergreen/si/vce) plus static helpers defined later in this
 * file.  NOTE(review): this extract is missing interior lines (the
 * embedded original line numbers are non-contiguous); declarations
 * are reproduced verbatim, including the truncated final prototype.
 */
40 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
41 extern void r600_ih_ring_fini(struct radeon_device *rdev);
42 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
43 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
44 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
45 extern void sumo_rlc_fini(struct radeon_device *rdev);
46 extern int sumo_rlc_init(struct radeon_device *rdev);
47 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
48 extern void si_rlc_reset(struct radeon_device *rdev);
49 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
50 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
51 extern int cik_sdma_resume(struct radeon_device *rdev);
52 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
53 extern void cik_sdma_fini(struct radeon_device *rdev);
54 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
55 static void cik_rlc_stop(struct radeon_device *rdev);
56 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
57 static void cik_program_aspm(struct radeon_device *rdev);
58 static void cik_init_pg(struct radeon_device *rdev);
59 static void cik_init_cg(struct radeon_device *rdev);
60 static void cik_fini_pg(struct radeon_device *rdev);
61 static void cik_fini_cg(struct radeon_device *rdev);
/* NOTE(review): the parameter list of this last prototype is cut off
 * in this extract; the remainder is not visible here. */
62 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
/* Kernel-doc fragment for the function below; the opening "/**" line
 * and several body lines are missing from this extract. */
66 * cik_get_allowed_info_register - fetch the register for the info ioctl
68 * @rdev: radeon_device pointer
69 * @reg: register offset in bytes
70 * @val: register value
72 * Returns 0 for success or -EINVAL for an invalid register
/* Only the signature and two switch-case labels survive here; the
 * case labels whitelist the SDMA0 status register for both SDMA
 * engine instances (base + per-engine register offset). */
75 int cik_get_allowed_info_register(struct radeon_device *rdev,
87 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
88 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
99 * Indirect registers accessor
/* cik_didt_rreg - read a DIDT indirect register.
 * Writes the register index to CIK_DIDT_IND_INDEX, then reads the
 * value back from CIK_DIDT_IND_DATA.  The index/data pair is
 * serialized with didt_idx_lock (irq-safe spinlock) so concurrent
 * accessors cannot interleave their index/data writes.
 * NOTE(review): local declarations, braces and the return statement
 * are missing from this extract.
 */
101 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
106 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
107 	WREG32(CIK_DIDT_IND_INDEX, (reg));
108 	r = RREG32(CIK_DIDT_IND_DATA);
109 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
/* cik_didt_wreg - write a DIDT indirect register.
 * Mirror of cik_didt_rreg(): select the register via
 * CIK_DIDT_IND_INDEX, write the value to CIK_DIDT_IND_DATA, all
 * under the same irq-safe didt_idx_lock.
 * NOTE(review): braces/locals are missing from this extract.
 */
113 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
117 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
118 	WREG32(CIK_DIDT_IND_INDEX, (reg));
119 	WREG32(CIK_DIDT_IND_DATA, (v));
120 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
123 /* get temperature in millidegrees */
/* ci_get_temp - read the GPU temperature on CI parts.
 * Reads CG_MULT_THERMAL_STATUS through the SMC indirect interface,
 * extracts the CTF_TEMP field, masks the raw value to 9 bits and
 * scales degrees to millidegrees (* 1000).
 * NOTE(review): braces, intermediate lines and the return are
 * missing from this extract.
 */
124 int ci_get_temp(struct radeon_device *rdev)
129 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
135 	actual_temp = temp & 0x1ff;
137 	actual_temp = actual_temp * 1000;
142 /* get temperature in millidegrees */
/* kv_get_temp - read the GPU temperature on KV/KB parts.
 * Reads a raw SMC register (0xC0300E0C -- magic offset, presumably
 * the KV thermal status register; verify against SMC docs), converts
 * with (raw / 8) - 49 and scales to millidegrees.
 * NOTE(review): braces and intermediate lines are missing from this
 * extract.
 */
143 int kv_get_temp(struct radeon_device *rdev)
148 	temp = RREG32_SMC(0xC0300E0C);
151 	actual_temp = (temp / 8) - 49;
155 	actual_temp = actual_temp * 1000;
161 * Indirect registers accessor
/* cik_pciep_rreg - read a PCIE port indirect register.
 * Writes the index to PCIE_INDEX, reads PCIE_INDEX back to flush the
 * posted write, then reads PCIE_DATA; serialized with the irq-safe
 * pciep_idx_lock.
 * NOTE(review): braces/locals/return are missing from this extract.
 */
163 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
168 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
169 	WREG32(PCIE_INDEX, reg);
170 	(void)RREG32(PCIE_INDEX);
171 	r = RREG32(PCIE_DATA);
172 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/* cik_pciep_wreg - write a PCIE port indirect register.
 * Mirror of cik_pciep_rreg(): index write, readback to flush, data
 * write, data readback to flush -- all under pciep_idx_lock.
 * NOTE(review): braces/locals are missing from this extract.
 */
176 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
180 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
181 	WREG32(PCIE_INDEX, reg);
182 	(void)RREG32(PCIE_INDEX);
183 	WREG32(PCIE_DATA, v);
184 	(void)RREG32(PCIE_DATA);
185 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/* RLC save/restore register list for Spectre (Kaveri) parts.
 * Each entry packs (select << 16) | (byte_offset >> 2): the low bits
 * are a dword register offset; the high 16 bits presumably select
 * the SE/SH/instance the access targets -- verify against RLC docs.
 * NOTE(review): this extract is missing the opening brace and every
 * other entry (embedded line numbers step by 2); kept verbatim.
 */
188 static const u32 spectre_rlc_save_restore_register_list[] =
190 	(0x0e00 << 16) | (0xc12c >> 2),
192 	(0x0e00 << 16) | (0xc140 >> 2),
194 	(0x0e00 << 16) | (0xc150 >> 2),
196 	(0x0e00 << 16) | (0xc15c >> 2),
198 	(0x0e00 << 16) | (0xc168 >> 2),
200 	(0x0e00 << 16) | (0xc170 >> 2),
202 	(0x0e00 << 16) | (0xc178 >> 2),
204 	(0x0e00 << 16) | (0xc204 >> 2),
206 	(0x0e00 << 16) | (0xc2b4 >> 2),
208 	(0x0e00 << 16) | (0xc2b8 >> 2),
210 	(0x0e00 << 16) | (0xc2bc >> 2),
212 	(0x0e00 << 16) | (0xc2c0 >> 2),
214 	(0x0e00 << 16) | (0x8228 >> 2),
216 	(0x0e00 << 16) | (0x829c >> 2),
218 	(0x0e00 << 16) | (0x869c >> 2),
220 	(0x0600 << 16) | (0x98f4 >> 2),
222 	(0x0e00 << 16) | (0x98f8 >> 2),
224 	(0x0e00 << 16) | (0x9900 >> 2),
226 	(0x0e00 << 16) | (0xc260 >> 2),
228 	(0x0e00 << 16) | (0x90e8 >> 2),
230 	(0x0e00 << 16) | (0x3c000 >> 2),
232 	(0x0e00 << 16) | (0x3c00c >> 2),
234 	(0x0e00 << 16) | (0x8c1c >> 2),
236 	(0x0e00 << 16) | (0x9700 >> 2),
238 	(0x0e00 << 16) | (0xcd20 >> 2),
/* Same register (0xcd20) repeated with different high-word selects;
 * presumably one entry per instance -- verify. */
240 	(0x4e00 << 16) | (0xcd20 >> 2),
242 	(0x5e00 << 16) | (0xcd20 >> 2),
244 	(0x6e00 << 16) | (0xcd20 >> 2),
246 	(0x7e00 << 16) | (0xcd20 >> 2),
248 	(0x8e00 << 16) | (0xcd20 >> 2),
250 	(0x9e00 << 16) | (0xcd20 >> 2),
252 	(0xae00 << 16) | (0xcd20 >> 2),
254 	(0xbe00 << 16) | (0xcd20 >> 2),
256 	(0x0e00 << 16) | (0x89bc >> 2),
258 	(0x0e00 << 16) | (0x8900 >> 2),
261 	(0x0e00 << 16) | (0xc130 >> 2),
263 	(0x0e00 << 16) | (0xc134 >> 2),
265 	(0x0e00 << 16) | (0xc1fc >> 2),
267 	(0x0e00 << 16) | (0xc208 >> 2),
269 	(0x0e00 << 16) | (0xc264 >> 2),
271 	(0x0e00 << 16) | (0xc268 >> 2),
273 	(0x0e00 << 16) | (0xc26c >> 2),
275 	(0x0e00 << 16) | (0xc270 >> 2),
277 	(0x0e00 << 16) | (0xc274 >> 2),
279 	(0x0e00 << 16) | (0xc278 >> 2),
281 	(0x0e00 << 16) | (0xc27c >> 2),
283 	(0x0e00 << 16) | (0xc280 >> 2),
285 	(0x0e00 << 16) | (0xc284 >> 2),
287 	(0x0e00 << 16) | (0xc288 >> 2),
289 	(0x0e00 << 16) | (0xc28c >> 2),
291 	(0x0e00 << 16) | (0xc290 >> 2),
293 	(0x0e00 << 16) | (0xc294 >> 2),
295 	(0x0e00 << 16) | (0xc298 >> 2),
297 	(0x0e00 << 16) | (0xc29c >> 2),
299 	(0x0e00 << 16) | (0xc2a0 >> 2),
301 	(0x0e00 << 16) | (0xc2a4 >> 2),
303 	(0x0e00 << 16) | (0xc2a8 >> 2),
305 	(0x0e00 << 16) | (0xc2ac  >> 2),
307 	(0x0e00 << 16) | (0xc2b0 >> 2),
309 	(0x0e00 << 16) | (0x301d0 >> 2),
311 	(0x0e00 << 16) | (0x30238 >> 2),
313 	(0x0e00 << 16) | (0x30250 >> 2),
315 	(0x0e00 << 16) | (0x30254 >> 2),
317 	(0x0e00 << 16) | (0x30258 >> 2),
319 	(0x0e00 << 16) | (0x3025c >> 2),
321 	(0x4e00 << 16) | (0xc900 >> 2),
323 	(0x5e00 << 16) | (0xc900 >> 2),
325 	(0x6e00 << 16) | (0xc900 >> 2),
327 	(0x7e00 << 16) | (0xc900 >> 2),
329 	(0x8e00 << 16) | (0xc900 >> 2),
331 	(0x9e00 << 16) | (0xc900 >> 2),
333 	(0xae00 << 16) | (0xc900 >> 2),
335 	(0xbe00 << 16) | (0xc900 >> 2),
337 	(0x4e00 << 16) | (0xc904 >> 2),
339 	(0x5e00 << 16) | (0xc904 >> 2),
341 	(0x6e00 << 16) | (0xc904 >> 2),
343 	(0x7e00 << 16) | (0xc904 >> 2),
345 	(0x8e00 << 16) | (0xc904 >> 2),
347 	(0x9e00 << 16) | (0xc904 >> 2),
349 	(0xae00 << 16) | (0xc904 >> 2),
351 	(0xbe00 << 16) | (0xc904 >> 2),
353 	(0x4e00 << 16) | (0xc908 >> 2),
355 	(0x5e00 << 16) | (0xc908 >> 2),
357 	(0x6e00 << 16) | (0xc908 >> 2),
359 	(0x7e00 << 16) | (0xc908 >> 2),
361 	(0x8e00 << 16) | (0xc908 >> 2),
363 	(0x9e00 << 16) | (0xc908 >> 2),
365 	(0xae00 << 16) | (0xc908 >> 2),
367 	(0xbe00 << 16) | (0xc908 >> 2),
369 	(0x4e00 << 16) | (0xc90c >> 2),
371 	(0x5e00 << 16) | (0xc90c >> 2),
373 	(0x6e00 << 16) | (0xc90c >> 2),
375 	(0x7e00 << 16) | (0xc90c >> 2),
377 	(0x8e00 << 16) | (0xc90c >> 2),
379 	(0x9e00 << 16) | (0xc90c >> 2),
381 	(0xae00 << 16) | (0xc90c >> 2),
383 	(0xbe00 << 16) | (0xc90c >> 2),
385 	(0x4e00 << 16) | (0xc910 >> 2),
387 	(0x5e00 << 16) | (0xc910 >> 2),
389 	(0x6e00 << 16) | (0xc910 >> 2),
391 	(0x7e00 << 16) | (0xc910 >> 2),
393 	(0x8e00 << 16) | (0xc910 >> 2),
395 	(0x9e00 << 16) | (0xc910 >> 2),
397 	(0xae00 << 16) | (0xc910 >> 2),
399 	(0xbe00 << 16) | (0xc910 >> 2),
401 	(0x0e00 << 16) | (0xc99c >> 2),
403 	(0x0e00 << 16) | (0x9834 >> 2),
405 	(0x0000 << 16) | (0x30f00 >> 2),
407 	(0x0001 << 16) | (0x30f00 >> 2),
409 	(0x0000 << 16) | (0x30f04 >> 2),
411 	(0x0001 << 16) | (0x30f04 >> 2),
413 	(0x0000 << 16) | (0x30f08 >> 2),
415 	(0x0001 << 16) | (0x30f08 >> 2),
417 	(0x0000 << 16) | (0x30f0c >> 2),
419 	(0x0001 << 16) | (0x30f0c >> 2),
421 	(0x0600 << 16) | (0x9b7c >> 2),
423 	(0x0e00 << 16) | (0x8a14 >> 2),
425 	(0x0e00 << 16) | (0x8a18 >> 2),
427 	(0x0600 << 16) | (0x30a00 >> 2),
429 	(0x0e00 << 16) | (0x8bf0 >> 2),
431 	(0x0e00 << 16) | (0x8bcc >> 2),
433 	(0x0e00 << 16) | (0x8b24 >> 2),
435 	(0x0e00 << 16) | (0x30a04 >> 2),
437 	(0x0600 << 16) | (0x30a10 >> 2),
439 	(0x0600 << 16) | (0x30a14 >> 2),
441 	(0x0600 << 16) | (0x30a18 >> 2),
443 	(0x0600 << 16) | (0x30a2c >> 2),
445 	(0x0e00 << 16) | (0xc700 >> 2),
447 	(0x0e00 << 16) | (0xc704 >> 2),
449 	(0x0e00 << 16) | (0xc708 >> 2),
451 	(0x0e00 << 16) | (0xc768 >> 2),
453 	(0x0400 << 16) | (0xc770 >> 2),
455 	(0x0400 << 16) | (0xc774 >> 2),
457 	(0x0400 << 16) | (0xc778 >> 2),
459 	(0x0400 << 16) | (0xc77c >> 2),
461 	(0x0400 << 16) | (0xc780 >> 2),
463 	(0x0400 << 16) | (0xc784 >> 2),
465 	(0x0400 << 16) | (0xc788 >> 2),
467 	(0x0400 << 16) | (0xc78c >> 2),
469 	(0x0400 << 16) | (0xc798 >> 2),
471 	(0x0400 << 16) | (0xc79c >> 2),
473 	(0x0400 << 16) | (0xc7a0 >> 2),
475 	(0x0400 << 16) | (0xc7a4 >> 2),
477 	(0x0400 << 16) | (0xc7a8 >> 2),
479 	(0x0400 << 16) | (0xc7ac >> 2),
481 	(0x0400 << 16) | (0xc7b0 >> 2),
483 	(0x0400 << 16) | (0xc7b4 >> 2),
485 	(0x0e00 << 16) | (0x9100 >> 2),
487 	(0x0e00 << 16) | (0x3c010 >> 2),
489 	(0x0e00 << 16) | (0x92a8 >> 2),
491 	(0x0e00 << 16) | (0x92ac >> 2),
493 	(0x0e00 << 16) | (0x92b4 >> 2),
495 	(0x0e00 << 16) | (0x92b8 >> 2),
497 	(0x0e00 << 16) | (0x92bc >> 2),
499 	(0x0e00 << 16) | (0x92c0 >> 2),
501 	(0x0e00 << 16) | (0x92c4 >> 2),
503 	(0x0e00 << 16) | (0x92c8 >> 2),
505 	(0x0e00 << 16) | (0x92cc >> 2),
507 	(0x0e00 << 16) | (0x92d0 >> 2),
509 	(0x0e00 << 16) | (0x8c00 >> 2),
511 	(0x0e00 << 16) | (0x8c04 >> 2),
513 	(0x0e00 << 16) | (0x8c20 >> 2),
515 	(0x0e00 << 16) | (0x8c38 >> 2),
517 	(0x0e00 << 16) | (0x8c3c >> 2),
519 	(0x0e00 << 16) | (0xae00 >> 2),
521 	(0x0e00 << 16) | (0x9604 >> 2),
523 	(0x0e00 << 16) | (0xac08 >> 2),
525 	(0x0e00 << 16) | (0xac0c >> 2),
527 	(0x0e00 << 16) | (0xac10 >> 2),
529 	(0x0e00 << 16) | (0xac14 >> 2),
531 	(0x0e00 << 16) | (0xac58 >> 2),
533 	(0x0e00 << 16) | (0xac68 >> 2),
535 	(0x0e00 << 16) | (0xac6c >> 2),
537 	(0x0e00 << 16) | (0xac70 >> 2),
539 	(0x0e00 << 16) | (0xac74 >> 2),
541 	(0x0e00 << 16) | (0xac78 >> 2),
543 	(0x0e00 << 16) | (0xac7c >> 2),
545 	(0x0e00 << 16) | (0xac80 >> 2),
547 	(0x0e00 << 16) | (0xac84 >> 2),
549 	(0x0e00 << 16) | (0xac88 >> 2),
551 	(0x0e00 << 16) | (0xac8c >> 2),
553 	(0x0e00 << 16) | (0x970c >> 2),
555 	(0x0e00 << 16) | (0x9714 >> 2),
557 	(0x0e00 << 16) | (0x9718 >> 2),
559 	(0x0e00 << 16) | (0x971c >> 2),
561 	(0x0e00 << 16) | (0x31068 >> 2),
563 	(0x4e00 << 16) | (0x31068 >> 2),
565 	(0x5e00 << 16) | (0x31068 >> 2),
567 	(0x6e00 << 16) | (0x31068 >> 2),
569 	(0x7e00 << 16) | (0x31068 >> 2),
571 	(0x8e00 << 16) | (0x31068 >> 2),
573 	(0x9e00 << 16) | (0x31068 >> 2),
575 	(0xae00 << 16) | (0x31068 >> 2),
577 	(0xbe00 << 16) | (0x31068 >> 2),
579 	(0x0e00 << 16) | (0xcd10 >> 2),
581 	(0x0e00 << 16) | (0xcd14 >> 2),
583 	(0x0e00 << 16) | (0x88b0 >> 2),
585 	(0x0e00 << 16) | (0x88b4 >> 2),
587 	(0x0e00 << 16) | (0x88b8 >> 2),
589 	(0x0e00 << 16) | (0x88bc >> 2),
591 	(0x0400 << 16) | (0x89c0 >> 2),
593 	(0x0e00 << 16) | (0x88c4 >> 2),
595 	(0x0e00 << 16) | (0x88c8 >> 2),
597 	(0x0e00 << 16) | (0x88d0 >> 2),
599 	(0x0e00 << 16) | (0x88d4 >> 2),
601 	(0x0e00 << 16) | (0x88d8 >> 2),
603 	(0x0e00 << 16) | (0x8980 >> 2),
605 	(0x0e00 << 16) | (0x30938 >> 2),
607 	(0x0e00 << 16) | (0x3093c >> 2),
609 	(0x0e00 << 16) | (0x30940 >> 2),
611 	(0x0e00 << 16) | (0x89a0 >> 2),
613 	(0x0e00 << 16) | (0x30900 >> 2),
615 	(0x0e00 << 16) | (0x30904 >> 2),
617 	(0x0e00 << 16) | (0x89b4 >> 2),
619 	(0x0e00 << 16) | (0x3c210 >> 2),
621 	(0x0e00 << 16) | (0x3c214 >> 2),
623 	(0x0e00 << 16) | (0x3c218 >> 2),
625 	(0x0e00 << 16) | (0x8904 >> 2),
628 	(0x0e00 << 16) | (0x8c28 >> 2),
629 	(0x0e00 << 16) | (0x8c2c >> 2),
630 	(0x0e00 << 16) | (0x8c30 >> 2),
631 	(0x0e00 << 16) | (0x8c34 >> 2),
632 	(0x0e00 << 16) | (0x9600 >> 2),
/* RLC save/restore register list for Kalindi (Kabini) parts.
 * Same (select << 16) | (byte_offset >> 2) packing as the Spectre
 * list above, with fewer per-instance entries (smaller part).
 * NOTE(review): opening brace and interleaved entries are missing
 * from this extract; entries kept verbatim.
 */
635 static const u32 kalindi_rlc_save_restore_register_list[] =
637 	(0x0e00 << 16) | (0xc12c >> 2),
639 	(0x0e00 << 16) | (0xc140 >> 2),
641 	(0x0e00 << 16) | (0xc150 >> 2),
643 	(0x0e00 << 16) | (0xc15c >> 2),
645 	(0x0e00 << 16) | (0xc168 >> 2),
647 	(0x0e00 << 16) | (0xc170 >> 2),
649 	(0x0e00 << 16) | (0xc204 >> 2),
651 	(0x0e00 << 16) | (0xc2b4 >> 2),
653 	(0x0e00 << 16) | (0xc2b8 >> 2),
655 	(0x0e00 << 16) | (0xc2bc >> 2),
657 	(0x0e00 << 16) | (0xc2c0 >> 2),
659 	(0x0e00 << 16) | (0x8228 >> 2),
661 	(0x0e00 << 16) | (0x829c >> 2),
663 	(0x0e00 << 16) | (0x869c >> 2),
665 	(0x0600 << 16) | (0x98f4 >> 2),
667 	(0x0e00 << 16) | (0x98f8 >> 2),
669 	(0x0e00 << 16) | (0x9900 >> 2),
671 	(0x0e00 << 16) | (0xc260 >> 2),
673 	(0x0e00 << 16) | (0x90e8 >> 2),
675 	(0x0e00 << 16) | (0x3c000 >> 2),
677 	(0x0e00 << 16) | (0x3c00c >> 2),
679 	(0x0e00 << 16) | (0x8c1c >> 2),
681 	(0x0e00 << 16) | (0x9700 >> 2),
683 	(0x0e00 << 16) | (0xcd20 >> 2),
685 	(0x4e00 << 16) | (0xcd20 >> 2),
687 	(0x5e00 << 16) | (0xcd20 >> 2),
689 	(0x6e00 << 16) | (0xcd20 >> 2),
691 	(0x7e00 << 16) | (0xcd20 >> 2),
693 	(0x0e00 << 16) | (0x89bc >> 2),
695 	(0x0e00 << 16) | (0x8900 >> 2),
698 	(0x0e00 << 16) | (0xc130 >> 2),
700 	(0x0e00 << 16) | (0xc134 >> 2),
702 	(0x0e00 << 16) | (0xc1fc >> 2),
704 	(0x0e00 << 16) | (0xc208 >> 2),
706 	(0x0e00 << 16) | (0xc264 >> 2),
708 	(0x0e00 << 16) | (0xc268 >> 2),
710 	(0x0e00 << 16) | (0xc26c >> 2),
712 	(0x0e00 << 16) | (0xc270 >> 2),
714 	(0x0e00 << 16) | (0xc274 >> 2),
716 	(0x0e00 << 16) | (0xc28c >> 2),
718 	(0x0e00 << 16) | (0xc290 >> 2),
720 	(0x0e00 << 16) | (0xc294 >> 2),
722 	(0x0e00 << 16) | (0xc298 >> 2),
724 	(0x0e00 << 16) | (0xc2a0 >> 2),
726 	(0x0e00 << 16) | (0xc2a4 >> 2),
728 	(0x0e00 << 16) | (0xc2a8 >> 2),
730 	(0x0e00 << 16) | (0xc2ac >> 2),
732 	(0x0e00 << 16) | (0x301d0 >> 2),
734 	(0x0e00 << 16) | (0x30238 >> 2),
736 	(0x0e00 << 16) | (0x30250 >> 2),
738 	(0x0e00 << 16) | (0x30254 >> 2),
740 	(0x0e00 << 16) | (0x30258 >> 2),
742 	(0x0e00 << 16) | (0x3025c >> 2),
744 	(0x4e00 << 16) | (0xc900 >> 2),
746 	(0x5e00 << 16) | (0xc900 >> 2),
748 	(0x6e00 << 16) | (0xc900 >> 2),
750 	(0x7e00 << 16) | (0xc900 >> 2),
752 	(0x4e00 << 16) | (0xc904 >> 2),
754 	(0x5e00 << 16) | (0xc904 >> 2),
756 	(0x6e00 << 16) | (0xc904 >> 2),
758 	(0x7e00 << 16) | (0xc904 >> 2),
760 	(0x4e00 << 16) | (0xc908 >> 2),
762 	(0x5e00 << 16) | (0xc908 >> 2),
764 	(0x6e00 << 16) | (0xc908 >> 2),
766 	(0x7e00 << 16) | (0xc908 >> 2),
768 	(0x4e00 << 16) | (0xc90c >> 2),
770 	(0x5e00 << 16) | (0xc90c >> 2),
772 	(0x6e00 << 16) | (0xc90c >> 2),
774 	(0x7e00 << 16) | (0xc90c >> 2),
776 	(0x4e00 << 16) | (0xc910 >> 2),
778 	(0x5e00 << 16) | (0xc910 >> 2),
780 	(0x6e00 << 16) | (0xc910 >> 2),
782 	(0x7e00 << 16) | (0xc910 >> 2),
784 	(0x0e00 << 16) | (0xc99c >> 2),
786 	(0x0e00 << 16) | (0x9834 >> 2),
788 	(0x0000 << 16) | (0x30f00 >> 2),
790 	(0x0000 << 16) | (0x30f04 >> 2),
792 	(0x0000 << 16) | (0x30f08 >> 2),
794 	(0x0000 << 16) | (0x30f0c >> 2),
796 	(0x0600 << 16) | (0x9b7c >> 2),
798 	(0x0e00 << 16) | (0x8a14 >> 2),
800 	(0x0e00 << 16) | (0x8a18 >> 2),
802 	(0x0600 << 16) | (0x30a00 >> 2),
804 	(0x0e00 << 16) | (0x8bf0 >> 2),
806 	(0x0e00 << 16) | (0x8bcc >> 2),
808 	(0x0e00 << 16) | (0x8b24 >> 2),
810 	(0x0e00 << 16) | (0x30a04 >> 2),
812 	(0x0600 << 16) | (0x30a10 >> 2),
814 	(0x0600 << 16) | (0x30a14 >> 2),
816 	(0x0600 << 16) | (0x30a18 >> 2),
818 	(0x0600 << 16) | (0x30a2c >> 2),
820 	(0x0e00 << 16) | (0xc700 >> 2),
822 	(0x0e00 << 16) | (0xc704 >> 2),
824 	(0x0e00 << 16) | (0xc708 >> 2),
826 	(0x0e00 << 16) | (0xc768 >> 2),
828 	(0x0400 << 16) | (0xc770 >> 2),
830 	(0x0400 << 16) | (0xc774 >> 2),
832 	(0x0400 << 16) | (0xc798 >> 2),
834 	(0x0400 << 16) | (0xc79c >> 2),
836 	(0x0e00 << 16) | (0x9100 >> 2),
838 	(0x0e00 << 16) | (0x3c010 >> 2),
840 	(0x0e00 << 16) | (0x8c00 >> 2),
842 	(0x0e00 << 16) | (0x8c04 >> 2),
844 	(0x0e00 << 16) | (0x8c20 >> 2),
846 	(0x0e00 << 16) | (0x8c38 >> 2),
848 	(0x0e00 << 16) | (0x8c3c >> 2),
850 	(0x0e00 << 16) | (0xae00 >> 2),
852 	(0x0e00 << 16) | (0x9604 >> 2),
854 	(0x0e00 << 16) | (0xac08 >> 2),
856 	(0x0e00 << 16) | (0xac0c >> 2),
858 	(0x0e00 << 16) | (0xac10 >> 2),
860 	(0x0e00 << 16) | (0xac14 >> 2),
862 	(0x0e00 << 16) | (0xac58 >> 2),
864 	(0x0e00 << 16) | (0xac68 >> 2),
866 	(0x0e00 << 16) | (0xac6c >> 2),
868 	(0x0e00 << 16) | (0xac70 >> 2),
870 	(0x0e00 << 16) | (0xac74 >> 2),
872 	(0x0e00 << 16) | (0xac78 >> 2),
874 	(0x0e00 << 16) | (0xac7c >> 2),
876 	(0x0e00 << 16) | (0xac80 >> 2),
878 	(0x0e00 << 16) | (0xac84 >> 2),
880 	(0x0e00 << 16) | (0xac88 >> 2),
882 	(0x0e00 << 16) | (0xac8c >> 2),
884 	(0x0e00 << 16) | (0x970c >> 2),
886 	(0x0e00 << 16) | (0x9714 >> 2),
888 	(0x0e00 << 16) | (0x9718 >> 2),
890 	(0x0e00 << 16) | (0x971c >> 2),
892 	(0x0e00 << 16) | (0x31068 >> 2),
894 	(0x4e00 << 16) | (0x31068 >> 2),
896 	(0x5e00 << 16) | (0x31068 >> 2),
898 	(0x6e00 << 16) | (0x31068 >> 2),
900 	(0x7e00 << 16) | (0x31068 >> 2),
902 	(0x0e00 << 16) | (0xcd10 >> 2),
904 	(0x0e00 << 16) | (0xcd14 >> 2),
906 	(0x0e00 << 16) | (0x88b0 >> 2),
908 	(0x0e00 << 16) | (0x88b4 >> 2),
910 	(0x0e00 << 16) | (0x88b8 >> 2),
912 	(0x0e00 << 16) | (0x88bc >> 2),
914 	(0x0400 << 16) | (0x89c0 >> 2),
916 	(0x0e00 << 16) | (0x88c4 >> 2),
918 	(0x0e00 << 16) | (0x88c8 >> 2),
920 	(0x0e00 << 16) | (0x88d0 >> 2),
922 	(0x0e00 << 16) | (0x88d4 >> 2),
924 	(0x0e00 << 16) | (0x88d8 >> 2),
926 	(0x0e00 << 16) | (0x8980 >> 2),
928 	(0x0e00 << 16) | (0x30938 >> 2),
930 	(0x0e00 << 16) | (0x3093c >> 2),
932 	(0x0e00 << 16) | (0x30940 >> 2),
934 	(0x0e00 << 16) | (0x89a0 >> 2),
936 	(0x0e00 << 16) | (0x30900 >> 2),
938 	(0x0e00 << 16) | (0x30904 >> 2),
940 	(0x0e00 << 16) | (0x89b4 >> 2),
942 	(0x0e00 << 16) | (0x3e1fc >> 2),
944 	(0x0e00 << 16) | (0x3c210 >> 2),
946 	(0x0e00 << 16) | (0x3c214 >> 2),
948 	(0x0e00 << 16) | (0x3c218 >> 2),
950 	(0x0e00 << 16) | (0x8904 >> 2),
953 	(0x0e00 << 16) | (0x8c28 >> 2),
954 	(0x0e00 << 16) | (0x8c2c >> 2),
955 	(0x0e00 << 16) | (0x8c30 >> 2),
956 	(0x0e00 << 16) | (0x8c34 >> 2),
957 	(0x0e00 << 16) | (0x9600 >> 2),
/* Bonaire SPM golden-register triplet: {offset, AND mask, OR value};
 * presumably applied by radeon_program_register_sequence() -- verify
 * against the cik_init_golden_registers() caller (not in this view).
 * NOTE(review): braces are missing from this extract.
 */
960 static const u32 bonaire_golden_spm_registers[] =
962 	0x30800, 0xe0ffffff, 0xe0000000
/* Bonaire common golden registers: {offset, AND mask, OR value}
 * triplets; same 0xc770..0xc79c set appears for the other ASICs
 * below.  NOTE(review): braces missing from this extract.
 */
965 static const u32 bonaire_golden_common_registers[] =
967 	0xc770, 0xffffffff, 0x00000800,
968 	0xc774, 0xffffffff, 0x00000800,
969 	0xc798, 0xffffffff, 0x00007fbf,
970 	0xc79c, 0xffffffff, 0x00007faf
/* Bonaire golden register fix-ups: {offset, AND mask, OR value}
 * triplets (hardware-recommended initial values).
 * NOTE(review): braces missing from this extract; values verbatim.
 */
973 static const u32 bonaire_golden_registers[] =
975 	0x3354, 0x00000333, 0x00000333,
976 	0x3350, 0x000c0fc0, 0x00040200,
977 	0x9a10, 0x00010000, 0x00058208,
978 	0x3c000, 0xffff1fff, 0x00140000,
979 	0x3c200, 0xfdfc0fff, 0x00000100,
980 	0x3c234, 0x40000000, 0x40000200,
981 	0x9830, 0xffffffff, 0x00000000,
982 	0x9834, 0xf00fffff, 0x00000400,
983 	0x9838, 0x0002021c, 0x00020200,
984 	0xc78, 0x00000080, 0x00000000,
985 	0x5bb0, 0x000000f0, 0x00000070,
986 	0x5bc0, 0xf0311fff, 0x80300000,
987 	0x98f8, 0x73773777, 0x12010001,
988 	0x350c, 0x00810000, 0x408af000,
989 	0x7030, 0x31000111, 0x00000011,
990 	0x2f48, 0x73773777, 0x12010001,
991 	0x220c, 0x00007fb6, 0x0021a1b1,
992 	0x2210, 0x00007fb6, 0x002021b1,
993 	0x2180, 0x00007fb6, 0x00002191,
994 	0x2218, 0x00007fb6, 0x002121b1,
995 	0x221c, 0x00007fb6, 0x002021b1,
996 	0x21dc, 0x00007fb6, 0x00002191,
997 	0x21e0, 0x00007fb6, 0x00002191,
998 	0x3628, 0x0000003f, 0x0000000a,
999 	0x362c, 0x0000003f, 0x0000000a,
1000 	0x2ae4, 0x00073ffe, 0x000022a2,
1001 	0x240c, 0x000007ff, 0x00000000,
1002 	0x8a14, 0xf000003f, 0x00000007,
1003 	0x8bf0, 0x00002001, 0x00000001,
1004 	0x8b24, 0xffffffff, 0x00ffffff,
1005 	0x30a04, 0x0000ff0f, 0x00000000,
1006 	0x28a4c, 0x07ffffff, 0x06000000,
1007 	0x4d8, 0x00000fff, 0x00000100,
1008 	0x3e78, 0x00000001, 0x00000002,
1009 	0x9100, 0x03000000, 0x0362c688,
1010 	0x8c00, 0x000000ff, 0x00000001,
1011 	0xe40, 0x00001fff, 0x00001fff,
1012 	0x9060, 0x0000007f, 0x00000020,
1013 	0x9508, 0x00010000, 0x00010000,
1014 	0xac14, 0x000003ff, 0x000000f3,
1015 	0xac0c, 0xffffffff, 0x00001032
/* Bonaire medium-grain / coarse-grain clock-gating init sequence:
 * {offset, AND mask, OR value} triplets.  NOTE(review): braces
 * missing from this extract; values verbatim.
 */
1018 static const u32 bonaire_mgcg_cgcg_init[] =
1020 	0xc420, 0xffffffff, 0xfffffffc,
1021 	0x30800, 0xffffffff, 0xe0000000,
1022 	0x3c2a0, 0xffffffff, 0x00000100,
1023 	0x3c208, 0xffffffff, 0x00000100,
1024 	0x3c2c0, 0xffffffff, 0xc0000100,
1025 	0x3c2c8, 0xffffffff, 0xc0000100,
1026 	0x3c2c4, 0xffffffff, 0xc0000100,
1027 	0x55e4, 0xffffffff, 0x00600100,
1028 	0x3c280, 0xffffffff, 0x00000100,
1029 	0x3c214, 0xffffffff, 0x06000100,
1030 	0x3c220, 0xffffffff, 0x00000100,
1031 	0x3c218, 0xffffffff, 0x06000100,
1032 	0x3c204, 0xffffffff, 0x00000100,
1033 	0x3c2e0, 0xffffffff, 0x00000100,
1034 	0x3c224, 0xffffffff, 0x00000100,
1035 	0x3c200, 0xffffffff, 0x00000100,
1036 	0x3c230, 0xffffffff, 0x00000100,
1037 	0x3c234, 0xffffffff, 0x00000100,
1038 	0x3c250, 0xffffffff, 0x00000100,
1039 	0x3c254, 0xffffffff, 0x00000100,
1040 	0x3c258, 0xffffffff, 0x00000100,
1041 	0x3c25c, 0xffffffff, 0x00000100,
1042 	0x3c260, 0xffffffff, 0x00000100,
1043 	0x3c27c, 0xffffffff, 0x00000100,
1044 	0x3c278, 0xffffffff, 0x00000100,
1045 	0x3c210, 0xffffffff, 0x06000100,
1046 	0x3c290, 0xffffffff, 0x00000100,
1047 	0x3c274, 0xffffffff, 0x00000100,
1048 	0x3c2b4, 0xffffffff, 0x00000100,
1049 	0x3c2b0, 0xffffffff, 0x00000100,
1050 	0x3c270, 0xffffffff, 0x00000100,
1051 	0x30800, 0xffffffff, 0xe0000000,
1052 	0x3c020, 0xffffffff, 0x00010000,
1053 	0x3c024, 0xffffffff, 0x00030002,
1054 	0x3c028, 0xffffffff, 0x00040007,
1055 	0x3c02c, 0xffffffff, 0x00060005,
1056 	0x3c030, 0xffffffff, 0x00090008,
1057 	0x3c034, 0xffffffff, 0x00010000,
1058 	0x3c038, 0xffffffff, 0x00030002,
1059 	0x3c03c, 0xffffffff, 0x00040007,
1060 	0x3c040, 0xffffffff, 0x00060005,
1061 	0x3c044, 0xffffffff, 0x00090008,
1062 	0x3c048, 0xffffffff, 0x00010000,
1063 	0x3c04c, 0xffffffff, 0x00030002,
1064 	0x3c050, 0xffffffff, 0x00040007,
1065 	0x3c054, 0xffffffff, 0x00060005,
1066 	0x3c058, 0xffffffff, 0x00090008,
1067 	0x3c05c, 0xffffffff, 0x00010000,
1068 	0x3c060, 0xffffffff, 0x00030002,
1069 	0x3c064, 0xffffffff, 0x00040007,
1070 	0x3c068, 0xffffffff, 0x00060005,
1071 	0x3c06c, 0xffffffff, 0x00090008,
1072 	0x3c070, 0xffffffff, 0x00010000,
1073 	0x3c074, 0xffffffff, 0x00030002,
1074 	0x3c078, 0xffffffff, 0x00040007,
1075 	0x3c07c, 0xffffffff, 0x00060005,
1076 	0x3c080, 0xffffffff, 0x00090008,
1077 	0x3c084, 0xffffffff, 0x00010000,
1078 	0x3c088, 0xffffffff, 0x00030002,
1079 	0x3c08c, 0xffffffff, 0x00040007,
1080 	0x3c090, 0xffffffff, 0x00060005,
1081 	0x3c094, 0xffffffff, 0x00090008,
1082 	0x3c098, 0xffffffff, 0x00010000,
1083 	0x3c09c, 0xffffffff, 0x00030002,
1084 	0x3c0a0, 0xffffffff, 0x00040007,
1085 	0x3c0a4, 0xffffffff, 0x00060005,
1086 	0x3c0a8, 0xffffffff, 0x00090008,
1087 	0x3c000, 0xffffffff, 0x96e00200,
1088 	0x8708, 0xffffffff, 0x00900100,
1089 	0xc424, 0xffffffff, 0x0020003f,
1090 	0x38, 0xffffffff, 0x0140001c,
1091 	0x3c, 0x000f0000, 0x000f0000,
1092 	0x220, 0xffffffff, 0xC060000C,
1093 	0x224, 0xc0000fff, 0x00000100,
1094 	0xf90, 0xffffffff, 0x00000100,
1095 	0xf98, 0x00000101, 0x00000000,
1096 	0x20a8, 0xffffffff, 0x00000104,
1097 	0x55e4, 0xff000fff, 0x00000100,
1098 	0x30cc, 0xc0000fff, 0x00000104,
1099 	0xc1e4, 0x00000001, 0x00000001,
1100 	0xd00c, 0xff000ff0, 0x00000100,
1101 	0xd80c, 0xff000ff0, 0x00000100
/* Spectre SPM golden-register triplet ({offset, mask, value});
 * identical to the Bonaire entry above.  NOTE(review): braces
 * missing from this extract.
 */
1104 static const u32 spectre_golden_spm_registers[] =
1106 	0x30800, 0xe0ffffff, 0xe0000000
/* Spectre common golden registers ({offset, mask, value} triplets);
 * same set as Bonaire.  NOTE(review): braces missing from this
 * extract.
 */
1109 static const u32 spectre_golden_common_registers[] =
1111 	0xc770, 0xffffffff, 0x00000800,
1112 	0xc774, 0xffffffff, 0x00000800,
1113 	0xc798, 0xffffffff, 0x00007fbf,
1114 	0xc79c, 0xffffffff, 0x00007faf
/* Spectre golden register fix-ups ({offset, mask, value} triplets).
 * NOTE(review): braces missing from this extract; values verbatim.
 */
1117 static const u32 spectre_golden_registers[] =
1119 	0x3c000, 0xffff1fff, 0x96940200,
1120 	0x3c00c, 0xffff0001, 0xff000000,
1121 	0x3c200, 0xfffc0fff, 0x00000100,
1122 	0x6ed8, 0x00010101, 0x00010000,
1123 	0x9834, 0xf00fffff, 0x00000400,
1124 	0x9838, 0xfffffffc, 0x00020200,
1125 	0x5bb0, 0x000000f0, 0x00000070,
1126 	0x5bc0, 0xf0311fff, 0x80300000,
1127 	0x98f8, 0x73773777, 0x12010001,
1128 	0x9b7c, 0x00ff0000, 0x00fc0000,
1129 	0x2f48, 0x73773777, 0x12010001,
1130 	0x8a14, 0xf000003f, 0x00000007,
1131 	0x8b24, 0xffffffff, 0x00ffffff,
1132 	0x28350, 0x3f3f3fff, 0x00000082,
1133 	0x28354, 0x0000003f, 0x00000000,
1134 	0x3e78, 0x00000001, 0x00000002,
1135 	0x913c, 0xffff03df, 0x00000004,
1136 	0xc768, 0x00000008, 0x00000008,
1137 	0x8c00, 0x000008ff, 0x00000800,
1138 	0x9508, 0x00010000, 0x00010000,
1139 	0xac0c, 0xffffffff, 0x54763210,
1140 	0x214f8, 0x01ff01ff, 0x00000002,
1141 	0x21498, 0x007ff800, 0x00200000,
1142 	0x2015c, 0xffffffff, 0x00000f40,
1143 	0x30934, 0xffffffff, 0x00000001
/* Spectre MGCG/CGCG clock-gating init sequence: {offset, mask,
 * value} triplets.  Differs from Bonaire's mainly in the 0x3c2c*
 * values and the longer 0x3c0ac..0x3c0bc tail.  NOTE(review):
 * braces missing from this extract; values verbatim.
 */
1146 static const u32 spectre_mgcg_cgcg_init[] =
1148 	0xc420, 0xffffffff, 0xfffffffc,
1149 	0x30800, 0xffffffff, 0xe0000000,
1150 	0x3c2a0, 0xffffffff, 0x00000100,
1151 	0x3c208, 0xffffffff, 0x00000100,
1152 	0x3c2c0, 0xffffffff, 0x00000100,
1153 	0x3c2c8, 0xffffffff, 0x00000100,
1154 	0x3c2c4, 0xffffffff, 0x00000100,
1155 	0x55e4, 0xffffffff, 0x00600100,
1156 	0x3c280, 0xffffffff, 0x00000100,
1157 	0x3c214, 0xffffffff, 0x06000100,
1158 	0x3c220, 0xffffffff, 0x00000100,
1159 	0x3c218, 0xffffffff, 0x06000100,
1160 	0x3c204, 0xffffffff, 0x00000100,
1161 	0x3c2e0, 0xffffffff, 0x00000100,
1162 	0x3c224, 0xffffffff, 0x00000100,
1163 	0x3c200, 0xffffffff, 0x00000100,
1164 	0x3c230, 0xffffffff, 0x00000100,
1165 	0x3c234, 0xffffffff, 0x00000100,
1166 	0x3c250, 0xffffffff, 0x00000100,
1167 	0x3c254, 0xffffffff, 0x00000100,
1168 	0x3c258, 0xffffffff, 0x00000100,
1169 	0x3c25c, 0xffffffff, 0x00000100,
1170 	0x3c260, 0xffffffff, 0x00000100,
1171 	0x3c27c, 0xffffffff, 0x00000100,
1172 	0x3c278, 0xffffffff, 0x00000100,
1173 	0x3c210, 0xffffffff, 0x06000100,
1174 	0x3c290, 0xffffffff, 0x00000100,
1175 	0x3c274, 0xffffffff, 0x00000100,
1176 	0x3c2b4, 0xffffffff, 0x00000100,
1177 	0x3c2b0, 0xffffffff, 0x00000100,
1178 	0x3c270, 0xffffffff, 0x00000100,
1179 	0x30800, 0xffffffff, 0xe0000000,
1180 	0x3c020, 0xffffffff, 0x00010000,
1181 	0x3c024, 0xffffffff, 0x00030002,
1182 	0x3c028, 0xffffffff, 0x00040007,
1183 	0x3c02c, 0xffffffff, 0x00060005,
1184 	0x3c030, 0xffffffff, 0x00090008,
1185 	0x3c034, 0xffffffff, 0x00010000,
1186 	0x3c038, 0xffffffff, 0x00030002,
1187 	0x3c03c, 0xffffffff, 0x00040007,
1188 	0x3c040, 0xffffffff, 0x00060005,
1189 	0x3c044, 0xffffffff, 0x00090008,
1190 	0x3c048, 0xffffffff, 0x00010000,
1191 	0x3c04c, 0xffffffff, 0x00030002,
1192 	0x3c050, 0xffffffff, 0x00040007,
1193 	0x3c054, 0xffffffff, 0x00060005,
1194 	0x3c058, 0xffffffff, 0x00090008,
1195 	0x3c05c, 0xffffffff, 0x00010000,
1196 	0x3c060, 0xffffffff, 0x00030002,
1197 	0x3c064, 0xffffffff, 0x00040007,
1198 	0x3c068, 0xffffffff, 0x00060005,
1199 	0x3c06c, 0xffffffff, 0x00090008,
1200 	0x3c070, 0xffffffff, 0x00010000,
1201 	0x3c074, 0xffffffff, 0x00030002,
1202 	0x3c078, 0xffffffff, 0x00040007,
1203 	0x3c07c, 0xffffffff, 0x00060005,
1204 	0x3c080, 0xffffffff, 0x00090008,
1205 	0x3c084, 0xffffffff, 0x00010000,
1206 	0x3c088, 0xffffffff, 0x00030002,
1207 	0x3c08c, 0xffffffff, 0x00040007,
1208 	0x3c090, 0xffffffff, 0x00060005,
1209 	0x3c094, 0xffffffff, 0x00090008,
1210 	0x3c098, 0xffffffff, 0x00010000,
1211 	0x3c09c, 0xffffffff, 0x00030002,
1212 	0x3c0a0, 0xffffffff, 0x00040007,
1213 	0x3c0a4, 0xffffffff, 0x00060005,
1214 	0x3c0a8, 0xffffffff, 0x00090008,
1215 	0x3c0ac, 0xffffffff, 0x00010000,
1216 	0x3c0b0, 0xffffffff, 0x00030002,
1217 	0x3c0b4, 0xffffffff, 0x00040007,
1218 	0x3c0b8, 0xffffffff, 0x00060005,
1219 	0x3c0bc, 0xffffffff, 0x00090008,
1220 	0x3c000, 0xffffffff, 0x96e00200,
1221 	0x8708, 0xffffffff, 0x00900100,
1222 	0xc424, 0xffffffff, 0x0020003f,
1223 	0x38, 0xffffffff, 0x0140001c,
1224 	0x3c, 0x000f0000, 0x000f0000,
1225 	0x220, 0xffffffff, 0xC060000C,
1226 	0x224, 0xc0000fff, 0x00000100,
1227 	0xf90, 0xffffffff, 0x00000100,
1228 	0xf98, 0x00000101, 0x00000000,
1229 	0x20a8, 0xffffffff, 0x00000104,
1230 	0x55e4, 0xff000fff, 0x00000100,
1231 	0x30cc, 0xc0000fff, 0x00000104,
1232 	0xc1e4, 0x00000001, 0x00000001,
1233 	0xd00c, 0xff000ff0, 0x00000100,
1234 	0xd80c, 0xff000ff0, 0x00000100
/* Kalindi SPM golden-register triplet ({offset, mask, value});
 * identical to the other ASICs' entries.  NOTE(review): braces
 * missing from this extract.
 */
1237 static const u32 kalindi_golden_spm_registers[] =
1239 	0x30800, 0xe0ffffff, 0xe0000000
/* Kalindi common golden registers ({offset, mask, value} triplets);
 * same set as Bonaire/Spectre.  NOTE(review): braces missing from
 * this extract.
 */
1242 static const u32 kalindi_golden_common_registers[] =
1244 	0xc770, 0xffffffff, 0x00000800,
1245 	0xc774, 0xffffffff, 0x00000800,
1246 	0xc798, 0xffffffff, 0x00007fbf,
1247 	0xc79c, 0xffffffff, 0x00007faf
/* Kalindi golden register fix-ups ({offset, mask, value} triplets).
 * NOTE(review): braces missing from this extract; values verbatim.
 */
1250 static const u32 kalindi_golden_registers[] =
1252 	0x3c000, 0xffffdfff, 0x6e944040,
1253 	0x55e4, 0xff607fff, 0xfc000100,
1254 	0x3c220, 0xff000fff, 0x00000100,
1255 	0x3c224, 0xff000fff, 0x00000100,
1256 	0x3c200, 0xfffc0fff, 0x00000100,
1257 	0x6ed8, 0x00010101, 0x00010000,
1258 	0x9830, 0xffffffff, 0x00000000,
1259 	0x9834, 0xf00fffff, 0x00000400,
1260 	0x5bb0, 0x000000f0, 0x00000070,
1261 	0x5bc0, 0xf0311fff, 0x80300000,
1262 	0x98f8, 0x73773777, 0x12010001,
1263 	0x98fc, 0xffffffff, 0x00000010,
1264 	0x9b7c, 0x00ff0000, 0x00fc0000,
1265 	0x8030, 0x00001f0f, 0x0000100a,
1266 	0x2f48, 0x73773777, 0x12010001,
1267 	0x2408, 0x000fffff, 0x000c007f,
1268 	0x8a14, 0xf000003f, 0x00000007,
1269 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1270 	0x30a04, 0x0000ff0f, 0x00000000,
1271 	0x28a4c, 0x07ffffff, 0x06000000,
1272 	0x4d8, 0x00000fff, 0x00000100,
1273 	0x3e78, 0x00000001, 0x00000002,
1274 	0xc768, 0x00000008, 0x00000008,
1275 	0x8c00, 0x000000ff, 0x00000003,
1276 	0x214f8, 0x01ff01ff, 0x00000002,
1277 	0x21498, 0x007ff800, 0x00200000,
1278 	0x2015c, 0xffffffff, 0x00000f40,
1279 	0x88c4, 0x001f3ae3, 0x00000082,
1280 	0x88d4, 0x0000001f, 0x00000010,
1281 	0x30934, 0xffffffff, 0x00000000
/* Kalindi MGCG/CGCG clock-gating init sequence: {offset, mask,
 * value} triplets; shorter 0x3c02x..0x3c044 tail than the larger
 * parts.  NOTE(review): braces missing from this extract; values
 * verbatim.
 */
1284 static const u32 kalindi_mgcg_cgcg_init[] =
1286 	0xc420, 0xffffffff, 0xfffffffc,
1287 	0x30800, 0xffffffff, 0xe0000000,
1288 	0x3c2a0, 0xffffffff, 0x00000100,
1289 	0x3c208, 0xffffffff, 0x00000100,
1290 	0x3c2c0, 0xffffffff, 0x00000100,
1291 	0x3c2c8, 0xffffffff, 0x00000100,
1292 	0x3c2c4, 0xffffffff, 0x00000100,
1293 	0x55e4, 0xffffffff, 0x00600100,
1294 	0x3c280, 0xffffffff, 0x00000100,
1295 	0x3c214, 0xffffffff, 0x06000100,
1296 	0x3c220, 0xffffffff, 0x00000100,
1297 	0x3c218, 0xffffffff, 0x06000100,
1298 	0x3c204, 0xffffffff, 0x00000100,
1299 	0x3c2e0, 0xffffffff, 0x00000100,
1300 	0x3c224, 0xffffffff, 0x00000100,
1301 	0x3c200, 0xffffffff, 0x00000100,
1302 	0x3c230, 0xffffffff, 0x00000100,
1303 	0x3c234, 0xffffffff, 0x00000100,
1304 	0x3c250, 0xffffffff, 0x00000100,
1305 	0x3c254, 0xffffffff, 0x00000100,
1306 	0x3c258, 0xffffffff, 0x00000100,
1307 	0x3c25c, 0xffffffff, 0x00000100,
1308 	0x3c260, 0xffffffff, 0x00000100,
1309 	0x3c27c, 0xffffffff, 0x00000100,
1310 	0x3c278, 0xffffffff, 0x00000100,
1311 	0x3c210, 0xffffffff, 0x06000100,
1312 	0x3c290, 0xffffffff, 0x00000100,
1313 	0x3c274, 0xffffffff, 0x00000100,
1314 	0x3c2b4, 0xffffffff, 0x00000100,
1315 	0x3c2b0, 0xffffffff, 0x00000100,
1316 	0x3c270, 0xffffffff, 0x00000100,
1317 	0x30800, 0xffffffff, 0xe0000000,
1318 	0x3c020, 0xffffffff, 0x00010000,
1319 	0x3c024, 0xffffffff, 0x00030002,
1320 	0x3c028, 0xffffffff, 0x00040007,
1321 	0x3c02c, 0xffffffff, 0x00060005,
1322 	0x3c030, 0xffffffff, 0x00090008,
1323 	0x3c034, 0xffffffff, 0x00010000,
1324 	0x3c038, 0xffffffff, 0x00030002,
1325 	0x3c03c, 0xffffffff, 0x00040007,
1326 	0x3c040, 0xffffffff, 0x00060005,
1327 	0x3c044, 0xffffffff, 0x00090008,
1328 	0x3c000, 0xffffffff, 0x96e00200,
1329 	0x8708, 0xffffffff, 0x00900100,
1330 	0xc424, 0xffffffff, 0x0020003f,
1331 	0x38, 0xffffffff, 0x0140001c,
1332 	0x3c, 0x000f0000, 0x000f0000,
1333 	0x220, 0xffffffff, 0xC060000C,
1334 	0x224, 0xc0000fff, 0x00000100,
1335 	0x20a8, 0xffffffff, 0x00000104,
1336 	0x55e4, 0xff000fff, 0x00000100,
1337 	0x30cc, 0xc0000fff, 0x00000104,
1338 	0xc1e4, 0x00000001, 0x00000001,
1339 	0xd00c, 0xff000ff0, 0x00000100,
1340 	0xd80c, 0xff000ff0, 0x00000100
/* Hawaii SPM golden settings ({offset, mask, value} triples). */
1343 static const u32 hawaii_golden_spm_registers[] =
1345 0x30800, 0xe0ffffff, 0xe0000000
/* Hawaii common golden settings ({offset, mask, value} triples). */
1348 static const u32 hawaii_golden_common_registers[] =
1350 0x30800, 0xffffffff, 0xe0000000,
1351 0x28350, 0xffffffff, 0x3a00161a,
1352 0x28354, 0xffffffff, 0x0000002e,
1353 0x9a10, 0xffffffff, 0x00018208,
1354 0x98f8, 0xffffffff, 0x12011003
/*
 * Hawaii golden register settings ({offset, mask, value} triples),
 * applied by cik_init_golden_registers() for CHIP_HAWAII.
 */
1357 static const u32 hawaii_golden_registers[] =
1359 0x3354, 0x00000333, 0x00000333,
1360 0x9a10, 0x00010000, 0x00058208,
1361 0x9830, 0xffffffff, 0x00000000,
1362 0x9834, 0xf00fffff, 0x00000400,
1363 0x9838, 0x0002021c, 0x00020200,
1364 0xc78, 0x00000080, 0x00000000,
1365 0x5bb0, 0x000000f0, 0x00000070,
1366 0x5bc0, 0xf0311fff, 0x80300000,
1367 0x350c, 0x00810000, 0x408af000,
1368 0x7030, 0x31000111, 0x00000011,
1369 0x2f48, 0x73773777, 0x12010001,
1370 0x2120, 0x0000007f, 0x0000001b,
1371 0x21dc, 0x00007fb6, 0x00002191,
1372 0x3628, 0x0000003f, 0x0000000a,
1373 0x362c, 0x0000003f, 0x0000000a,
1374 0x2ae4, 0x00073ffe, 0x000022a2,
1375 0x240c, 0x000007ff, 0x00000000,
1376 0x8bf0, 0x00002001, 0x00000001,
1377 0x8b24, 0xffffffff, 0x00ffffff,
1378 0x30a04, 0x0000ff0f, 0x00000000,
1379 0x28a4c, 0x07ffffff, 0x06000000,
1380 0x3e78, 0x00000001, 0x00000002,
1381 0xc768, 0x00000008, 0x00000008,
1382 0xc770, 0x00000f00, 0x00000800,
1383 0xc774, 0x00000f00, 0x00000800,
1384 0xc798, 0x00ffffff, 0x00ff7fbf,
1385 0xc79c, 0x00ffffff, 0x00ff7faf,
1386 0x8c00, 0x000000ff, 0x00000800,
1387 0xe40, 0x00001fff, 0x00001fff,
1388 0x9060, 0x0000007f, 0x00000020,
1389 0x9508, 0x00010000, 0x00010000,
1390 0xae00, 0x00100000, 0x000ff07c,
1391 0xac14, 0x000003ff, 0x0000000f,
1392 0xac10, 0xffffffff, 0x7564fdec,
1393 0xac0c, 0xffffffff, 0x3120b9a8,
1394 0xac08, 0x20000000, 0x0f9c0000
/*
 * Hawaii MGCG/CGCG (medium-/coarse-grain clock gating) init sequence.
 * Same {register offset, AND mask, OR value} triple format as the other
 * *_mgcg_cgcg_init tables; Hawaii has a longer 0x3c0xx block than
 * Kalindi because it carries more CG ramp entries.
 */
1397 static const u32 hawaii_mgcg_cgcg_init[] =
1399 0xc420, 0xffffffff, 0xfffffffd,
1400 0x30800, 0xffffffff, 0xe0000000,
1401 0x3c2a0, 0xffffffff, 0x00000100,
1402 0x3c208, 0xffffffff, 0x00000100,
1403 0x3c2c0, 0xffffffff, 0x00000100,
1404 0x3c2c8, 0xffffffff, 0x00000100,
1405 0x3c2c4, 0xffffffff, 0x00000100,
1406 0x55e4, 0xffffffff, 0x00200100,
1407 0x3c280, 0xffffffff, 0x00000100,
1408 0x3c214, 0xffffffff, 0x06000100,
1409 0x3c220, 0xffffffff, 0x00000100,
1410 0x3c218, 0xffffffff, 0x06000100,
1411 0x3c204, 0xffffffff, 0x00000100,
1412 0x3c2e0, 0xffffffff, 0x00000100,
1413 0x3c224, 0xffffffff, 0x00000100,
1414 0x3c200, 0xffffffff, 0x00000100,
1415 0x3c230, 0xffffffff, 0x00000100,
1416 0x3c234, 0xffffffff, 0x00000100,
1417 0x3c250, 0xffffffff, 0x00000100,
1418 0x3c254, 0xffffffff, 0x00000100,
1419 0x3c258, 0xffffffff, 0x00000100,
1420 0x3c25c, 0xffffffff, 0x00000100,
1421 0x3c260, 0xffffffff, 0x00000100,
1422 0x3c27c, 0xffffffff, 0x00000100,
1423 0x3c278, 0xffffffff, 0x00000100,
1424 0x3c210, 0xffffffff, 0x06000100,
1425 0x3c290, 0xffffffff, 0x00000100,
1426 0x3c274, 0xffffffff, 0x00000100,
1427 0x3c2b4, 0xffffffff, 0x00000100,
1428 0x3c2b0, 0xffffffff, 0x00000100,
1429 0x3c270, 0xffffffff, 0x00000100,
1430 0x30800, 0xffffffff, 0xe0000000,
1431 0x3c020, 0xffffffff, 0x00010000,
1432 0x3c024, 0xffffffff, 0x00030002,
1433 0x3c028, 0xffffffff, 0x00040007,
1434 0x3c02c, 0xffffffff, 0x00060005,
1435 0x3c030, 0xffffffff, 0x00090008,
1436 0x3c034, 0xffffffff, 0x00010000,
1437 0x3c038, 0xffffffff, 0x00030002,
1438 0x3c03c, 0xffffffff, 0x00040007,
1439 0x3c040, 0xffffffff, 0x00060005,
1440 0x3c044, 0xffffffff, 0x00090008,
1441 0x3c048, 0xffffffff, 0x00010000,
1442 0x3c04c, 0xffffffff, 0x00030002,
1443 0x3c050, 0xffffffff, 0x00040007,
1444 0x3c054, 0xffffffff, 0x00060005,
1445 0x3c058, 0xffffffff, 0x00090008,
1446 0x3c05c, 0xffffffff, 0x00010000,
1447 0x3c060, 0xffffffff, 0x00030002,
1448 0x3c064, 0xffffffff, 0x00040007,
1449 0x3c068, 0xffffffff, 0x00060005,
1450 0x3c06c, 0xffffffff, 0x00090008,
1451 0x3c070, 0xffffffff, 0x00010000,
1452 0x3c074, 0xffffffff, 0x00030002,
1453 0x3c078, 0xffffffff, 0x00040007,
1454 0x3c07c, 0xffffffff, 0x00060005,
1455 0x3c080, 0xffffffff, 0x00090008,
1456 0x3c084, 0xffffffff, 0x00010000,
1457 0x3c088, 0xffffffff, 0x00030002,
1458 0x3c08c, 0xffffffff, 0x00040007,
1459 0x3c090, 0xffffffff, 0x00060005,
1460 0x3c094, 0xffffffff, 0x00090008,
1461 0x3c098, 0xffffffff, 0x00010000,
1462 0x3c09c, 0xffffffff, 0x00030002,
1463 0x3c0a0, 0xffffffff, 0x00040007,
1464 0x3c0a4, 0xffffffff, 0x00060005,
1465 0x3c0a8, 0xffffffff, 0x00090008,
1466 0x3c0ac, 0xffffffff, 0x00010000,
1467 0x3c0b0, 0xffffffff, 0x00030002,
1468 0x3c0b4, 0xffffffff, 0x00040007,
1469 0x3c0b8, 0xffffffff, 0x00060005,
1470 0x3c0bc, 0xffffffff, 0x00090008,
1471 0x3c0c0, 0xffffffff, 0x00010000,
1472 0x3c0c4, 0xffffffff, 0x00030002,
1473 0x3c0c8, 0xffffffff, 0x00040007,
1474 0x3c0cc, 0xffffffff, 0x00060005,
1475 0x3c0d0, 0xffffffff, 0x00090008,
1476 0x3c0d4, 0xffffffff, 0x00010000,
1477 0x3c0d8, 0xffffffff, 0x00030002,
1478 0x3c0dc, 0xffffffff, 0x00040007,
1479 0x3c0e0, 0xffffffff, 0x00060005,
1480 0x3c0e4, 0xffffffff, 0x00090008,
1481 0x3c0e8, 0xffffffff, 0x00010000,
1482 0x3c0ec, 0xffffffff, 0x00030002,
1483 0x3c0f0, 0xffffffff, 0x00040007,
1484 0x3c0f4, 0xffffffff, 0x00060005,
1485 0x3c0f8, 0xffffffff, 0x00090008,
1486 0xc318, 0xffffffff, 0x00020200,
1487 0x3350, 0xffffffff, 0x00000200,
1488 0x15c0, 0xffffffff, 0x00000400,
1489 0x55e8, 0xffffffff, 0x00000000,
1490 0x2f50, 0xffffffff, 0x00000902,
1491 0x3c000, 0xffffffff, 0x96940200,
1492 0x8708, 0xffffffff, 0x00900100,
1493 0xc424, 0xffffffff, 0x0020003f,
1494 0x38, 0xffffffff, 0x0140001c,
1495 0x3c, 0x000f0000, 0x000f0000,
1496 0x220, 0xffffffff, 0xc060000c,
1497 0x224, 0xc0000fff, 0x00000100,
1498 0xf90, 0xffffffff, 0x00000100,
1499 0xf98, 0x00000101, 0x00000000,
1500 0x20a8, 0xffffffff, 0x00000104,
1501 0x55e4, 0xff000fff, 0x00000100,
1502 0x30cc, 0xc0000fff, 0x00000104,
1503 0xc1e4, 0x00000001, 0x00000001,
1504 0xd00c, 0xff000ff0, 0x00000100,
1505 0xd80c, 0xff000ff0, 0x00000100
/*
 * Godavari (Mullins APU) golden register settings
 * ({offset, mask, value} triples).
 */
1508 static const u32 godavari_golden_registers[] =
1510 0x55e4, 0xff607fff, 0xfc000100,
1511 0x6ed8, 0x00010101, 0x00010000,
1512 0x9830, 0xffffffff, 0x00000000,
1513 0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): 0x98302 is not dword-aligned; looks like a typo for 0x9834 (matches hawaii table) but is present upstream — verify before changing */
1514 0x6130, 0xffffffff, 0x00010000,
1515 0x5bb0, 0x000000f0, 0x00000070,
1516 0x5bc0, 0xf0311fff, 0x80300000,
1517 0x98f8, 0x73773777, 0x12010001,
1518 0x98fc, 0xffffffff, 0x00000010,
1519 0x8030, 0x00001f0f, 0x0000100a,
1520 0x2f48, 0x73773777, 0x12010001,
1521 0x2408, 0x000fffff, 0x000c007f,
1522 0x8a14, 0xf000003f, 0x00000007,
1523 0x8b24, 0xffffffff, 0x00ff0fff,
1524 0x30a04, 0x0000ff0f, 0x00000000,
1525 0x28a4c, 0x07ffffff, 0x06000000,
1526 0x4d8, 0x00000fff, 0x00000100,
1527 0xd014, 0x00010000, 0x00810001,
1528 0xd814, 0x00010000, 0x00810001,
1529 0x3e78, 0x00000001, 0x00000002,
1530 0xc768, 0x00000008, 0x00000008,
1531 0xc770, 0x00000f00, 0x00000800,
1532 0xc774, 0x00000f00, 0x00000800,
1533 0xc798, 0x00ffffff, 0x00ff7fbf,
1534 0xc79c, 0x00ffffff, 0x00ff7faf,
1535 0x8c00, 0x000000ff, 0x00000001,
1536 0x214f8, 0x01ff01ff, 0x00000002,
1537 0x21498, 0x007ff800, 0x00200000,
1538 0x2015c, 0xffffffff, 0x00000f40,
1539 0x88c4, 0x001f3ae3, 0x00000082,
1540 0x88d4, 0x0000001f, 0x00000010,
1541 0x30934, 0xffffffff, 0x00000000
/*
 * cik_init_golden_registers - program per-family "golden" register
 * settings at init time. Dispatches on rdev->family and plays back the
 * matching mgcg_cgcg_init, golden, common and spm tables via
 * radeon_program_register_sequence(). Serialized against other
 * GRBM_GFX_INDEX users with grbm_idx_mutex.
 */
1545 static void cik_init_golden_registers(struct radeon_device *rdev)
1547 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1548 	mutex_lock(&rdev->grbm_idx_mutex);
1549 	switch (rdev->family) {
	/* Bonaire dGPU tables */
1551 		radeon_program_register_sequence(rdev,
1552 						 bonaire_mgcg_cgcg_init,
1553 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554 		radeon_program_register_sequence(rdev,
1555 						 bonaire_golden_registers,
1556 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557 		radeon_program_register_sequence(rdev,
1558 						 bonaire_golden_common_registers,
1559 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560 		radeon_program_register_sequence(rdev,
1561 						 bonaire_golden_spm_registers,
1562 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
	/* Kabini APU: kalindi tables throughout */
1565 		radeon_program_register_sequence(rdev,
1566 						 kalindi_mgcg_cgcg_init,
1567 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568 		radeon_program_register_sequence(rdev,
1569 						 kalindi_golden_registers,
1570 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571 		radeon_program_register_sequence(rdev,
1572 						 kalindi_golden_common_registers,
1573 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574 		radeon_program_register_sequence(rdev,
1575 						 kalindi_golden_spm_registers,
1576 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
	/* Mullins APU: shares kalindi cg/common/spm tables but uses
	 * godavari golden registers */
1579 		radeon_program_register_sequence(rdev,
1580 						 kalindi_mgcg_cgcg_init,
1581 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582 		radeon_program_register_sequence(rdev,
1583 						 godavari_golden_registers,
1584 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1585 		radeon_program_register_sequence(rdev,
1586 						 kalindi_golden_common_registers,
1587 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588 		radeon_program_register_sequence(rdev,
1589 						 kalindi_golden_spm_registers,
1590 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
	/* Kaveri APU: spectre tables */
1593 		radeon_program_register_sequence(rdev,
1594 						 spectre_mgcg_cgcg_init,
1595 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596 		radeon_program_register_sequence(rdev,
1597 						 spectre_golden_registers,
1598 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1599 		radeon_program_register_sequence(rdev,
1600 						 spectre_golden_common_registers,
1601 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602 		radeon_program_register_sequence(rdev,
1603 						 spectre_golden_spm_registers,
1604 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
	/* Hawaii dGPU tables */
1607 		radeon_program_register_sequence(rdev,
1608 						 hawaii_mgcg_cgcg_init,
1609 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610 		radeon_program_register_sequence(rdev,
1611 						 hawaii_golden_registers,
1612 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613 		radeon_program_register_sequence(rdev,
1614 						 hawaii_golden_common_registers,
1615 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616 		radeon_program_register_sequence(rdev,
1617 						 hawaii_golden_spm_registers,
1618 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1623 	mutex_unlock(&rdev->grbm_idx_mutex);
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK). The SPLL reference frequency may be internally divided:
 * by 2 on IGPs when GPU_COUNTER_CLK is set, by 4 on dGPUs when
 * XTALIN_DIVIDE is set; otherwise it is returned as-is.
1634 u32 cik_get_xclk(struct radeon_device *rdev)
1636 	u32 reference_clock = rdev->clock.spll.reference_freq;
1638 	if (rdev->flags & RADEON_IS_IGP) {
1639 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1640 			return reference_clock / 2;
1642 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1643 			return reference_clock / 4;
1645 	return reference_clock;
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK). Out-of-range indices are
 * rejected with a DRM_ERROR rather than touching the aperture.
1657 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1659 	if (index < rdev->doorbell.num_doorbells) {
1660 		return readl(rdev->doorbell.ptr + index);
1662 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK). Out-of-range indices are
 * rejected with a DRM_ERROR and the write is dropped.
1677 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1679 	if (index < rdev->doorbell.num_doorbells) {
1680 		writel(v, rdev->doorbell.ptr + index);
1682 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1686 #define BONAIRE_IO_MC_REGS_SIZE 36
/*
 * Bonaire MC IO debug register {index, data} pairs, written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when falling
 * back to the legacy (headerless) MC firmware path.
 */
1688 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1690 	{0x00000070, 0x04400000},
1691 	{0x00000071, 0x80c01803},
1692 	{0x00000072, 0x00004004},
1693 	{0x00000073, 0x00000100},
1694 	{0x00000074, 0x00ff0000},
1695 	{0x00000075, 0x34000000},
1696 	{0x00000076, 0x08000014},
1697 	{0x00000077, 0x00cc08ec},
1698 	{0x00000078, 0x00000400},
1699 	{0x00000079, 0x00000000},
1700 	{0x0000007a, 0x04090000},
1701 	{0x0000007c, 0x00000000},
1702 	{0x0000007e, 0x4408a8e8},
1703 	{0x0000007f, 0x00000304},
1704 	{0x00000080, 0x00000000},
1705 	{0x00000082, 0x00000001},
1706 	{0x00000083, 0x00000002},
1707 	{0x00000084, 0xf3e4f400},
1708 	{0x00000085, 0x052024e3},
1709 	{0x00000087, 0x00000000},
1710 	{0x00000088, 0x01000000},
1711 	{0x0000008a, 0x1c0a0000},
1712 	{0x0000008b, 0xff010000},
1713 	{0x0000008d, 0xffffefff},
1714 	{0x0000008e, 0xfff3efff},
1715 	{0x0000008f, 0xfff3efbf},
1716 	{0x00000092, 0xf7ffffff},
1717 	{0x00000093, 0xffffff7f},
1718 	{0x00000095, 0x00101101},
1719 	{0x00000096, 0x00000fff},
1720 	{0x00000097, 0x00116fff},
1721 	{0x00000098, 0x60010000},
1722 	{0x00000099, 0x10010000},
1723 	{0x0000009a, 0x00006000},
1724 	{0x0000009b, 0x00001000},
1725 	{0x0000009f, 0x00b48000}
1728 #define HAWAII_IO_MC_REGS_SIZE 22
/*
 * Hawaii MC IO debug register {index, data} pairs; same legacy MC
 * firmware fallback role as bonaire_io_mc_regs.
 */
1730 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1732 	{0x0000007d, 0x40000000},
1733 	{0x0000007e, 0x40180304},
1734 	{0x0000007f, 0x0000ff00},
1735 	{0x00000081, 0x00000000},
1736 	{0x00000083, 0x00000800},
1737 	{0x00000086, 0x00000000},
1738 	{0x00000087, 0x00000100},
1739 	{0x00000088, 0x00020100},
1740 	{0x00000089, 0x00000000},
1741 	{0x0000008b, 0x00040000},
1742 	{0x0000008c, 0x00000100},
1743 	{0x0000008e, 0xff010000},
1744 	{0x00000090, 0xffffefff},
1745 	{0x00000091, 0xfff3efff},
1746 	{0x00000092, 0xfff3efbf},
1747 	{0x00000093, 0xf7ffffff},
1748 	{0x00000094, 0xffffff7f},
1749 	{0x00000095, 0x00000fff},
1750 	{0x00000096, 0x00116fff},
1751 	{0x00000097, 0x60010000},
1752 	{0x00000098, 0x10010000},
1753 	{0x0000009f, 0x00c79000}
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe within the selected ME
 * @queue: queue within the selected pipe
 * @vmid: VMID whose instanced registers to expose
 *
 * Switches the currently active registers instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination. Caller is responsible for any locking
 * needed around the SRBM_GFX_CNTL write (it changes global decode
 * state for subsequent register accesses).
1770 static void cik_srbm_select(struct radeon_device *rdev,
1771 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1773 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1776 			     QUEUEID(queue & 0x7));
1777 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 *
 * Two firmware layouts are supported: "new" headered firmware
 * (mc_firmware_header_v1_0, little-endian payload with embedded IO
 * debug table) and legacy headerless firmware (big-endian payload,
 * IO debug values taken from the per-chip bonaire/hawaii_io_mc_regs
 * tables). The MC is blacked out while running, reprogrammed, then
 * restarted; finally we poll for DRAM training completion on both
 * channels.
1789 int ci_mc_load_microcode(struct radeon_device *rdev)
1791 	const __be32 *fw_data = NULL;
1792 	const __le32 *new_fw_data = NULL;
1793 	u32 running, blackout = 0, tmp;
1794 	u32 *io_mc_regs = NULL;
1795 	const __le32 *new_io_mc_regs = NULL;
1796 	int i, regs_size, ucode_size;
	/* new firmware: parse sizes/offsets out of the header */
1802 		const struct mc_firmware_header_v1_0 *hdr =
1803 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1805 		radeon_ucode_print_mc_hdr(&hdr->header);
	/* io_debug entries are {index, data} dword pairs, hence /(4*2) */
1807 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1808 		new_io_mc_regs = (const __le32 *)
1809 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1810 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1811 		new_fw_data = (const __le32 *)
1812 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	/* legacy firmware: whole blob is ucode, io regs come from tables */
1814 		ucode_size = rdev->mc_fw->size / 4;
1816 		switch (rdev->family) {
1818 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1819 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1822 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1823 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1828 		fw_data = (const __be32 *)rdev->mc_fw->data;
1831 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
	/* if the MC seq engine is running, black out memory accesses first */
1835 		blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1836 		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1839 	/* reset the engine and set to writable */
1840 	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1841 	WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1843 	/* load mc io regs */
1844 	for (i = 0; i < regs_size; i++) {
1846 			WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1847 			WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1849 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1850 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
	/* device 0x6649 with this MC_SEQ_MISC0 revision needs two extra
	 * io debug overrides */
1854 	tmp = RREG32(MC_SEQ_MISC0);
1855 	if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1856 		WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1857 		WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1858 		WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1859 		WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1862 	/* load the MC ucode */
1863 	for (i = 0; i < ucode_size; i++) {
1865 			WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1867 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1870 	/* put the engine back into the active state */
1871 	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1872 	WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1873 	WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1875 	/* wait for training to complete */
1876 	for (i = 0; i < rdev->usec_timeout; i++) {
1877 		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1881 	for (i = 0; i < rdev->usec_timeout; i++) {
1882 		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
	/* restore the saved blackout state if we blacked out above */
1888 		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 *
 * For each engine (PFP/ME/CE/MEC/RLC/SDMA, plus MC and SMC on dGPUs)
 * this tries the "new" lowercase-named headered firmware first and
 * falls back to the legacy uppercase-named blob, validating sizes
 * (legacy) or headers (new). Mixing new and old images across engines
 * is rejected. On any fatal error, all firmware references acquired so
 * far are released. (In this libre-kernel build the firmware names are
 * deblobbed and reject_firmware() is used in place of
 * request_firmware().)
1903 static int cik_init_microcode(struct radeon_device *rdev)
1905 	const char *chip_name;
1906 	const char *new_chip_name;
1907 	size_t pfp_req_size, me_req_size, ce_req_size,
1908 		mec_req_size, rlc_req_size, mc_req_size = 0,
1909 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	/* per-family expected legacy firmware sizes (in bytes) */
1917 	switch (rdev->family) {
1919 		chip_name = "BONAIRE";
1920 		new_chip_name = "bonaire";
1921 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1922 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1923 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1924 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1925 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1926 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1927 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1928 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1929 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1933 		chip_name = "HAWAII";
1934 		new_chip_name = "hawaii";
1935 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1936 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1937 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1938 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1939 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1940 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1941 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1942 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1943 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1947 		chip_name = "KAVERI";
1948 		new_chip_name = "kaveri";
1949 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1950 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1951 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1952 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1953 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1954 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1958 		chip_name = "KABINI";
1959 		new_chip_name = "kabini";
1960 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1961 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1962 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1963 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1964 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1965 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1969 		chip_name = "MULLINS";
1970 		new_chip_name = "mullins";
1971 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1972 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1973 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1974 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1975 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1976 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1982 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
	/* PFP: try new firmware, fall back to legacy, then validate */
1984 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1985 	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1987 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1988 		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1991 		if (rdev->pfp_fw->size != pfp_req_size) {
1993 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1994 			       rdev->pfp_fw->size, fw_name);
1999 		err = radeon_ucode_validate(rdev->pfp_fw);
2002 			       "cik_fw: validation failed for firmware \"%s\"\n",
	/* ME */
2010 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2011 	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2013 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2014 		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2017 		if (rdev->me_fw->size != me_req_size) {
2019 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2020 			       rdev->me_fw->size, fw_name);
2024 		err = radeon_ucode_validate(rdev->me_fw);
2027 			       "cik_fw: validation failed for firmware \"%s\"\n",
	/* CE */
2035 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2036 	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2038 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2039 		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2042 		if (rdev->ce_fw->size != ce_req_size) {
2044 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2045 			       rdev->ce_fw->size, fw_name);
2049 		err = radeon_ucode_validate(rdev->ce_fw);
2052 			       "cik_fw: validation failed for firmware \"%s\"\n",
	/* MEC (compute) */
2060 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2061 	err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2063 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2064 		err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2067 		if (rdev->mec_fw->size != mec_req_size) {
2069 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2070 			       rdev->mec_fw->size, fw_name);
2074 		err = radeon_ucode_validate(rdev->mec_fw);
2077 			       "cik_fw: validation failed for firmware \"%s\"\n",
	/* MEC2 exists only on Kaveri; no legacy fallback for it */
2085 	if (rdev->family == CHIP_KAVERI) {
2086 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2087 		err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2091 			err = radeon_ucode_validate(rdev->mec2_fw);
	/* RLC */
2100 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2101 	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2103 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2104 		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2107 		if (rdev->rlc_fw->size != rlc_req_size) {
2109 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2110 			       rdev->rlc_fw->size, fw_name);
2114 		err = radeon_ucode_validate(rdev->rlc_fw);
2117 			       "cik_fw: validation failed for firmware \"%s\"\n",
	/* SDMA */
2125 	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2126 	err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2128 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2129 		err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2132 		if (rdev->sdma_fw->size != sdma_req_size) {
2134 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2135 			       rdev->sdma_fw->size, fw_name);
2139 		err = radeon_ucode_validate(rdev->sdma_fw);
2142 			       "cik_fw: validation failed for firmware \"%s\"\n",
2150 	/* No SMC, MC ucode on APUs */
2151 	if (!(rdev->flags & RADEON_IS_IGP)) {
2152 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2153 		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2155 			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2156 			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2158 				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2159 				err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
	/* legacy MC may legitimately be either the mc or mc2 size */
2163 			if ((rdev->mc_fw->size != mc_req_size) &&
2164 			    (rdev->mc_fw->size != mc2_req_size)){
2166 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2167 				       rdev->mc_fw->size, fw_name);
2170 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2172 			err = radeon_ucode_validate(rdev->mc_fw);
2175 				       "cik_fw: validation failed for firmware \"%s\"\n",
	/* SMC is optional: failure disables dpm rather than aborting */
2183 		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2184 		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2186 			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2187 			err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2190 				       "smc: error loading firmware \"%s\"\n",
2192 				release_firmware(rdev->smc_fw);
2193 				rdev->smc_fw = NULL;
2195 			} else if (rdev->smc_fw->size != smc_req_size) {
2197 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2198 				       rdev->smc_fw->size, fw_name);
2202 			err = radeon_ucode_validate(rdev->smc_fw);
2205 				       "cik_fw: validation failed for firmware \"%s\"\n",
	/* all-new or all-legacy: partial mixes are rejected */
2215 		rdev->new_fw = false;
2216 	} else if (new_fw < num_fw) {
2217 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2220 		rdev->new_fw = true;
	/* error path: drop every firmware reference taken above */
2227 		       "cik_cp: Failed to load firmware \"%s\"\n",
2229 		release_firmware(rdev->pfp_fw);
2230 		rdev->pfp_fw = NULL;
2231 		release_firmware(rdev->me_fw);
2233 		release_firmware(rdev->ce_fw);
2235 		release_firmware(rdev->mec_fw);
2236 		rdev->mec_fw = NULL;
2237 		release_firmware(rdev->mec2_fw);
2238 		rdev->mec2_fw = NULL;
2239 		release_firmware(rdev->rlc_fw);
2240 		rdev->rlc_fw = NULL;
2241 		release_firmware(rdev->sdma_fw);
2242 		rdev->sdma_fw = NULL;
2243 		release_firmware(rdev->mc_fw);
2245 		release_firmware(rdev->smc_fw);
2246 		rdev->smc_fw = NULL;
2255 * cik_tiling_mode_table_init - init the hw tiling table
2257 * @rdev: radeon_device pointer
2259 * Starting with SI, the tiling setup is done globally in a
2260 * set of 32 tiling modes. Rather than selecting each set of
2261 * parameters per surface as on older asics, we just select
2262 * which index in the tiling table we want to use, and the
2263 * surface uses those parameters (CIK).
2265 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2267 const u32 num_tile_mode_states = 32;
2268 const u32 num_secondary_tile_mode_states = 16;
2269 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2270 u32 num_pipe_configs;
2271 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2272 rdev->config.cik.max_shader_engines;
2274 switch (rdev->config.cik.mem_row_size_in_kb) {
2276 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2280 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2283 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2287 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2288 if (num_pipe_configs > 8)
2289 num_pipe_configs = 16;
2291 if (num_pipe_configs == 16) {
2292 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2293 switch (reg_offset) {
2295 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2297 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2301 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2307 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2309 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2313 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2319 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322 TILE_SPLIT(split_equal_to_row_size));
2325 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2336 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 TILE_SPLIT(split_equal_to_row_size));
2342 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2343 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2346 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2351 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2374 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2397 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2405 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2419 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2421 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2422 switch (reg_offset) {
2424 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2427 NUM_BANKS(ADDR_SURF_16_BANK));
2430 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433 NUM_BANKS(ADDR_SURF_16_BANK));
2436 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2439 NUM_BANKS(ADDR_SURF_16_BANK));
2442 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2445 NUM_BANKS(ADDR_SURF_16_BANK));
2448 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 NUM_BANKS(ADDR_SURF_8_BANK));
2454 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 NUM_BANKS(ADDR_SURF_4_BANK));
2460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 NUM_BANKS(ADDR_SURF_2_BANK));
2466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2472 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2478 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 NUM_BANKS(ADDR_SURF_16_BANK));
2484 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 NUM_BANKS(ADDR_SURF_8_BANK));
2490 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493 NUM_BANKS(ADDR_SURF_4_BANK));
2496 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 NUM_BANKS(ADDR_SURF_2_BANK));
2502 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 NUM_BANKS(ADDR_SURF_2_BANK));
2511 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2512 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2514 } else if (num_pipe_configs == 8) {
2515 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2516 switch (reg_offset) {
2518 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2524 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2530 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2542 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545 TILE_SPLIT(split_equal_to_row_size));
2548 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2553 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2559 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 TILE_SPLIT(split_equal_to_row_size));
2565 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2569 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2574 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2580 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2586 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2597 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2603 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2615 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2620 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2634 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2642 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2644 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2645 switch (reg_offset) {
2647 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650 NUM_BANKS(ADDR_SURF_16_BANK));
2653 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2656 NUM_BANKS(ADDR_SURF_16_BANK));
2659 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2662 NUM_BANKS(ADDR_SURF_16_BANK));
2665 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2668 NUM_BANKS(ADDR_SURF_16_BANK));
2671 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2674 NUM_BANKS(ADDR_SURF_8_BANK));
2677 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2680 NUM_BANKS(ADDR_SURF_4_BANK));
2683 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686 NUM_BANKS(ADDR_SURF_2_BANK));
2689 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2690 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2691 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2692 NUM_BANKS(ADDR_SURF_16_BANK));
2695 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2696 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2697 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2698 NUM_BANKS(ADDR_SURF_16_BANK));
2701 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2704 NUM_BANKS(ADDR_SURF_16_BANK));
2707 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2709 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2710 NUM_BANKS(ADDR_SURF_16_BANK));
2713 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2716 NUM_BANKS(ADDR_SURF_8_BANK));
2719 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2722 NUM_BANKS(ADDR_SURF_4_BANK));
2725 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728 NUM_BANKS(ADDR_SURF_2_BANK));
2734 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2735 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2737 } else if (num_pipe_configs == 4) {
2739 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2740 switch (reg_offset) {
2742 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2748 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2754 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2766 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 TILE_SPLIT(split_equal_to_row_size));
2772 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2777 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2783 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786 TILE_SPLIT(split_equal_to_row_size));
2789 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2790 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2793 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2798 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2804 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2821 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2844 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2845 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2851 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2852 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2865 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2866 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2868 } else if (num_rbs < 4) {
2869 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2870 switch (reg_offset) {
2872 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2875 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2878 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2880 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2881 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2884 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2887 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2890 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2892 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2893 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2896 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2899 TILE_SPLIT(split_equal_to_row_size));
2902 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2903 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2904 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2913 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2916 TILE_SPLIT(split_equal_to_row_size));
2919 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2923 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2928 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2948 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2951 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2971 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2974 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2975 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2980 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2981 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2982 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2995 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2996 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2999 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3000 switch (reg_offset) {
3002 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005 NUM_BANKS(ADDR_SURF_16_BANK));
3008 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 NUM_BANKS(ADDR_SURF_16_BANK));
3014 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017 NUM_BANKS(ADDR_SURF_16_BANK));
3020 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3023 NUM_BANKS(ADDR_SURF_16_BANK));
3026 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3029 NUM_BANKS(ADDR_SURF_16_BANK));
3032 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3035 NUM_BANKS(ADDR_SURF_8_BANK));
3038 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041 NUM_BANKS(ADDR_SURF_4_BANK));
3044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3047 NUM_BANKS(ADDR_SURF_16_BANK));
3050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053 NUM_BANKS(ADDR_SURF_16_BANK));
3056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 NUM_BANKS(ADDR_SURF_16_BANK));
3062 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065 NUM_BANKS(ADDR_SURF_16_BANK));
3068 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071 NUM_BANKS(ADDR_SURF_16_BANK));
3074 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 NUM_BANKS(ADDR_SURF_8_BANK));
3080 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3083 NUM_BANKS(ADDR_SURF_4_BANK));
3089 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3090 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3092 } else if (num_pipe_configs == 2) {
3093 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3094 switch (reg_offset) {
3096 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3098 PIPE_CONFIG(ADDR_SURF_P2) |
3099 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3102 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3104 PIPE_CONFIG(ADDR_SURF_P2) |
3105 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3108 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3110 PIPE_CONFIG(ADDR_SURF_P2) |
3111 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3114 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3120 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3122 PIPE_CONFIG(ADDR_SURF_P2) |
3123 TILE_SPLIT(split_equal_to_row_size));
3126 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127 PIPE_CONFIG(ADDR_SURF_P2) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3133 PIPE_CONFIG(ADDR_SURF_P2) |
3134 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3137 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 TILE_SPLIT(split_equal_to_row_size));
3143 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3144 PIPE_CONFIG(ADDR_SURF_P2);
3147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3149 PIPE_CONFIG(ADDR_SURF_P2));
3152 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3153 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3154 PIPE_CONFIG(ADDR_SURF_P2) |
3155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3159 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3160 PIPE_CONFIG(ADDR_SURF_P2) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3164 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3165 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3170 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3175 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3177 PIPE_CONFIG(ADDR_SURF_P2) |
3178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183 PIPE_CONFIG(ADDR_SURF_P2) |
3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3189 PIPE_CONFIG(ADDR_SURF_P2) |
3190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3193 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 PIPE_CONFIG(ADDR_SURF_P2));
3198 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3212 PIPE_CONFIG(ADDR_SURF_P2) |
3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3220 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3223 switch (reg_offset) {
3225 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3228 NUM_BANKS(ADDR_SURF_16_BANK));
3231 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234 NUM_BANKS(ADDR_SURF_16_BANK));
3237 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240 NUM_BANKS(ADDR_SURF_16_BANK));
3243 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246 NUM_BANKS(ADDR_SURF_16_BANK));
3249 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252 NUM_BANKS(ADDR_SURF_16_BANK));
3255 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3258 NUM_BANKS(ADDR_SURF_16_BANK));
3261 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3264 NUM_BANKS(ADDR_SURF_8_BANK));
3267 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3270 NUM_BANKS(ADDR_SURF_16_BANK));
3273 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3276 NUM_BANKS(ADDR_SURF_16_BANK));
3279 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282 NUM_BANKS(ADDR_SURF_16_BANK));
3285 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288 NUM_BANKS(ADDR_SURF_16_BANK));
3291 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294 NUM_BANKS(ADDR_SURF_16_BANK));
3297 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 NUM_BANKS(ADDR_SURF_16_BANK));
3303 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3306 NUM_BANKS(ADDR_SURF_8_BANK));
3312 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3313 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3316 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3320 * cik_select_se_sh - select which SE, SH to address
3322 * @rdev: radeon_device pointer
3323 * @se_num: shader engine to address
3324 * @sh_num: sh block to address
3326 * Select which SE, SH combinations to address. Certain
3327 * registers are instanced per SE or SH. 0xffffffff means
3328 * broadcast to all SEs or SHs (CIK).
3330 static void cik_select_se_sh(struct radeon_device *rdev,
3331 u32 se_num, u32 sh_num)
/* Start from instance-broadcast; SE/SH selection bits are ORed in below. */
3333 u32 data = INSTANCE_BROADCAST_WRITES;
/* Per the kernel-doc above, 0xffffffff in an index means "broadcast to all"
 * for that dimension (all SEs and/or all SHs). */
3335 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3336 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3337 else if (se_num == 0xffffffff)
3338 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3339 else if (sh_num == 0xffffffff)
3340 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
/* NOTE(review): the final "else" keyword (original line 3341) is elided in
 * this view; this arm selects one specific SE/SH pair. */
3342 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
/* Commit the selection; subsequent instanced-register accesses hit the
 * chosen SE/SH combination. */
3343 WREG32(GRBM_GFX_INDEX, data);
3347 * cik_create_bitmask - create a bitmask
3349 * @bit_width: length of the mask
3351 * create a variable length bit mask (CIK).
3352 * Returns the bitmask.
/* Builds a contiguous low-order mask of bit_width bits, per the kernel-doc
 * above. NOTE(review): the loop body and return (original lines 3355-3363)
 * are elided in this view; presumably it shifts/ORs one bit per iteration —
 * confirm against the full source. */
3354 static u32 cik_create_bitmask(u32 bit_width)
3358 for (i = 0; i < bit_width; i++) {
3366 * cik_get_rb_disabled - computes the mask of disabled RBs
3368 * @rdev: radeon_device pointer
3369 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3370 * @se_num: number of SEs (shader engines) for the asic
3371 * @sh_per_se: number of SH blocks per SE for the asic
3373 * Calculates the bitmask of disabled RBs (CIK).
3374 * Returns the disabled RB bitmask.
/* Computes the bitmask of disabled RBs for the currently selected SE/SH
 * (selection is done by the caller via cik_select_se_sh). */
3376 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3377 u32 max_rb_num_per_se,
/* Fuse-level disabled backends. */
3382 data = RREG32(CC_RB_BACKEND_DISABLE);
3384 data &= BACKEND_DISABLE_MASK;
/* Merge in user/driver-disabled backends. */
3387 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
/* Normalize the field down to bit 0. */
3389 data >>= BACKEND_DISABLE_SHIFT;
/* Mask limits the result to the RBs that actually exist per SH. */
3391 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3397 * cik_setup_rb - setup the RBs on the asic
3399 * @rdev: radeon_device pointer
3400 * @se_num: number of SEs (shader engines) for the asic
3401 * @sh_per_se: number of SH blocks per SE for the asic
3402 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3404 * Configures per-SE/SH RB registers (CIK).
3406 static void cik_setup_rb(struct radeon_device *rdev,
3407 u32 se_num, u32 sh_per_se,
3408 u32 max_rb_num_per_se)
3412 u32 disabled_rbs = 0;
3413 u32 enabled_rbs = 0;
/* Pass 1: walk every SE/SH pair and collect the per-pair disabled-RB
 * bitmaps into one global bitmap. grbm_idx_mutex serializes use of the
 * shared GRBM_GFX_INDEX selection register. */
3415 mutex_lock(&rdev->grbm_idx_mutex);
3416 for (i = 0; i < se_num; i++) {
3417 for (j = 0; j < sh_per_se; j++) {
3418 cik_select_se_sh(rdev, i, j);
3419 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
/* Hawaii packs more RB bits per SH than other CIK parts. */
3420 if (rdev->family == CHIP_HAWAII)
3421 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3423 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
/* Restore broadcast so later register writes hit all SEs/SHs. */
3426 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3427 mutex_unlock(&rdev->grbm_idx_mutex);
/* Pass 2: invert the disabled bitmap into an enabled-RB bitmap.
 * NOTE(review): the initialization of "mask" (before original line 3430)
 * is elided in this view. */
3430 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3431 if (!(disabled_rbs & mask))
3432 enabled_rbs |= mask;
/* Cache the result for other parts of the driver (e.g. CS checking). */
3436 rdev->config.cik.backend_enable_mask = enabled_rbs;
/* Pass 3: program PA_SC_RASTER_CONFIG per SE based on which RBs are
 * enabled. NOTE(review): several switch cases and the shift that consumes
 * enabled_rbs between iterations are elided in this view. */
3438 mutex_lock(&rdev->grbm_idx_mutex);
3439 for (i = 0; i < se_num; i++) {
3440 cik_select_se_sh(rdev, i, 0xffffffff);
3442 for (j = 0; j < sh_per_se; j++) {
/* Two bits of enabled_rbs describe the RB pair for this SH. */
3443 switch (enabled_rbs & 3) {
3446 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3448 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3451 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3454 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3458 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3463 WREG32(PA_SC_RASTER_CONFIG, data);
/* Leave the GRBM index in broadcast mode on exit. */
3465 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3466 mutex_unlock(&rdev->grbm_idx_mutex);
3470 * cik_gpu_init - setup the 3D engine
3472 * @rdev: radeon_device pointer
3474 * Configures the 3D engine and tiling configuration
3475 * registers so that the 3D engine is usable.
static void cik_gpu_init(struct radeon_device *rdev)
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;

	/* Per-ASIC topology limits; each branch ends by selecting the
	 * matching "golden" GB_ADDR_CONFIG value for that chip. */
	switch (rdev->family) {
		/* 2 SE / 4 pipe dGPU (uses the Bonaire golden config below) */
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		/* 4 SE / 16 pipe big dGPU (Hawaii golden config) */
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		/* 1 SE / 4 pipe APU — presumably Kaveri (case label elided
		 * in this excerpt; TODO confirm against full file) */
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		/* smallest config: 1 SE / 2 pipe / 2 CU APU */
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	/* enable + ack SRBM interrupts */
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column-count field, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
		gb_addr_config |= ROW_SIZE(0);
		gb_addr_config |= ROW_SIZE(1);
		gb_addr_config |= ROW_SIZE(2);

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0 num_pipes
	 * bits 7:4 num_banks
	 * bits 11:8 group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
		rdev->config.cik.tile_config |= (0 << 0);
		rdev->config.cik.tile_config |= (1 << 0);
		rdev->config.cik.tile_config |= (2 << 0);
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the chosen address config to every client that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count active CUs across all SE/SH pairs */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the SC FIFO sizes chosen per-family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* make HDP flushes also invalidate the HDP read cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write to latch the HDP host path defaults */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);
3726 * GPU scratch registers helpers function.
3729 * cik_scratch_init - setup driver info for CP scratch regs
3731 * @rdev: radeon_device pointer
3733 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3735 * is not used by default on newer asics (r6xx+). On newer asics,
3736 * memory buffers are used for fences rather than scratch regs.
3738 static void cik_scratch_init(struct radeon_device *rdev)
3742 rdev->scratch.num_reg = 7;
3743 rdev->scratch.reg_base = SCRATCH_REG0;
3744 for (i = 0; i < rdev->scratch.num_reg; i++) {
3745 rdev->scratch.free[i] = true;
3746 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3751 * cik_ring_test - basic gfx ring test
3753 * @rdev: radeon_device pointer
3754 * @ring: radeon_ring structure holding ring information
3756 * Allocate a scratch register and write to it using the gfx ring (CIK).
3757 * Provides a basic gfx ring test to verify that the ring is working.
3758 * Used by cik_cp_gfx_resume();
3759 * Returns 0 on success, error on failure.
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
	/* grab a free CP scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
	/* seed with a sentinel so we can tell when the ring write lands */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
	/* emit a SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll (up to usec_timeout) for the CP to execute the write */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
	radeon_scratch_free(rdev, scratch);
3803 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3805 * @rdev: radeon_device pointer
3806 * @ridx: radeon ring index
3808 * Emits an hdp flush on the cp.
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
	struct radeon_ring *ring = &rdev->ring[ridx];

	/* pick the GPU_HDP_FLUSH req/done bit matching this ring's CP pipe;
	 * compute rings select per-pipe CP2/CP6 bits, gfx uses its own bit */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
		ref_and_mask = CP2 << ring->pipe;
		ref_and_mask = CP6 << ring->pipe;
	case RADEON_RING_TYPE_GFX_INDEX:

	/* WAIT_REG_MEM in write/wait/write mode: kicks GPU_HDP_FLUSH_REQ and
	 * polls GPU_HDP_FLUSH_DONE until the selected bit matches */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
3848 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3850 * @rdev: radeon_device pointer
3851 * @fence: radeon fence object
 * Emits a fence sequence number on the gfx ring and flushes
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write 32-bit value; INT_SEL(0): no irq for the dummy */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): write the fence value and raise an interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
3889 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3891 * @rdev: radeon_device pointer
3892 * @fence: radeon fence object
 * Emits a fence sequence number on the compute ring and flushes
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
	/* DATA_SEL(1): 32-bit fence value; INT_SEL(2): value + interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
3917 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3919 * @rdev: radeon_device pointer
3920 * @ring: radeon ring buffer object
3921 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3924 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3925 * from running ahead of semaphore waits.
3927 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3928 struct radeon_ring *ring,
3929 struct radeon_semaphore *semaphore,
3932 uint64_t addr = semaphore->gpu_addr;
3933 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3935 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3936 radeon_ring_write(ring, lower_32_bits(addr));
3937 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3939 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3940 /* Prevent the PFP from running ahead of the semaphore wait */
3941 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3942 radeon_ring_write(ring, 0x0);
3949 * cik_copy_cpdma - copy pages using the CP DMA engine
3951 * @rdev: radeon_device pointer
3952 * @src_offset: src GPU address
3953 * @dst_offset: dst GPU address
3954 * @num_gpu_pages: number of GPU pages to xfer
3955 * @resv: reservation object to sync to
3957 * Copy GPU paging using the CP DMA engine (CIK+).
3958 * Used by the radeon ttm implementation to move pages if
3959 * registered as the asic copy callback.
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* one DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet + headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);

	/* wait on everything attached to resv before the copy starts */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		/* NOTE(review): control appears to be used here without a
		 * per-iteration reset to 0 — confirm against the full file */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;

	r = radeon_fence_emit(rdev, &fence, ring->idx);
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);
4024 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4026 * @rdev: radeon_device pointer
4027 * @ib: radeon indirect buffer object
4029 * Emits an DE (drawing engine) or CE (constant engine) IB
4030 * on the gfx ring. IBs are usually generated by userspace
4031 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
4033 * on the gfx ring for execution by the GPU.
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VMID 0 is the kernel's; userspace IBs carry their VM's id */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
		if (ring->rptr_save_reg) {
			/* 3 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for WRITE_DATA + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	/* the IB packet itself: header, 64-bit IB address, length/vmid */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
4076 * cik_ib_test - basic gfx ring IB test
4078 * @rdev: radeon_device pointer
4079 * @ring: radeon_ring structure holding ring information
4081 * Allocate an IB and execute it on the gfx ring (CIK).
4082 * Provides a basic gfx ring test to verify that IBs are working.
4083 * Returns 0 on success, error on failure.
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
	struct radeon_ib ib;

	/* grab a CP scratch register as the write target */
	r = radeon_scratch_get(rdev, &scratch);
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
	/* seed with a sentinel so the IB's write is detectable */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
	/* IB payload: one SET_UCONFIG_REG write of 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
	/* wait for the IB's fence, then poll for the scratch write to land */
	r = radeon_fence_wait(ib.fence, false);
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
 * On CIK, gfx and compute now have independent command processors.
4146 * Gfx consists of a single ring and can process both gfx jobs and
4147 * compute jobs. The gfx CP consists of three microengines (ME):
4148 * PFP - Pre-Fetch Parser
4150 * CE - Constant Engine
4151 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4153 * used by the DE so that they can be loaded into cache in parallel
4154 * while the DE is processing state update packets.
4157 * The compute CP consists of two microengines (ME):
4158 * MEC1 - Compute MicroEngine 1
4159 * MEC2 - Compute MicroEngine 2
4160 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4161 * The queues are exposed to userspace and are programmed directly
4162 * by the compute runtime.
4165 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4167 * @rdev: radeon_device pointer
4168 * @enable: enable or disable the MEs
4170 * Halts or unhalts the gfx MEs.
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
	/* unhalt: clear all halt bits in CP_ME_CNTL */
	WREG32(CP_ME_CNTL, 0);
	/* halt: restore the CPU-visible VRAM window if gfx is the copy ring,
	 * then halt ME/PFP/CE and mark the gfx ring not ready.
	 * NOTE(review): the enable/disable branch structure is elided in
	 * this excerpt — confirm against the full file. */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4186 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4188 * @rdev: radeon_device pointer
4190 * Loads the gfx PFP, ME, and CE ucode.
4191 * Returns 0 for success, -EINVAL if the ucode is not available.
4193 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4197 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4200 cik_cp_gfx_enable(rdev, false);
4203 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4204 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4205 const struct gfx_firmware_header_v1_0 *ce_hdr =
4206 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4207 const struct gfx_firmware_header_v1_0 *me_hdr =
4208 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4209 const __le32 *fw_data;
4212 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4213 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4214 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4217 fw_data = (const __le32 *)
4218 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4219 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4220 WREG32(CP_PFP_UCODE_ADDR, 0);
4221 for (i = 0; i < fw_size; i++)
4222 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4223 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4226 fw_data = (const __le32 *)
4227 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4228 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4229 WREG32(CP_CE_UCODE_ADDR, 0);
4230 for (i = 0; i < fw_size; i++)
4231 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4232 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4235 fw_data = (const __be32 *)
4236 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4237 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4238 WREG32(CP_ME_RAM_WADDR, 0);
4239 for (i = 0; i < fw_size; i++)
4240 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4241 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4242 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4244 const __be32 *fw_data;
4247 fw_data = (const __be32 *)rdev->pfp_fw->data;
4248 WREG32(CP_PFP_UCODE_ADDR, 0);
4249 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4250 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4251 WREG32(CP_PFP_UCODE_ADDR, 0);
4254 fw_data = (const __be32 *)rdev->ce_fw->data;
4255 WREG32(CP_CE_UCODE_ADDR, 0);
4256 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4257 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4258 WREG32(CP_CE_UCODE_ADDR, 0);
4261 fw_data = (const __be32 *)rdev->me_fw->data;
4262 WREG32(CP_ME_RAM_WADDR, 0);
4263 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4264 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4265 WREG32(CP_ME_RAM_WADDR, 0);
4272 * cik_cp_gfx_start - start the gfx ring
4274 * @rdev: radeon_device pointer
4276 * Enables the ring and loads the clear state context and other
4277 * packets required to init the ring.
4278 * Returns 0 for success, error for failure.
static int cik_cp_gfx_start(struct radeon_device *rdev)
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];

	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear-state dump plus the fixed init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* golden context state table (from clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);
4333 * cik_cp_gfx_fini - stop the gfx ring
4335 * @rdev: radeon_device pointer
4337 * Stop the gfx ring and tear down the driver ring
4340 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4342 cik_cp_gfx_enable(rdev, false);
4343 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4347 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4349 * @rdev: radeon_device pointer
4351 * Program the location and size of the gfx ring buffer
4352 * and test it to make sure it's working.
4353 * Returns 0 for success, error for failure.
static int cik_cp_gfx_resume(struct radeon_device *rdev)
	struct radeon_ring *ring;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
	/* NOTE(review): the guard for this BUF_SWAP_32BIT OR-in (big-endian
	 * builds only?) is elided in this excerpt — confirm */
		tmp |= BUF_SWAP_32BIT;
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;

	/* gfx is up: widen the active VRAM window back to full size */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4422 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4423 struct radeon_ring *ring)
4427 if (rdev->wb.enabled)
4428 rptr = rdev->wb.wb[ring->rptr_offs/4];
4430 rptr = RREG32(CP_RB0_RPTR);
4435 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4436 struct radeon_ring *ring)
4440 wptr = RREG32(CP_RB0_WPTR);
4445 void cik_gfx_set_wptr(struct radeon_device *rdev,
4446 struct radeon_ring *ring)
4448 WREG32(CP_RB0_WPTR, ring->wptr);
4449 (void)RREG32(CP_RB0_WPTR);
4452 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4453 struct radeon_ring *ring)
4457 if (rdev->wb.enabled) {
4458 rptr = rdev->wb.wb[ring->rptr_offs/4];
4460 mutex_lock(&rdev->srbm_mutex);
4461 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4462 rptr = RREG32(CP_HQD_PQ_RPTR);
4463 cik_srbm_select(rdev, 0, 0, 0, 0);
4464 mutex_unlock(&rdev->srbm_mutex);
4470 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4471 struct radeon_ring *ring)
4475 if (rdev->wb.enabled) {
4476 /* XXX check if swapping is necessary on BE */
4477 wptr = rdev->wb.wb[ring->wptr_offs/4];
4479 mutex_lock(&rdev->srbm_mutex);
4480 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4481 wptr = RREG32(CP_HQD_PQ_WPTR);
4482 cik_srbm_select(rdev, 0, 0, 0, 0);
4483 mutex_unlock(&rdev->srbm_mutex);
4489 void cik_compute_set_wptr(struct radeon_device *rdev,
4490 struct radeon_ring *ring)
4492 /* XXX check if swapping is necessary on BE */
4493 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4494 WDOORBELL32(ring->doorbell_index, ring->wptr);
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
	/* caller holds srbm_mutex (see cik_cp_compute_enable); select the HQD */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* if the queue is active, request a dequeue and wait for it to drain */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
		/* clear the request and reset the queue pointers */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
4523 * cik_cp_compute_enable - enable/disable the compute CP MEs
4525 * @rdev: radeon_device pointer
4526 * @enable: enable or disable the MEs
4528 * Halts or unhalts the compute MEs.
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
	/* unhalt: clear both MEC halt bits */
	WREG32(CP_MEC_CNTL, 0);
	/*
	 * To make hibernation reliable we need to clear compute ring
	 * configuration before halting the compute ring.
	 * NOTE(review): the enable/disable branch structure is elided in
	 * this excerpt — confirm against the full file.
	 */
	mutex_lock(&rdev->srbm_mutex);
	cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	mutex_unlock(&rdev->srbm_mutex);

	/* halt both MECs and mark the compute rings not ready */
	WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4552 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4554 * @rdev: radeon_device pointer
4556 * Loads the compute MEC1&2 ucode.
4557 * Returns 0 for success, -EINVAL if the ucode is not available.
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
	/* halt the MECs before replacing their ucode */
	cik_cp_compute_enable(rdev, false);

	/* new-style firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2: only Kaveri has a second compute microengine */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		/* legacy headerless firmware: big-endian payload, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 is fed the MEC1 image here — presumably the
			 * two are identical on KV; confirm against full file */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4623 * cik_cp_compute_start - start the compute queues
4625 * @rdev: radeon_device pointer
4627 * Enable the compute queues.
4628 * Returns 0 for success, error for failure.
4630 static int cik_cp_compute_start(struct radeon_device *rdev)
4632 cik_cp_compute_enable(rdev, true);
4638 * cik_cp_compute_fini - stop the compute queues
4640 * @rdev: radeon_device pointer
4642 * Stop the compute queues and tear down the driver queue
static void cik_cp_compute_fini(struct radeon_device *rdev)
	cik_cp_compute_enable(rdev, false);

	/* free the MQD buffer object of each of the two compute rings */
	for (i = 0; i < 2; i++) {
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			/* unpin even if reserve warned, then drop our reference */
			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
/* cik_mec_fini - free the HPD EOP buffer object allocated by cik_mec_init() */
4671 static void cik_mec_fini(struct radeon_device *rdev)
4675 if (rdev->mec.hpd_eop_obj) {
4676 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4677 if (unlikely(r != 0))
4678 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4679 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4680 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4682 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
/* NULL the pointer so a later cik_mec_init() allocates a fresh object */
4683 rdev->mec.hpd_eop_obj = NULL;
4687 #define MEC_HPD_SIZE 2048
/* cik_mec_init - allocate, pin and clear the HPD EOP buffer used by the MEC */
4689 static int cik_mec_init(struct radeon_device *rdev)
4695 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4696 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4697 * Nonetheless, we assign only 1 pipe because all other pipes will
4700 rdev->mec.num_mec = 1;
4701 rdev->mec.num_pipe = 1;
/* 8 queues per pipe, so total queue count follows from mec * pipe * 8 */
4702 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4704 if (rdev->mec.hpd_eop_obj == NULL) {
/* one MEC_HPD_SIZE region per pipe, doubled — sized in GTT domain */
4705 r = radeon_bo_create(rdev,
4706 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4708 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4709 &rdev->mec.hpd_eop_obj);
4711 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4716 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4717 if (unlikely(r != 0)) {
/* pin in GTT and record the GPU address for CP_HPD_EOP_BASE_ADDR setup */
4721 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4722 &rdev->mec.hpd_eop_gpu_addr);
4724 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4728 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4730 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4735 /* clear memory. Not sure if this is required or not */
4736 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4738 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4739 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
/* CPU-side shadow of the CP_HQD_* (hardware queue descriptor) register block;
 * embedded in struct bonaire_mqd and written back to the registers in
 * cik_cp_compute_resume(). */
4744 struct hqd_registers
4746 u32 cp_mqd_base_addr;
4747 u32 cp_mqd_base_addr_hi;
4750 u32 cp_hqd_persistent_state;
4751 u32 cp_hqd_pipe_priority;
4752 u32 cp_hqd_queue_priority;
4755 u32 cp_hqd_pq_base_hi;
4757 u32 cp_hqd_pq_rptr_report_addr;
4758 u32 cp_hqd_pq_rptr_report_addr_hi;
4759 u32 cp_hqd_pq_wptr_poll_addr;
4760 u32 cp_hqd_pq_wptr_poll_addr_hi;
4761 u32 cp_hqd_pq_doorbell_control;
4763 u32 cp_hqd_pq_control;
4764 u32 cp_hqd_ib_base_addr;
4765 u32 cp_hqd_ib_base_addr_hi;
4767 u32 cp_hqd_ib_control;
4768 u32 cp_hqd_iq_timer;
4770 u32 cp_hqd_dequeue_request;
4771 u32 cp_hqd_dma_offload;
4772 u32 cp_hqd_sema_cmd;
4773 u32 cp_hqd_msg_type;
4774 u32 cp_hqd_atomic0_preop_lo;
4775 u32 cp_hqd_atomic0_preop_hi;
4776 u32 cp_hqd_atomic1_preop_lo;
4777 u32 cp_hqd_atomic1_preop_hi;
4778 u32 cp_hqd_hq_scheduler0;
4779 u32 cp_hqd_hq_scheduler1;
/* remainder of struct bonaire_mqd — the memory queue descriptor the CP reads;
 * initialized (mostly zeroed) in cik_cp_compute_resume() */
4786 u32 dispatch_initiator;
4790 u32 pipeline_stat_enable;
4791 u32 perf_counter_enable;
4797 u32 resource_limits;
/* thread masks set to all-ones (0xffffffff) in cik_cp_compute_resume() */
4798 u32 static_thread_mgmt01[2];
4800 u32 static_thread_mgmt23[2];
4802 u32 thread_trace_enable;
4805 u32 vgtcs_invoke_count[2];
/* shadow copy of the CP_HQD_* register state for this queue */
4806 struct hqd_registers queue_state;
4808 u32 interrupt_queue[64];
4812 * cik_cp_compute_resume - setup the compute queue registers
4814 * @rdev: radeon_device pointer
4816 * Program the compute queues and test them to make sure they
4818 * Returns 0 for success, error for failure.
4820 static int cik_cp_compute_resume(struct radeon_device *rdev)
4824 bool use_doorbell = true;
4830 struct bonaire_mqd *mqd;
4832 r = cik_cp_compute_start(rdev);
4836 /* fix up chicken bits */
4837 tmp = RREG32(CP_CPF_DEBUG);
4839 WREG32(CP_CPF_DEBUG, tmp);
4841 /* init the pipes */
/* srbm_mutex serializes cik_srbm_select() based register banking */
4842 mutex_lock(&rdev->srbm_mutex);
4844 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4846 cik_srbm_select(rdev, 0, 0, 0, 0);
4848 /* write the EOP addr */
4849 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4850 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4852 /* set the VMID assigned */
4853 WREG32(CP_HPD_EOP_VMID, 0);
4855 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4856 tmp = RREG32(CP_HPD_EOP_CONTROL);
4857 tmp &= ~EOP_SIZE_MASK;
4858 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4859 WREG32(CP_HPD_EOP_CONTROL, tmp);
4861 mutex_unlock(&rdev->srbm_mutex);
4863 /* init the queues. Just two for now. */
4864 for (i = 0; i < 2; i++) {
4866 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4868 idx = CAYMAN_RING_TYPE_CP2_INDEX;
/* lazily allocate the MQD bo; freed in cik_cp_compute_fini() */
4870 if (rdev->ring[idx].mqd_obj == NULL) {
4871 r = radeon_bo_create(rdev,
4872 sizeof(struct bonaire_mqd),
4874 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4875 NULL, &rdev->ring[idx].mqd_obj);
4877 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4882 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4883 if (unlikely(r != 0)) {
4884 cik_cp_compute_fini(rdev);
4887 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4890 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4891 cik_cp_compute_fini(rdev);
4894 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4896 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4897 cik_cp_compute_fini(rdev);
4901 /* init the mqd struct */
4902 memset(buf, 0, sizeof(struct bonaire_mqd));
4904 mqd = (struct bonaire_mqd *)buf;
4905 mqd->header = 0xC0310800;
/* enable all CUs/threads for this queue */
4906 mqd->static_thread_mgmt01[0] = 0xffffffff;
4907 mqd->static_thread_mgmt01[1] = 0xffffffff;
4908 mqd->static_thread_mgmt23[0] = 0xffffffff;
4909 mqd->static_thread_mgmt23[1] = 0xffffffff;
/* select this ring's me/pipe/queue before touching CP_HQD_* registers */
4911 mutex_lock(&rdev->srbm_mutex);
4912 cik_srbm_select(rdev, rdev->ring[idx].me,
4913 rdev->ring[idx].pipe,
4914 rdev->ring[idx].queue, 0);
4916 /* disable wptr polling */
4917 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4918 tmp &= ~WPTR_POLL_EN;
4919 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4921 /* enable doorbell? */
4922 mqd->queue_state.cp_hqd_pq_doorbell_control =
4923 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4925 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4927 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4928 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4929 mqd->queue_state.cp_hqd_pq_doorbell_control);
4931 /* disable the queue if it's active */
4932 mqd->queue_state.cp_hqd_dequeue_request = 0;
4933 mqd->queue_state.cp_hqd_pq_rptr = 0;
4934 mqd->queue_state.cp_hqd_pq_wptr= 0;
/* request a dequeue, then poll CP_HQD_ACTIVE until the HW quiesces */
4935 if (RREG32(CP_HQD_ACTIVE) & 1) {
4936 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4937 for (j = 0; j < rdev->usec_timeout; j++) {
4938 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4942 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4943 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4944 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4947 /* set the pointer to the MQD */
4948 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4949 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4950 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4951 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4952 /* set MQD vmid to 0 */
4953 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4954 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4955 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4957 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4958 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4959 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4960 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4961 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4962 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4964 /* set up the HQD, this is similar to CP_RB0_CNTL */
4965 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4966 mqd->queue_state.cp_hqd_pq_control &=
4967 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
/* queue size is encoded as log2 of the dword count */
4969 mqd->queue_state.cp_hqd_pq_control |=
4970 order_base_2(rdev->ring[idx].ring_size / 8);
4971 mqd->queue_state.cp_hqd_pq_control |=
4972 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4974 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4976 mqd->queue_state.cp_hqd_pq_control &=
4977 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4978 mqd->queue_state.cp_hqd_pq_control |=
4979 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4980 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4982 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4984 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4986 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4987 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4988 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4989 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4990 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4991 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4993 /* set the wb address whether it's enabled or not */
4995 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4997 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4998 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4999 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5000 upper_32_bits(wb_gpu_addr) & 0xffff;
5001 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5002 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5003 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5004 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5006 /* enable the doorbell if requested */
5008 mqd->queue_state.cp_hqd_pq_doorbell_control =
5009 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5010 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5011 mqd->queue_state.cp_hqd_pq_doorbell_control |=
5012 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5013 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5014 mqd->queue_state.cp_hqd_pq_doorbell_control &=
5015 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5018 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5020 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5021 mqd->queue_state.cp_hqd_pq_doorbell_control);
5023 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5024 rdev->ring[idx].wptr = 0;
5025 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5026 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5027 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5029 /* set the vmid for the queue */
5030 mqd->queue_state.cp_hqd_vmid = 0;
5031 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5033 /* activate the queue */
5034 mqd->queue_state.cp_hqd_active = 1;
5035 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
/* restore default SRBM banking before releasing the mutex */
5037 cik_srbm_select(rdev, 0, 0, 0, 0);
5038 mutex_unlock(&rdev->srbm_mutex);
5040 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5041 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
/* mark ready, then verify the queue with a ring test */
5043 rdev->ring[idx].ready = true;
5044 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5046 rdev->ring[idx].ready = false;
/* cik_cp_enable - enable/disable both the gfx and compute command processors */
5052 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5054 cik_cp_gfx_enable(rdev, enable);
5055 cik_cp_compute_enable(rdev, enable);
/* cik_cp_load_microcode - load gfx then compute CP microcode;
 * returns 0 on success or the first failing loader's error code */
5058 static int cik_cp_load_microcode(struct radeon_device *rdev)
5062 r = cik_cp_gfx_load_microcode(rdev);
5065 r = cik_cp_compute_load_microcode(rdev);
/* cik_cp_fini - tear down both the gfx and compute command processors */
5072 static void cik_cp_fini(struct radeon_device *rdev)
5074 cik_cp_gfx_fini(rdev);
5075 cik_cp_compute_fini(rdev);
/* cik_cp_resume - load CP ucode and bring up gfx + compute rings.
 * GUI idle interrupts are masked around the sequence and re-enabled after. */
5078 static int cik_cp_resume(struct radeon_device *rdev)
5082 cik_enable_gui_idle_interrupt(rdev, false);
5084 r = cik_cp_load_microcode(rdev);
5088 r = cik_cp_gfx_resume(rdev);
5091 r = cik_cp_compute_resume(rdev);
5095 cik_enable_gui_idle_interrupt(rdev, true);
/* cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status registers
 * to the kernel log; used for GPU reset diagnostics */
5100 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5102 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
5103 RREG32(GRBM_STATUS));
5104 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
5105 RREG32(GRBM_STATUS2));
5106 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
5107 RREG32(GRBM_STATUS_SE0));
5108 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
5109 RREG32(GRBM_STATUS_SE1));
5110 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
5111 RREG32(GRBM_STATUS_SE2));
5112 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
5113 RREG32(GRBM_STATUS_SE3));
5114 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
5115 RREG32(SRBM_STATUS));
5116 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
5117 RREG32(SRBM_STATUS2));
/* both SDMA engines share the SDMA0 register layout at different offsets */
5118 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
5119 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5120 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
5121 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5122 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5123 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
5124 RREG32(CP_STALLED_STAT1));
5125 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
5126 RREG32(CP_STALLED_STAT2));
5127 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
5128 RREG32(CP_STALLED_STAT3));
5129 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
5130 RREG32(CP_CPF_BUSY_STAT));
5131 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
5132 RREG32(CP_CPF_STALLED_STAT1));
5133 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5134 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5135 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
5136 RREG32(CP_CPC_STALLED_STAT1));
5137 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5141 * cik_gpu_check_soft_reset - check which blocks are busy
5143 * @rdev: radeon_device pointer
5145 * Check which blocks are busy and return the relevant reset
5146 * mask to be used by cik_gpu_soft_reset().
5147 * Returns a mask of the blocks to be reset.
5149 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: any busy gfx pipeline block implies a GFX reset */
5155 tmp = RREG32(GRBM_STATUS);
5156 if (tmp & (PA_BUSY | SC_BUSY |
5157 BCI_BUSY | SX_BUSY |
5158 TA_BUSY | VGT_BUSY |
5160 GDS_BUSY | SPI_BUSY |
5161 IA_BUSY | IA_BUSY_NO_DMA))
5162 reset_mask |= RADEON_RESET_GFX;
5164 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5165 reset_mask |= RADEON_RESET_CP;
5168 tmp = RREG32(GRBM_STATUS2);
5170 reset_mask |= RADEON_RESET_RLC;
5172 /* SDMA0_STATUS_REG */
5173 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5174 if (!(tmp & SDMA_IDLE))
5175 reset_mask |= RADEON_RESET_DMA;
5177 /* SDMA1_STATUS_REG */
5178 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5179 if (!(tmp & SDMA_IDLE))
5180 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS2: second place the SDMA engines report busy */
5183 tmp = RREG32(SRBM_STATUS2);
5184 if (tmp & SDMA_BUSY)
5185 reset_mask |= RADEON_RESET_DMA;
5187 if (tmp & SDMA1_BUSY)
5188 reset_mask |= RADEON_RESET_DMA1;
5191 tmp = RREG32(SRBM_STATUS);
5194 reset_mask |= RADEON_RESET_IH;
5197 reset_mask |= RADEON_RESET_SEM;
5199 if (tmp & GRBM_RQ_PENDING)
5200 reset_mask |= RADEON_RESET_GRBM;
5203 reset_mask |= RADEON_RESET_VMC;
5205 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5206 MCC_BUSY | MCD_BUSY))
5207 reset_mask |= RADEON_RESET_MC;
5209 if (evergreen_is_display_hung(rdev))
5210 reset_mask |= RADEON_RESET_DISPLAY;
5212 /* Skip MC reset as it's mostly likely not hung, just busy */
5213 if (reset_mask & RADEON_RESET_MC) {
5214 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5215 reset_mask &= ~RADEON_RESET_MC;
5222 * cik_gpu_soft_reset - soft reset GPU
5224 * @rdev: radeon_device pointer
5225 * @reset_mask: mask of which blocks to reset
5227 * Soft reset the blocks specified in @reset_mask.
5229 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5231 struct evergreen_mc_save save;
5232 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5235 if (reset_mask == 0)
5238 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
/* dump register state before the reset for post-mortem analysis */
5240 cik_print_gpu_status_regs(rdev);
5241 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5242 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5243 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5244 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5253 /* Disable GFX parsing/prefetching */
5254 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5256 /* Disable MEC parsing/prefetching */
5257 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* halt whichever SDMA engines are being reset */
5259 if (reset_mask & RADEON_RESET_DMA) {
5261 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5263 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5265 if (reset_mask & RADEON_RESET_DMA1) {
5267 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5269 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* quiesce the memory controller before touching the reset registers */
5272 evergreen_mc_stop(rdev, &save);
5273 if (evergreen_mc_wait_for_idle(rdev)) {
5274 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* translate the generic reset mask into GRBM/SRBM soft reset bits */
5277 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5278 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5280 if (reset_mask & RADEON_RESET_CP) {
5281 grbm_soft_reset |= SOFT_RESET_CP;
5283 srbm_soft_reset |= SOFT_RESET_GRBM;
5286 if (reset_mask & RADEON_RESET_DMA)
5287 srbm_soft_reset |= SOFT_RESET_SDMA;
5289 if (reset_mask & RADEON_RESET_DMA1)
5290 srbm_soft_reset |= SOFT_RESET_SDMA1;
5292 if (reset_mask & RADEON_RESET_DISPLAY)
5293 srbm_soft_reset |= SOFT_RESET_DC;
5295 if (reset_mask & RADEON_RESET_RLC)
5296 grbm_soft_reset |= SOFT_RESET_RLC;
5298 if (reset_mask & RADEON_RESET_SEM)
5299 srbm_soft_reset |= SOFT_RESET_SEM;
5301 if (reset_mask & RADEON_RESET_IH)
5302 srbm_soft_reset |= SOFT_RESET_IH;
5304 if (reset_mask & RADEON_RESET_GRBM)
5305 srbm_soft_reset |= SOFT_RESET_GRBM;
5307 if (reset_mask & RADEON_RESET_VMC)
5308 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC soft reset is only applicable to discrete parts, not IGPs */
5310 if (!(rdev->flags & RADEON_IS_IGP)) {
5311 if (reset_mask & RADEON_RESET_MC)
5312 srbm_soft_reset |= SOFT_RESET_MC;
/* assert then deassert the GRBM reset bits; readbacks post the writes */
5315 if (grbm_soft_reset) {
5316 tmp = RREG32(GRBM_SOFT_RESET);
5317 tmp |= grbm_soft_reset;
5318 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5319 WREG32(GRBM_SOFT_RESET, tmp);
5320 tmp = RREG32(GRBM_SOFT_RESET);
5324 tmp &= ~grbm_soft_reset;
5325 WREG32(GRBM_SOFT_RESET, tmp);
5326 tmp = RREG32(GRBM_SOFT_RESET);
/* same assert/deassert sequence for the SRBM reset bits */
5329 if (srbm_soft_reset) {
5330 tmp = RREG32(SRBM_SOFT_RESET);
5331 tmp |= srbm_soft_reset;
5332 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5333 WREG32(SRBM_SOFT_RESET, tmp);
5334 tmp = RREG32(SRBM_SOFT_RESET);
5338 tmp &= ~srbm_soft_reset;
5339 WREG32(SRBM_SOFT_RESET, tmp);
5340 tmp = RREG32(SRBM_SOFT_RESET);
5343 /* Wait a little for things to settle down */
5346 evergreen_mc_resume(rdev, &save);
5349 cik_print_gpu_status_regs(rdev);
/* GMCON register state preserved across a KV (Kaveri/IGP) pci config reset;
 * saved by kv_save_regs_for_reset(), restored by kv_restore_regs_for_reset() */
5352 struct kv_reset_save_regs {
5353 u32 gmcon_reng_execute;
/* kv_save_regs_for_reset - stash GMCON registers into @save and disable
 * the render engine auto-execute/stutter bits before a reset */
5358 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5359 struct kv_reset_save_regs *save)
5361 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5362 save->gmcon_misc = RREG32(GMCON_MISC);
5363 save->gmcon_misc3 = RREG32(GMCON_MISC3);
/* mask the execute/stutter enables while the reset is in progress */
5365 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5366 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5367 STCTRL_STUTTER_EN));
/* kv_restore_regs_for_reset - replay the GMCON PGFSM programming sequence
 * after a pci config reset, then restore the registers saved in @save.
 * The CONFIG/WRITE value pairs are a fixed hardware init sequence
 * (magic values from the hw team); do not reorder. */
5370 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5371 struct kv_reset_save_regs *save)
5375 WREG32(GMCON_PGFSM_WRITE, 0);
5376 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5378 for (i = 0; i < 5; i++)
5379 WREG32(GMCON_PGFSM_WRITE, 0);
5381 WREG32(GMCON_PGFSM_WRITE, 0);
5382 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5384 for (i = 0; i < 5; i++)
5385 WREG32(GMCON_PGFSM_WRITE, 0);
5387 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5388 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5390 for (i = 0; i < 5; i++)
5391 WREG32(GMCON_PGFSM_WRITE, 0);
5393 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5394 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5396 for (i = 0; i < 5; i++)
5397 WREG32(GMCON_PGFSM_WRITE, 0);
5399 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5400 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5402 for (i = 0; i < 5; i++)
5403 WREG32(GMCON_PGFSM_WRITE, 0);
5405 WREG32(GMCON_PGFSM_WRITE, 0);
5406 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5408 for (i = 0; i < 5; i++)
5409 WREG32(GMCON_PGFSM_WRITE, 0);
5411 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5412 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5414 for (i = 0; i < 5; i++)
5415 WREG32(GMCON_PGFSM_WRITE, 0);
5417 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5418 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5420 for (i = 0; i < 5; i++)
5421 WREG32(GMCON_PGFSM_WRITE, 0);
5423 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5424 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5426 for (i = 0; i < 5; i++)
5427 WREG32(GMCON_PGFSM_WRITE, 0);
5429 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5430 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5432 for (i = 0; i < 5; i++)
5433 WREG32(GMCON_PGFSM_WRITE, 0);
5435 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5436 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
/* restore the GMCON state saved before the reset */
5438 WREG32(GMCON_MISC3, save->gmcon_misc3);
5439 WREG32(GMCON_MISC, save->gmcon_misc);
5440 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
/* cik_gpu_pci_config_reset - full GPU reset via the PCI config space.
 * Heavier hammer than cik_gpu_soft_reset(); used when soft reset fails. */
5443 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5445 struct evergreen_mc_save save;
5446 struct kv_reset_save_regs kv_save = { 0 };
5449 dev_info(rdev->dev, "GPU pci config reset\n");
5457 /* Disable GFX parsing/prefetching */
5458 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5460 /* Disable MEC parsing/prefetching */
5461 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* halt both SDMA engines */
5464 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5466 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5468 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5470 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5471 /* XXX other engines? */
5473 /* halt the rlc, disable cp internal ints */
5478 /* disable mem access */
5479 evergreen_mc_stop(rdev, &save);
5480 if (evergreen_mc_wait_for_idle(rdev)) {
5481 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
/* IGPs (Kaveri) need GMCON state saved/restored around the reset */
5484 if (rdev->flags & RADEON_IS_IGP)
5485 kv_save_regs_for_reset(rdev, &kv_save);
5488 pci_clear_master(rdev->pdev);
5490 radeon_pci_config_reset(rdev);
5494 /* wait for asic to come out of reset */
/* CONFIG_MEMSIZE reads 0xffffffff while the asic is still in reset */
5495 for (i = 0; i < rdev->usec_timeout; i++) {
5496 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5501 /* does asic init need to be run first??? */
5502 if (rdev->flags & RADEON_IS_IGP)
5503 kv_restore_regs_for_reset(rdev, &kv_save)
5507 * cik_asic_reset - soft reset GPU
5509 * @rdev: radeon_device pointer
5511 * Look up which blocks are hung and attempt
5513 * Returns 0 for success.
5515 int cik_asic_reset(struct radeon_device *rdev)
5519 reset_mask = cik_gpu_check_soft_reset(rdev)
/* flag the engine hung in the BIOS scratch regs while we recover */
5522 r600_set_bios_scratch_engine_hung(rdev, true);
5524 /* try soft reset */
5525 cik_gpu_soft_reset(rdev, reset_mask);
/* re-check: if blocks are still busy, escalate to a pci config reset */
5527 reset_mask = cik_gpu_check_soft_reset(rdev);
5529 /* try pci config reset */
5530 if (reset_mask && radeon_hard_reset)
5531 cik_gpu_pci_config_reset(rdev);
5533 reset_mask = cik_gpu_check_soft_reset(rdev);
/* clear the hung flag once the GPU reports idle again */
5536 r600_set_bios_scratch_engine_hung(rdev, false);
5542 * cik_gfx_is_lockup - check if the 3D engine is locked up
5544 * @rdev: radeon_device pointer
5545 * @ring: radeon_ring structure holding ring information
5547 * Check if the 3D engine is locked up (CIK).
5548 * Returns true if the engine is locked, false if not.
5550 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5552 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* no gfx/compute/CP block busy -> engine is idle, refresh lockup tracking */
5554 if (!(reset_mask & (RADEON_RESET_GFX |
5555 RADEON_RESET_COMPUTE |
5556 RADEON_RESET_CP))) {
5557 radeon_ring_lockup_update(rdev, ring);
/* busy: let the ring-progress tracker decide whether it's a real hang */
5560 return radeon_ring_test_lockup(rdev, ring);
5565 * cik_mc_program - program the GPU memory controller
5567 * @rdev: radeon_device pointer
5569 * Set the location of vram, gart, and AGP in the GPU's
5570 * physical address space (CIK).
5572 static void cik_mc_program(struct radeon_device *rdev)
5574 struct evergreen_mc_save save;
5578 /* Initialize HDP */
/* zero 32 HDP register sets spaced 0x18 bytes apart */
5579 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5580 WREG32((0x2c14 + j), 0x00000000);
5581 WREG32((0x2c18 + j), 0x00000000);
5582 WREG32((0x2c1c + j), 0x00000000);
5583 WREG32((0x2c20 + j), 0x00000000);
5584 WREG32((0x2c24 + j), 0x00000000);
5586 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
/* halt MC clients while the aperture registers are reprogrammed */
5588 evergreen_mc_stop(rdev, &save);
5589 if (radeon_mc_wait_for_idle(rdev)) {
5590 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5592 /* Lockout access through VGA aperture*/
5593 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5594 /* Update configuration */
5595 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5596 rdev->mc.vram_start >> 12);
5597 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5598 rdev->mc.vram_end >> 12);
5599 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5600 rdev->vram_scratch.gpu_addr >> 12);
/* FB location: top 16 bits = end, bottom 16 bits = start (in 16MB units) */
5601 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5602 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5603 WREG32(MC_VM_FB_LOCATION, tmp);
5604 /* XXX double check these! */
5605 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5606 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5607 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP is unused on CIK: base 0 with top < bottom disables the aperture */
5608 WREG32(MC_VM_AGP_BASE, 0);
5609 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5610 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5611 if (radeon_mc_wait_for_idle(rdev)) {
5612 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5614 evergreen_mc_resume(rdev, &save);
5615 /* we need to own VRAM, so turn off the VGA renderer here
5616 * to stop it overwriting our objects */
5617 rv515_vga_render_disable(rdev);
5621 * cik_mc_init - initialize the memory controller driver params
5623 * @rdev: radeon_device pointer
5625 * Look up the amount of vram, vram width, and decide how to place
5626 * vram and gart within the GPU's physical address space (CIK).
5627 * Returns 0 for success.
5629 static int cik_mc_init(struct radeon_device *rdev)
5632 int chansize, numchan;
5634 /* Get VRAM informations */
5635 rdev->mc.vram_is_ddr = true;
/* channel size comes from MC_ARB_RAMCFG, channel count from MC_SHARED_CHMAP */
5636 tmp = RREG32(MC_ARB_RAMCFG);
5637 if (tmp & CHANSIZE_MASK) {
5642 tmp = RREG32(MC_SHARED_CHMAP);
5643 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* bus width in bits = channels * per-channel width */
5673 rdev->mc.vram_width = numchan * chansize;
5674 /* Could aper size report 0 ? */
5675 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5676 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5677 /* size in MB on si */
5678 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5679 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI vram/gtt placement logic */
5681 si_vram_gtt_location(rdev, &rdev->mc);
5682 radeon_update_bandwidth_info(rdev);
5689 * VMID 0 is the physical GPU addresses as used by the kernel.
5690 * VMIDs 1-15 are used for userspace clients and are handled
5691 * by the radeon vm/hsa code.
5694 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5696 * @rdev: radeon_device pointer
5698 * Flush the TLB for the VMID 0 page table (CIK).
5700 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5702 /* flush hdp cache */
5703 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5705 /* bits 0-15 are the VM contexts0-15 */
/* only bit 0 set: invalidate VM context 0 (the kernel's page table) */
5706 WREG32(VM_INVALIDATE_REQUEST, 0x1);
/* cik_pcie_init_compute_vmid - program SH_MEM_* for VMIDs 8-15,
 * the VMIDs reserved for amdkfd compute clients (see cik_vm_init()) */
5709 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5712 uint32_t sh_mem_bases, sh_mem_config;
/* same base (0x6000) for both private and shared apertures */
5714 sh_mem_bases = 0x6000 | 0x6000 << 16;
5715 sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5716 sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5718 mutex_lock(&rdev->srbm_mutex);
5719 for (i = 8; i < 16; i++) {
5720 cik_srbm_select(rdev, 0, 0, 0, i);
5721 /* CP and shaders */
5722 WREG32(SH_MEM_CONFIG, sh_mem_config);
/* APE1 disabled: base > limit */
5723 WREG32(SH_MEM_APE1_BASE, 1);
5724 WREG32(SH_MEM_APE1_LIMIT, 0);
5725 WREG32(SH_MEM_BASES, sh_mem_bases);
/* restore default SRBM banking before releasing the mutex */
5727 cik_srbm_select(rdev, 0, 0, 0, 0);
5728 mutex_unlock(&rdev->srbm_mutex);
5732 * cik_pcie_gart_enable - gart enable
5734 * @rdev: radeon_device pointer
5736 * This sets up the TLBs, programs the page tables for VMID0,
5737 * sets up the hw for VMIDs 1-15 which are allocated on
5738 * demand, and sets up the global locations for the LDS, GDS,
5739 * and GPUVM for FSA64 clients (CIK).
5740 * Returns 0 for success, errors for failure.
5742 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5746 if (rdev->gart.robj == NULL) {
5747 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5750 r = radeon_gart_table_vram_pin(rdev);
5753 /* Setup TLB control */
5754 WREG32(MC_VM_MX_L1_TLB_CNTL,
5757 ENABLE_L1_FRAGMENT_PROCESSING |
5758 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5759 ENABLE_ADVANCED_DRIVER_MODEL |
5760 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5761 /* Setup L2 cache */
5762 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5763 ENABLE_L2_FRAGMENT_PROCESSING |
5764 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5765 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5766 EFFECTIVE_L2_QUEUE_SIZE(7) |
5767 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5768 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5769 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5771 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5772 /* setup context0 */
/* context0 covers the GTT range with a flat (depth 0) page table */
5773 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5774 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5775 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5776 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5777 (u32)(rdev->dummy_page.addr >> 12));
5778 WREG32(VM_CONTEXT0_CNTL2, 0);
5779 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5780 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5786 /* restore context1-15 */
5787 /* set vm size, must be a multiple of 4 */
5788 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5789 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
/* contexts 1-7 use the CONTEXT0 bank of base-addr regs, 8-15 the CONTEXT8
 * bank; restore the table addresses saved by cik_pcie_gart_disable() */
5790 for (i = 1; i < 16; i++) {
5792 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5793 rdev->vm_manager.saved_table_addr[i]);
5795 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5796 rdev->vm_manager.saved_table_addr[i]);
5799 /* enable context1-15 */
5800 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5801 (u32)(rdev->dummy_page.addr >> 12));
5802 WREG32(VM_CONTEXT1_CNTL2, 4);
5803 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5804 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5805 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5806 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5807 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5808 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5809 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5810 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5811 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5812 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5813 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5814 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5815 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5816 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5818 if (rdev->family == CHIP_KAVERI) {
5819 u32 tmp = RREG32(CHUB_CONTROL);
5821 WREG32(CHUB_CONTROL, tmp);
5824 /* XXX SH_MEM regs */
5825 /* where to put LDS, scratch, GPUVM in FSA64 space */
5826 mutex_lock(&rdev->srbm_mutex);
5827 for (i = 0; i < 16; i++) {
5828 cik_srbm_select(rdev, 0, 0, 0, i);
5829 /* CP and shaders */
5830 WREG32(SH_MEM_CONFIG, 0);
/* APE1 disabled: base > limit */
5831 WREG32(SH_MEM_APE1_BASE, 1);
5832 WREG32(SH_MEM_APE1_LIMIT, 0);
5833 WREG32(SH_MEM_BASES, 0);
5835 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5836 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5837 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5838 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5839 /* XXX SDMA RLC - todo */
5841 cik_srbm_select(rdev, 0, 0, 0, 0);
5842 mutex_unlock(&rdev->srbm_mutex);
/* program the amdkfd VMIDs (8-15) */
5844 cik_pcie_init_compute_vmid(rdev);
5846 cik_pcie_gart_tlb_flush(rdev);
5847 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5848 (unsigned)(rdev->mc.gtt_size >> 20),
5849 (unsigned long long)rdev->gart.table_addr);
5850 rdev->gart.ready = true;
5855 * cik_pcie_gart_disable - gart disable
5857 * @rdev: radeon_device pointer
5859 * This disables all VM page table (CIK).
5861 static void cik_pcie_gart_disable(struct radeon_device *rdev)
/* save the per-context page table addresses so cik_pcie_gart_enable()
 * can restore them on resume */
5865 for (i = 1; i < 16; ++i) {
5868 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5870 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5871 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5874 /* Disable all tables */
5875 WREG32(VM_CONTEXT0_CNTL, 0);
5876 WREG32(VM_CONTEXT1_CNTL, 0);
5877 /* Setup TLB control */
5878 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5879 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5880 /* Setup L2 cache */
5882 ENABLE_L2_FRAGMENT_PROCESSING |
5883 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5884 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5885 EFFECTIVE_L2_QUEUE_SIZE(7) |
5886 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5887 WREG32(VM_L2_CNTL2, 0);
5888 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5889 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5890 radeon_gart_table_vram_unpin(rdev);
5894  * cik_pcie_gart_fini - vm fini callback
5896  * @rdev: radeon_device pointer
5898  * Tears down the driver GART/VM setup (CIK).
5900 static void cik_pcie_gart_fini(struct radeon_device *rdev)
	/* disable first so hardware stops referencing the table, then free it */
5902 	cik_pcie_gart_disable(rdev);
5903 	radeon_gart_table_vram_free(rdev);
5904 	radeon_gart_fini(rdev);
5909  * cik_ib_parse - vm ib_parse callback
5911  * @rdev: radeon_device pointer
5912  * @ib: indirect buffer pointer
5914  * CIK uses hw IB checking so this is a nop (CIK).
/* NOTE(review): function body elided in this extract — upstream simply returns 0. TODO confirm. */
5916 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5923  * VMID 0 is the physical GPU addresses as used by the kernel.
5924  * VMIDs 1-15 are used for userspace clients and are handled
5925  * by the radeon vm/hsa code.
5928  * cik_vm_init - cik vm init callback
5930  * @rdev: radeon_device pointer
5932  * Inits cik specific vm parameters (number of VMs, base of vram for
5933  * VMIDs 1-15) (CIK).
5934  * Returns 0 for success.
5936 int cik_vm_init(struct radeon_device *rdev)
5940 	 * VMID 0 is reserved for System
5941 	 * radeon graphics/compute will use VMIDs 1-7
5942 	 * amdkfd will use VMIDs 8-15
5944 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5945 	/* base offset of vram pages */
	/* IGPs carve VRAM out of system memory, so the MC reports a non-zero FB offset */
5946 	if (rdev->flags & RADEON_IS_IGP) {
5947 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		/* NOTE(review): a shift of tmp (<< 22 upstream) appears elided here — verify */
5949 		rdev->vm_manager.vram_base_offset = tmp;
	/* discrete cards address VRAM from 0 */
5951 		rdev->vm_manager.vram_base_offset = 0;
5957  * cik_vm_fini - cik vm fini callback
5959  * @rdev: radeon_device pointer
5961  * Tear down any asic specific VM setup (CIK).
/* NOTE(review): body elided in this extract — upstream is an empty stub. */
5963 void cik_vm_fini(struct radeon_device *rdev)
5968  * cik_vm_decode_fault - print human readable fault info
5970  * @rdev: radeon_device pointer
5971  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5972  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5974  * Print human readable fault information (CIK).
5976 static void cik_vm_decode_fault(struct radeon_device *rdev,
5977 				u32 status, u32 addr, u32 mc_client)
5980 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5981 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client packs a 4-char ASCII block name, big-endian byte order */
5982 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5983 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
	/* Hawaii uses a wider client-ID field than the other CIK parts */
5985 	if (rdev->family == CHIP_HAWAII)
5986 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5988 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5990 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5991 	       protections, vmid, addr,
5992 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5993 	       block, mc_client, mc_id);
5997  * cik_vm_flush - cik vm flush using the CP
5999  * @rdev: radeon_device pointer
6001  * Update the page table base and flush the VM TLB
6002  * using the CP (CIK).
/*
 * NOTE(review): the exact packet sequence below is order-sensitive; some
 * branch headers (vm_id < 8 vs >= 8, usepfp checks) are elided in this extract.
 */
6004 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6005 		  unsigned vm_id, uint64_t pd_addr)
	/* only the GFX ring has a PFP engine; compute rings write via ME */
6007 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
	/* point the per-VMID page table base register at the new directory */
6009 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6010 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6011 				 WRITE_DATA_DST_SEL(0)));
6013 		radeon_ring_write(ring,
6014 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	/* VMIDs 8-15 use the second register bank */
6016 		radeon_ring_write(ring,
6017 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6019 	radeon_ring_write(ring, 0);
6020 	radeon_ring_write(ring, pd_addr >> 12);
6022 	/* update SH_MEM_* regs */
	/* select the VMID in SRBM so the SH_MEM writes land in its register set */
6023 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6024 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6025 				 WRITE_DATA_DST_SEL(0)));
6026 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6027 	radeon_ring_write(ring, 0);
6028 	radeon_ring_write(ring, VMID(vm_id));
6030 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6031 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6032 				 WRITE_DATA_DST_SEL(0)));
6033 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6034 	radeon_ring_write(ring, 0);
6036 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6037 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6038 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6039 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
	/* restore SRBM back to VMID 0 (kernel) */
6041 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6042 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6043 				 WRITE_DATA_DST_SEL(0)));
6044 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6045 	radeon_ring_write(ring, 0);
6046 	radeon_ring_write(ring, VMID(0));
	/* flush HDP so host writes are visible before the TLB invalidate */
6049 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6051 	/* bits 0-15 are the VM contexts0-15 */
6052 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6053 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6054 				 WRITE_DATA_DST_SEL(0)));
6055 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6056 	radeon_ring_write(ring, 0);
6057 	radeon_ring_write(ring, 1 << vm_id);
6059 	/* wait for the invalidate to complete */
6060 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6061 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6062 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6063 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6064 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6065 	radeon_ring_write(ring, 0);
6066 	radeon_ring_write(ring, 0); /* ref */
6067 	radeon_ring_write(ring, 0); /* mask */
6068 	radeon_ring_write(ring, 0x20); /* poll interval */
6070 	/* compute doesn't have PFP */
6072 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6073 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6074 		radeon_ring_write(ring, 0x0);
6080  * The RLC is a multi-purpose microengine that handles a
6081  * variety of functions, the most important of which is
6082  * the interrupt controller.
/* Enable/disable the context busy/empty ("GUI idle") interrupts on CP ring 0. */
6084 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6087 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6090 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6092 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6093 	WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load-balancing (LBPW) via the LOAD_BALANCE_ENABLE bit. */
6096 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6100 	tmp = RREG32(RLC_LB_CNTL);
6102 		tmp |= LOAD_BALANCE_ENABLE;
6104 		tmp &= ~LOAD_BALANCE_ENABLE;
6105 	WREG32(RLC_LB_CNTL, tmp);
/*
 * Poll until the RLC serdes masters (per-SE/SH CU masters and the
 * non-CU masters) report idle, bounded by rdev->usec_timeout.
 */
6108 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
	/* grbm_idx_mutex serializes GRBM SE/SH index selection across callers */
6113 	mutex_lock(&rdev->grbm_idx_mutex);
6114 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6115 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6116 			cik_select_se_sh(rdev, i, j);
6117 			for (k = 0; k < rdev->usec_timeout; k++) {
6118 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
	/* restore broadcast selection before dropping the lock */
6124 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6125 	mutex_unlock(&rdev->grbm_idx_mutex);
6127 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6128 	for (k = 0; k < rdev->usec_timeout; k++) {
6129 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write a new RLC_CNTL value (presumably only when it differs — the compare is elided here). */
6135 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6139 	tmp = RREG32(RLC_CNTL);
6141 		WREG32(RLC_CNTL, rlc);
/*
 * Halt the RLC if it is running and wait for GPM + serdes idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * via cik_update_rlc().
 */
6144 static u32 cik_halt_rlc(struct radeon_device *rdev)
6148 	orig = data = RREG32(RLC_CNTL);
6150 	if (data & RLC_ENABLE) {
6153 		data &= ~RLC_ENABLE;
6154 		WREG32(RLC_CNTL, data);
6156 		for (i = 0; i < rdev->usec_timeout; i++) {
6157 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6162 		cik_wait_for_rlc_serdes(rdev);
/*
 * Ask the RLC to enter safe mode and poll until it acknowledges:
 * first wait for GFX power/clock status, then for the REQ bit to clear.
 */
6168 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6172 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6173 	WREG32(RLC_GPR_REG2, tmp);
6175 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6176 	for (i = 0; i < rdev->usec_timeout; i++) {
6177 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6182 	for (i = 0; i < rdev->usec_timeout; i++) {
6183 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Post the exit-safe-mode message to the RLC (fire and forget — no ack poll here). */
6189 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6193 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6194 	WREG32(RLC_GPR_REG2, tmp);
6198  * cik_rlc_stop - stop the RLC ME
6200  * @rdev: radeon_device pointer
6202  * Halt the RLC ME (MicroEngine) (CIK).
6204 static void cik_rlc_stop(struct radeon_device *rdev)
	/* clearing RLC_CNTL halts the microengine */
6206 	WREG32(RLC_CNTL, 0);
6208 	cik_enable_gui_idle_interrupt(rdev, false);
	/* make sure the serdes masters have drained before returning */
6210 	cik_wait_for_rlc_serdes(rdev);
6214  * cik_rlc_start - start the RLC ME
6216  * @rdev: radeon_device pointer
6218  * Unhalt the RLC ME (MicroEngine) (CIK).
6220 static void cik_rlc_start(struct radeon_device *rdev)
6222 	WREG32(RLC_CNTL, RLC_ENABLE);
6224 	cik_enable_gui_idle_interrupt(rdev, true);
6230  * cik_rlc_resume - setup the RLC hw
6232  * @rdev: radeon_device pointer
6234  * Initialize the RLC registers, load the ucode,
6235  * and start the RLC (CIK).
6236  * Returns 0 for success, -EINVAL if the ucode is not available.
/* NOTE(review): the rlc_fw NULL check and reset sequence are elided in this extract. */
6238 static int cik_rlc_resume(struct radeon_device *rdev)
	/* disable CGCG/CGLS while (re)loading the RLC */
6248 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6249 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6257 	WREG32(RLC_LB_CNTR_INIT, 0);
6258 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
	/* program load-balancing params with broadcast SE/SH selection */
6260 	mutex_lock(&rdev->grbm_idx_mutex);
6261 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6262 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6263 	WREG32(RLC_LB_PARAMS, 0x00600408);
6264 	WREG32(RLC_LB_CNTL, 0x80000004);
6265 	mutex_unlock(&rdev->grbm_idx_mutex);
6267 	WREG32(RLC_MC_CNTL, 0);
6268 	WREG32(RLC_UCODE_CNTL, 0);
	/* new_fw path: little-endian firmware image with a structured header */
6271 		const struct rlc_firmware_header_v1_0 *hdr =
6272 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6273 		const __le32 *fw_data = (const __le32 *)
6274 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6276 		radeon_ucode_print_rlc_hdr(&hdr->header);
6278 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6279 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6280 		for (i = 0; i < size; i++)
6281 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6282 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	/* legacy path: big-endian raw image with fixed, per-family sizes */
6284 		const __be32 *fw_data;
6286 		switch (rdev->family) {
6290 			size = BONAIRE_RLC_UCODE_SIZE;
6293 			size = KV_RLC_UCODE_SIZE;
6296 			size = KB_RLC_UCODE_SIZE;
6299 			size = ML_RLC_UCODE_SIZE;
6303 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6304 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6305 		for (i = 0; i < size; i++)
6306 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6307 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6310 	/* XXX - find out what chips support lbpw */
6311 	cik_enable_lbpw(rdev, false);
6313 	if (rdev->family == CHIP_BONAIRE)
6314 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6316 	cik_rlc_start(rdev);
/*
 * Enable/disable coarse-grain clock gating (CGCG/CGLS) for GFX.
 * Enabling requires halting the RLC, programming the serdes write
 * masks/ctrl under grbm_idx_mutex, then restoring the RLC.
 */
6321 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6323 	u32 data, orig, tmp, tmp2;
6325 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6327 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6328 		cik_enable_gui_idle_interrupt(rdev, true);
6330 		tmp = cik_halt_rlc(rdev);
6332 		mutex_lock(&rdev->grbm_idx_mutex);
6333 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6334 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6335 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6336 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6337 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6338 		mutex_unlock(&rdev->grbm_idx_mutex);
		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
6340 		cik_update_rlc(rdev, tmp);
6342 		data |= CGCG_EN | CGLS_EN;
6344 		cik_enable_gui_idle_interrupt(rdev, false);
		/* repeated reads presumably flush/settle the clock switch — confirm against upstream */
6346 		RREG32(CB_CGTT_SCLK_CTRL);
6347 		RREG32(CB_CGTT_SCLK_CTRL);
6348 		RREG32(CB_CGTT_SCLK_CTRL);
6349 		RREG32(CB_CGTT_SCLK_CTRL);
6351 		data &= ~(CGCG_EN | CGLS_EN);
	/* only the final write is unconditional in the visible lines; an orig!=data guard may be elided */
6355 		WREG32(RLC_CGCG_CGLS_CTRL, data);
/*
 * Enable/disable medium-grain clock gating (MGCG) for GFX, including
 * the dependent CP/RLC memory light-sleep and CGTS SM controls.
 * Both paths halt the RLC, program the serdes override, and restore it.
 */
6359 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6361 	u32 data, orig, tmp = 0;
6363 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6364 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6365 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6366 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6367 				data |= CP_MEM_LS_EN;
6369 					WREG32(CP_MEM_SLP_CNTL, data);
6373 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6377 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6379 		tmp = cik_halt_rlc(rdev);
6381 		mutex_lock(&rdev->grbm_idx_mutex);
6382 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6383 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6384 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6385 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6386 		WREG32(RLC_SERDES_WR_CTRL, data);
6387 		mutex_unlock(&rdev->grbm_idx_mutex);
6389 		cik_update_rlc(rdev, tmp);
		/* optional CGTS static mode configuration */
6391 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6392 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6393 			data &= ~SM_MODE_MASK;
6394 			data |= SM_MODE(0x2);
6395 			data |= SM_MODE_ENABLE;
6396 			data &= ~CGTS_OVERRIDE;
6397 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6398 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6399 				data &= ~CGTS_LS_OVERRIDE;
6400 			data &= ~ON_MONITOR_ADD_MASK;
6401 			data |= ON_MONITOR_ADD_EN;
6402 			data |= ON_MONITOR_ADD(0x96);
6404 				WREG32(CGTS_SM_CTRL_REG, data);
	/* disable path: force overrides on and drop memory light-sleep */
6407 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6410 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6412 		data = RREG32(RLC_MEM_SLP_CNTL);
6413 		if (data & RLC_MEM_LS_EN) {
6414 			data &= ~RLC_MEM_LS_EN;
6415 			WREG32(RLC_MEM_SLP_CNTL, data);
6418 		data = RREG32(CP_MEM_SLP_CNTL);
6419 		if (data & CP_MEM_LS_EN) {
6420 			data &= ~CP_MEM_LS_EN;
6421 			WREG32(CP_MEM_SLP_CNTL, data);
6424 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6425 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6427 			WREG32(CGTS_SM_CTRL_REG, data);
6429 		tmp = cik_halt_rlc(rdev);
6431 		mutex_lock(&rdev->grbm_idx_mutex);
6432 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6433 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6434 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6435 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6436 		WREG32(RLC_SERDES_WR_CTRL, data);
6437 		mutex_unlock(&rdev->grbm_idx_mutex);
6439 		cik_update_rlc(rdev, tmp);
/* Memory-controller registers touched by the MC clock-gating helpers below (initializer elided in this extract). */
6443 static const u32 mc_cg_registers[] =
/* Toggle the MC_LS_ENABLE bit in every register listed in mc_cg_registers. */
6456 static void cik_enable_mc_ls(struct radeon_device *rdev,
6462 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6463 		orig = data = RREG32(mc_cg_registers[i]);
6464 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6465 			data |= MC_LS_ENABLE;
6467 			data &= ~MC_LS_ENABLE;
6469 			WREG32(mc_cg_registers[i], data);
/* Toggle the MC_CG_ENABLE bit in every register listed in mc_cg_registers. */
6473 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6479 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6480 		orig = data = RREG32(mc_cg_registers[i]);
6481 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6482 			data |= MC_CG_ENABLE;
6484 			data &= ~MC_CG_ENABLE;
6486 			WREG32(mc_cg_registers[i], data);
/*
 * Toggle SDMA medium-grain clock gating on both SDMA engines.
 * Enable writes a fixed value; disable modifies the current value
 * (the OR'd disable bits are elided in this extract).
 */
6490 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6495 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6496 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6497 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6499 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6502 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6504 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6507 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* Toggle SDMA memory light-sleep in SDMA0_POWER_CNTL on both engines (bit ops elided in this extract). */
6511 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6516 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6517 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6520 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6522 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6525 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6527 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6530 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6532 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6535 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Toggle UVD medium-grain clock gating via UVD_CGC_MEM_CTRL (UVD context space) and UVD_CGC_CTRL. */
6539 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6544 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6545 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6547 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6549 		orig = data = RREG32(UVD_CGC_CTRL);
6552 			WREG32(UVD_CGC_CTRL, data);
6554 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6556 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6558 		orig = data = RREG32(UVD_CGC_CTRL);
6561 			WREG32(UVD_CGC_CTRL, data);
/* Toggle BIF (bus interface) memory light-sleep bits in PCIE_CNTL2. */
6565 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6570 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6572 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6573 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6574 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6576 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6577 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6580 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Toggle HDP medium-grain clock gating — note the bit is a DISABLE bit, so the sense is inverted. */
6583 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6588 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6590 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6591 		data &= ~CLOCK_GATING_DIS;
6593 		data |= CLOCK_GATING_DIS;
6596 		WREG32(HDP_HOST_PATH_CNTL, data);
/* Toggle HDP memory light-sleep via HDP_MEM_POWER_LS. */
6599 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6604 	orig = data = RREG32(HDP_MEM_POWER_LS);
6606 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6607 		data |= HDP_LS_ENABLE;
6609 		data &= ~HDP_LS_ENABLE;
6612 		WREG32(HDP_MEM_POWER_LS, data);
/*
 * Dispatch clock-gating enable/disable to each requested IP block.
 * @block: bitmask of RADEON_CG_BLOCK_* values.
 * GFX gating order matters: MGCG before CGCG when enabling, the
 * reverse when disabling.
 */
6615 void cik_update_cg(struct radeon_device *rdev,
6616 		   u32 block, bool enable)
6619 	if (block & RADEON_CG_BLOCK_GFX) {
6620 		cik_enable_gui_idle_interrupt(rdev, false);
6621 		/* order matters! */
6623 			cik_enable_mgcg(rdev, true);
6624 			cik_enable_cgcg(rdev, true);
6626 			cik_enable_cgcg(rdev, false);
6627 			cik_enable_mgcg(rdev, false);
6629 		cik_enable_gui_idle_interrupt(rdev, true);
	/* MC gating only applies to discrete parts; IGPs share the system MC */
6632 	if (block & RADEON_CG_BLOCK_MC) {
6633 		if (!(rdev->flags & RADEON_IS_IGP)) {
6634 			cik_enable_mc_mgcg(rdev, enable);
6635 			cik_enable_mc_ls(rdev, enable);
6639 	if (block & RADEON_CG_BLOCK_SDMA) {
6640 		cik_enable_sdma_mgcg(rdev, enable);
6641 		cik_enable_sdma_mgls(rdev, enable);
6644 	if (block & RADEON_CG_BLOCK_BIF) {
6645 		cik_enable_bif_mgls(rdev, enable);
6648 	if (block & RADEON_CG_BLOCK_UVD) {
6650 			cik_enable_uvd_mgcg(rdev, enable);
6653 	if (block & RADEON_CG_BLOCK_HDP) {
6654 		cik_enable_hdp_mgcg(rdev, enable);
6655 		cik_enable_hdp_ls(rdev, enable);
6658 	if (block & RADEON_CG_BLOCK_VCE) {
6659 		vce_v2_0_enable_mgcg(rdev, enable);
/* Bring up clock gating at init: GFX first, then UVD internal CG, then the rest in one batch. */
6663 static void cik_init_cg(struct radeon_device *rdev)
6666 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6669 		si_init_uvd_internal_cg(rdev);
6671 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6672 			     RADEON_CG_BLOCK_SDMA |
6673 			     RADEON_CG_BLOCK_BIF |
6674 			     RADEON_CG_BLOCK_UVD |
6675 			     RADEON_CG_BLOCK_HDP), true);
/* Tear down clock gating in the reverse order of cik_init_cg(): misc blocks first, GFX last. */
6678 static void cik_fini_cg(struct radeon_device *rdev)
6680 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6681 			     RADEON_CG_BLOCK_SDMA |
6682 			     RADEON_CG_BLOCK_BIF |
6683 			     RADEON_CG_BLOCK_UVD |
6684 			     RADEON_CG_BLOCK_HDP), false);
6686 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL. */
6689 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6694 	orig = data = RREG32(RLC_PG_CNTL);
6695 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6696 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6698 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6700 		WREG32(RLC_PG_CNTL, data);
/* Toggle SMU clock slowdown on power-down in RLC_PG_CNTL. */
6703 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6708 	orig = data = RREG32(RLC_PG_CNTL);
6709 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6710 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6712 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6714 		WREG32(RLC_PG_CNTL, data);
/* Toggle CP power gating — DISABLE_CP_PG is an inverse-sense bit, cleared to enable. */
6717 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6721 	orig = data = RREG32(RLC_PG_CNTL);
6722 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6723 		data &= ~DISABLE_CP_PG;
6725 		data |= DISABLE_CP_PG;
6727 		WREG32(RLC_PG_CNTL, data);
/* Toggle GDS power gating — DISABLE_GDS_PG is an inverse-sense bit, cleared to enable. */
6730 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6734 	orig = data = RREG32(RLC_PG_CNTL);
6735 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6736 		data &= ~DISABLE_GDS_PG;
6738 		data |= DISABLE_GDS_PG;
6740 		WREG32(RLC_PG_CNTL, data);
/* Legacy (non-structured) firmware jump-table geometry within the raw images. */
6743 #define CP_ME_TABLE_SIZE    96
6744 #define CP_ME_TABLE_OFFSET  2048
6745 #define CP_MEC_TABLE_OFFSET 4096
/*
 * Copy the per-microengine jump tables out of the CP firmware images
 * into the RLC's CP table buffer (cp_table_ptr), so the RLC can
 * restore CP state after power gating. me index: 0=CE, 1=PFP, 2=ME,
 * 3=MEC, 4=MEC2 (Kaveri only — max_me presumably bumped to 5 in an
 * elided line; confirm against upstream).
 */
6747 void cik_init_cp_pg_table(struct radeon_device *rdev)
6749 	volatile u32 *dst_ptr;
6750 	int me, i, max_me = 4;
6752 	u32 table_offset, table_size;
6754 	if (rdev->family == CHIP_KAVERI)
	/* nothing to do if the table BO was never mapped */
6757 	if (rdev->rlc.cp_table_ptr == NULL)
6760 	/* write the cp table buffer */
6761 	dst_ptr = rdev->rlc.cp_table_ptr;
6762 	for (me = 0; me < max_me; me++) {
		/* new_fw path: offsets/sizes come from each image's gfx header */
6764 			const __le32 *fw_data;
6765 			const struct gfx_firmware_header_v1_0 *hdr;
6768 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6769 				fw_data = (const __le32 *)
6770 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6771 				table_offset = le32_to_cpu(hdr->jt_offset);
6772 				table_size = le32_to_cpu(hdr->jt_size);
6773 			} else if (me == 1) {
6774 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6775 				fw_data = (const __le32 *)
6776 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6777 				table_offset = le32_to_cpu(hdr->jt_offset);
6778 				table_size = le32_to_cpu(hdr->jt_size);
6779 			} else if (me == 2) {
6780 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6781 				fw_data = (const __le32 *)
6782 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6783 				table_offset = le32_to_cpu(hdr->jt_offset);
6784 				table_size = le32_to_cpu(hdr->jt_size);
6785 			} else if (me == 3) {
6786 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6787 				fw_data = (const __le32 *)
6788 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6789 				table_offset = le32_to_cpu(hdr->jt_offset);
6790 				table_size = le32_to_cpu(hdr->jt_size);
6792 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6793 				fw_data = (const __le32 *)
6794 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6795 				table_offset = le32_to_cpu(hdr->jt_offset);
6796 				table_size = le32_to_cpu(hdr->jt_size);
6799 			for (i = 0; i < table_size; i ++) {
6800 				dst_ptr[bo_offset + i] =
6801 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6803 			bo_offset += table_size;
		/* legacy path: big-endian raw images with fixed table geometry */
6805 			const __be32 *fw_data;
6806 			table_size = CP_ME_TABLE_SIZE;
6809 				fw_data = (const __be32 *)rdev->ce_fw->data;
6810 				table_offset = CP_ME_TABLE_OFFSET;
6811 			} else if (me == 1) {
6812 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6813 				table_offset = CP_ME_TABLE_OFFSET;
6814 			} else if (me == 2) {
6815 				fw_data = (const __be32 *)rdev->me_fw->data;
6816 				table_offset = CP_ME_TABLE_OFFSET;
6818 				fw_data = (const __be32 *)rdev->mec_fw->data;
6819 				table_offset = CP_MEC_TABLE_OFFSET;
6822 			for (i = 0; i < table_size; i ++) {
6823 				dst_ptr[bo_offset + i] =
6824 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6826 			bo_offset += table_size;
/* Enable/disable GFX coarse-grain power gating and the RLC auto-PG trigger. */
6831 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6836 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6837 		orig = data = RREG32(RLC_PG_CNTL);
6838 		data |= GFX_PG_ENABLE;
6840 			WREG32(RLC_PG_CNTL, data);
6842 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		/* AUTO_PG_EN set presumably on an elided line — confirm against upstream */
6845 			WREG32(RLC_AUTO_PG_CTRL, data);
6847 		orig = data = RREG32(RLC_PG_CNTL);
6848 		data &= ~GFX_PG_ENABLE;
6850 			WREG32(RLC_PG_CNTL, data);
6852 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6853 		data &= ~AUTO_PG_EN;
6855 			WREG32(RLC_AUTO_PG_CTRL, data);
		/* dummy read — forces the disable to take effect before returning */
6857 		data = RREG32(DB_RENDER_CONTROL);
/*
 * Return the bitmap of active (non-harvested) CUs for the given SE/SH:
 * inactive bits come from the shader-array config registers, masked to
 * max_cu_per_sh, then inverted.
 */
6861 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6863 	u32 mask = 0, tmp, tmp1;
6866 	mutex_lock(&rdev->grbm_idx_mutex);
6867 	cik_select_se_sh(rdev, se, sh);
6868 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6869 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6870 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6871 	mutex_unlock(&rdev->grbm_idx_mutex);
	/* build a mask covering max_cu_per_sh bits (shift/merge lines elided in this extract) */
6878 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6883 	return (~tmp) & mask;
/*
 * Program the always-on CU mask and the max power-gateable CU count
 * from the per-SE/SH active-CU bitmaps.
 */
6886 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6888 	u32 i, j, k, active_cu_number = 0;
6889 	u32 mask, counter, cu_bitmap;
6892 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6893 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6897 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6898 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6906 			active_cu_number += counter;
			/* pack each SH's bitmap into its byte lane of the AO mask */
6907 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6911 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6913 	tmp = RREG32(RLC_MAX_PG_CU);
6914 	tmp &= ~MAX_PU_CU_MASK;
6915 	tmp |= MAX_PU_CU(active_cu_number);
6916 	WREG32(RLC_MAX_PG_CU, tmp);
/* Toggle static per-CU power gating in RLC_PG_CNTL. */
6919 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6924 	orig = data = RREG32(RLC_PG_CNTL);
6925 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6926 		data |= STATIC_PER_CU_PG_ENABLE;
6928 		data &= ~STATIC_PER_CU_PG_ENABLE;
6930 		WREG32(RLC_PG_CNTL, data);
/* Toggle dynamic per-CU power gating in RLC_PG_CNTL. */
6933 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6938 	orig = data = RREG32(RLC_PG_CNTL);
6939 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6940 		data |= DYN_PER_CU_PG_ENABLE;
6942 		data &= ~DYN_PER_CU_PG_ENABLE;
6944 		WREG32(RLC_PG_CNTL, data);
/* Offsets into RLC GPM scratch where the PG descriptors live. */
6947 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6948 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
/*
 * One-time GFX power-gating setup: publish the clear-state descriptor
 * and save/restore list to RLC scratch, point the RLC at the
 * save/restore and CP-table buffers, and tune the PG delay/idle-poll
 * parameters.
 */
6950 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
	/* clear-state descriptor: hi addr, lo addr, size — or zeros if no cs_data */
6955 	if (rdev->rlc.cs_data) {
6956 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6957 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6958 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6959 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6961 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6962 		for (i = 0; i < 3; i++)
6963 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6965 	if (rdev->rlc.reg_list) {
6966 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6967 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6968 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6971 	orig = data = RREG32(RLC_PG_CNTL);
6974 		WREG32(RLC_PG_CNTL, data);
6976 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6977 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6979 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6980 	data &= ~IDLE_POLL_COUNT_MASK;
6981 	data |= IDLE_POLL_COUNT(0x60);
6982 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
	/* PG delay value computed on elided lines — confirm against upstream */
6985 	WREG32(RLC_PG_DELAY, data);
6987 	data = RREG32(RLC_PG_DELAY_2);
6990 	WREG32(RLC_PG_DELAY_2, data);
6992 	data = RREG32(RLC_AUTO_PG_CTRL);
6993 	data &= ~GRBM_REG_SGIT_MASK;
6994 	data |= GRBM_REG_SGIT(0x700);
6995 	WREG32(RLC_AUTO_PG_CTRL, data);
/* Toggle all three GFX power-gating modes (coarse, static MG, dynamic MG) together. */
6999 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7001 	cik_enable_gfx_cgpg(rdev, enable);
7002 	cik_enable_gfx_static_mgpg(rdev, enable);
7003 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
/*
 * Compute the dword size of the clear-state buffer that
 * cik_get_csb_buffer() will emit: preamble + context control +
 * SECT_CONTEXT extents + raster config pair + end-of-clear-state.
 * Returns 0 when no cs_data is available.
 */
7006 u32 cik_get_csb_size(struct radeon_device *rdev)
7009 	const struct cs_section_def *sect = NULL;
7010 	const struct cs_extent_def *ext = NULL;
7012 	if (rdev->rlc.cs_data == NULL)
7015 	/* begin clear state */
7017 	/* context control state */
7020 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7021 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7022 			if (sect->id == SECT_CONTEXT)
				/* 2 = SET_CONTEXT_REG header + register offset */
7023 				count += 2 + ext->reg_count;
7028 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7030 	/* end clear state */
/*
 * Fill @buffer with the clear-state packet stream whose size
 * cik_get_csb_size() computed. All dwords are stored little-endian,
 * matching what the CP consumes. The raster-config values are
 * per-family (family labels for the switch cases are elided in this
 * extract; values match upstream cik.c).
 */
7038 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7041 	const struct cs_section_def *sect = NULL;
7042 	const struct cs_extent_def *ext = NULL;
7044 	if (rdev->rlc.cs_data == NULL)
7049 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7050 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7052 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7053 	buffer[count++] = cpu_to_le32(0x80000000);
7054 	buffer[count++] = cpu_to_le32(0x80000000);
7056 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7057 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7058 			if (sect->id == SECT_CONTEXT) {
7060 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context registers are addressed relative to 0xa000 */
7061 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7062 				for (i = 0; i < ext->reg_count; i++)
7063 					buffer[count++] = cpu_to_le32(ext->extent[i]);
	/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1, chip-specific */
7070 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7071 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7072 	switch (rdev->family) {
7074 		buffer[count++] = cpu_to_le32(0x16000012);
7075 		buffer[count++] = cpu_to_le32(0x00000000);
7078 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7079 		buffer[count++] = cpu_to_le32(0x00000000);
7083 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7084 		buffer[count++] = cpu_to_le32(0x00000000);
7087 		buffer[count++] = cpu_to_le32(0x3a00161a);
7088 		buffer[count++] = cpu_to_le32(0x0000002e);
7091 		buffer[count++] = cpu_to_le32(0x00000000);
7092 		buffer[count++] = cpu_to_le32(0x00000000);
7096 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7097 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7099 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7100 	buffer[count++] = cpu_to_le32(0);
/* Bring up power gating at init, gated on rdev->pg_flags; GFX PG setup only when supported. */
7103 static void cik_init_pg(struct radeon_device *rdev)
7105 	if (rdev->pg_flags) {
7106 		cik_enable_sck_slowdown_on_pu(rdev, true);
7107 		cik_enable_sck_slowdown_on_pd(rdev, true);
7108 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7109 			cik_init_gfx_cgpg(rdev);
7110 			cik_enable_cp_pg(rdev, true);
7111 			cik_enable_gds_pg(rdev, true);
7113 		cik_init_ao_cu_mask(rdev);
7114 		cik_update_gfx_pg(rdev, true);
/* Tear down power gating in the reverse order of cik_init_pg(). */
7118 static void cik_fini_pg(struct radeon_device *rdev)
7120 	if (rdev->pg_flags) {
7121 		cik_update_gfx_pg(rdev, false);
7122 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7123 			cik_enable_cp_pg(rdev, false);
7124 			cik_enable_gds_pg(rdev, false);
7131 * Starting with r6xx, interrupts are handled via a ring buffer.
7132 * Ring buffers are areas of GPU accessible memory that the GPU
7133 * writes interrupt vectors into and the host reads vectors out of.
7134 * There is a rptr (read pointer) that determines where the
7135 * host is currently reading, and a wptr (write pointer)
7136 * which determines where the GPU has written. When the
7137 * pointers are equal, the ring is idle. When the GPU
7138 * writes vectors to the ring buffer, it increments the
7139 * wptr. When there is an interrupt, the host then starts
7140 * fetching commands and processing them until the pointers are
7141 * equal again at which point it updates the rptr.
7145  * cik_enable_interrupts - Enable the interrupt ring buffer
7147  * @rdev: radeon_device pointer
7149  * Enable the interrupt ring buffer (CIK).
7151 static void cik_enable_interrupts(struct radeon_device *rdev)
7153 	u32 ih_cntl = RREG32(IH_CNTL);
7154 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7156 	ih_cntl |= ENABLE_INTR;
7157 	ih_rb_cntl |= IH_RB_ENABLE;
7158 	WREG32(IH_CNTL, ih_cntl);
7159 	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* software-side flag checked by the IRQ path */
7160 	rdev->ih.enabled = true;
7164  * cik_disable_interrupts - Disable the interrupt ring buffer
7166  * @rdev: radeon_device pointer
7168  * Disable the interrupt ring buffer (CIK).
7170 static void cik_disable_interrupts(struct radeon_device *rdev)
7172 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7173 	u32 ih_cntl = RREG32(IH_CNTL);
7175 	ih_rb_cntl &= ~IH_RB_ENABLE;
7176 	ih_cntl &= ~ENABLE_INTR;
7177 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7178 	WREG32(IH_CNTL, ih_cntl);
7179 	/* set rptr, wptr to 0 */
7180 	WREG32(IH_RB_RPTR, 0);
7181 	WREG32(IH_RB_WPTR, 0);
7182 	rdev->ih.enabled = false;
7187 * cik_disable_interrupt_state - Disable all interrupt sources
7189 * @rdev: radeon_device pointer
7191 * Clear all interrupt enable bits used by the driver (CIK).
/* Masks every interrupt source this driver manages (CP, SDMA, compute
 * pipes, GRBM/SRBM, display, hotplug) so nothing can fire while the IH
 * ring is being (re)programmed.
 */
7193 static void cik_disable_interrupt_state(struct radeon_device *rdev)
/* gfx ring: preserve only the context busy/empty bits, drop all enables */
7198 tmp = RREG32(CP_INT_CNTL_RING0) &
7199 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7200 WREG32(CP_INT_CNTL_RING0, tmp);
/* sdma0/sdma1: mask the trap interrupt on both engines */
7202 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7203 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7204 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7205 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7206 /* compute queues */
7207 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7208 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7209 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7210 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7211 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7212 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7213 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7214 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7216 WREG32(GRBM_INT_CNTL, 0);
7218 WREG32(SRBM_INT_CNTL, 0);
7219 /* vline/vblank, etc. */
7220 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7221 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7222 if (rdev->num_crtc >= 4) {
7223 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7224 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7226 if (rdev->num_crtc >= 6) {
7227 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7228 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
/* page flip interrupts */
7231 if (rdev->num_crtc >= 2) {
7232 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7233 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7235 if (rdev->num_crtc >= 4) {
7236 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7237 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7239 if (rdev->num_crtc >= 6) {
7240 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7241 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7245 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7247 /* digital hotplug */
/* clear the HPD enables but keep the programmed pin polarity bit */
7248 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7249 WREG32(DC_HPD1_INT_CONTROL, tmp);
7250 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7251 WREG32(DC_HPD2_INT_CONTROL, tmp);
7252 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7253 WREG32(DC_HPD3_INT_CONTROL, tmp);
7254 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7255 WREG32(DC_HPD4_INT_CONTROL, tmp);
7256 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7257 WREG32(DC_HPD5_INT_CONTROL, tmp);
7258 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7259 WREG32(DC_HPD6_INT_CONTROL, tmp);
7264 * cik_irq_init - init and enable the interrupt ring
7266 * @rdev: radeon_device pointer
7268 * Allocate a ring buffer for the interrupt controller,
7269 * enable the RLC, disable interrupts, enable the IH
7270 * ring buffer and enable it (CIK).
7271 * Called at device load and resume.
7272 * Returns 0 for success, errors for failure.
7274 static int cik_irq_init(struct radeon_device *rdev)
7278 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
/* allocate the IH ring buffer */
7281 ret = r600_ih_ring_alloc(rdev);
/* disable irqs while we reprogram the controller */
7286 cik_disable_interrupts(rdev);
/* the RLC must be up before interrupt delivery works; on failure the
 * IH ring allocated above is torn down again */
7289 ret = cik_rlc_resume(rdev);
7291 r600_ih_ring_fini(rdev);
7295 /* setup interrupt control */
7296 /* set dummy read address to dummy page address */
7297 WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
7298 interrupt_cntl = RREG32(INTERRUPT_CNTL);
7299 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7300 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7302 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7303 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7304 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7305 WREG32(INTERRUPT_CNTL, interrupt_cntl);
7307 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
/* ring size register wants log2 of the size in dwords */
7308 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7310 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7311 IH_WPTR_OVERFLOW_CLEAR |
7314 if (rdev->wb.enabled)
7315 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7317 /* set the writeback address whether it's enabled or not */
7318 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7319 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7321 WREG32(IH_RB_CNTL, ih_rb_cntl);
7323 /* set rptr, wptr to 0 */
7324 WREG32(IH_RB_RPTR, 0);
7325 WREG32(IH_RB_WPTR, 0);
7327 /* Default settings for IH_CNTL (disabled at first) */
7328 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7329 /* RPTR_REARM only works if msi's are enabled */
7330 if (rdev->msi_enabled)
7331 ih_cntl |= RPTR_REARM;
7332 WREG32(IH_CNTL, ih_cntl);
7334 /* force the active interrupt state to all disabled */
7335 cik_disable_interrupt_state(rdev);
7337 pci_set_master(rdev->pdev);
/* enable irqs */
7340 cik_enable_interrupts(rdev);
7346 * cik_irq_set - enable/disable interrupt sources
7348 * @rdev: radeon_device pointer
7350 * Enable interrupt sources on the GPU (vblanks, hpd,
7352 * Returns 0 for success, errors for failure.
/* Programs every interrupt-enable register from the software state in
 * rdev->irq: CP rings, compute pipes, SDMA engines, vblank and hotplug.
 * Bails out early (leaving everything masked) if no handler is
 * installed or the IH ring is disabled.
 */
7354 int cik_irq_set(struct radeon_device *rdev)
7358 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7359 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7360 u32 grbm_int_cntl = 0;
7361 u32 dma_cntl, dma_cntl1;
7363 if (!rdev->irq.installed) {
7364 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7367 /* don't enable anything if the ih is disabled */
7368 if (!rdev->ih.enabled) {
7369 cik_disable_interrupts(rdev);
7370 /* force the active interrupt state to all disabled */
7371 cik_disable_interrupt_state(rdev);
/* read back current register state with the bits this function
 * manages cleared; enables are OR-ed back in below as needed */
7375 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7376 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7377 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7379 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7380 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7381 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7382 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7383 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7384 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7386 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7387 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7389 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7391 /* enable CP interrupts on all rings */
7392 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7393 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7394 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7396 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7397 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
/* NOTE(review): debug strings below say "si_irq_set" — copied from
 * si.c; runtime strings are left untouched in this pass */
7398 DRM_DEBUG("si_irq_set: sw int cp1\n");
7399 if (ring->me == 1) {
7400 switch (ring->pipe) {
7402 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7405 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7409 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7412 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7413 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7414 DRM_DEBUG("si_irq_set: sw int cp2\n");
7415 if (ring->me == 1) {
7416 switch (ring->pipe) {
7418 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7421 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7425 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7429 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7430 DRM_DEBUG("cik_irq_set: sw int dma\n");
7431 dma_cntl |= TRAP_ENABLE;
7434 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7435 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7436 dma_cntl1 |= TRAP_ENABLE;
/* vblank is enabled for a crtc if either the drm vblank machinery or a
 * pending page flip wants it */
7439 if (rdev->irq.crtc_vblank_int[0] ||
7440 atomic_read(&rdev->irq.pflip[0])) {
7441 DRM_DEBUG("cik_irq_set: vblank 0\n");
7442 crtc1 |= VBLANK_INTERRUPT_MASK;
7444 if (rdev->irq.crtc_vblank_int[1] ||
7445 atomic_read(&rdev->irq.pflip[1])) {
7446 DRM_DEBUG("cik_irq_set: vblank 1\n");
7447 crtc2 |= VBLANK_INTERRUPT_MASK;
7449 if (rdev->irq.crtc_vblank_int[2] ||
7450 atomic_read(&rdev->irq.pflip[2])) {
7451 DRM_DEBUG("cik_irq_set: vblank 2\n");
7452 crtc3 |= VBLANK_INTERRUPT_MASK;
7454 if (rdev->irq.crtc_vblank_int[3] ||
7455 atomic_read(&rdev->irq.pflip[3])) {
7456 DRM_DEBUG("cik_irq_set: vblank 3\n");
7457 crtc4 |= VBLANK_INTERRUPT_MASK;
7459 if (rdev->irq.crtc_vblank_int[4] ||
7460 atomic_read(&rdev->irq.pflip[4])) {
7461 DRM_DEBUG("cik_irq_set: vblank 4\n");
7462 crtc5 |= VBLANK_INTERRUPT_MASK;
7464 if (rdev->irq.crtc_vblank_int[5] ||
7465 atomic_read(&rdev->irq.pflip[5])) {
7466 DRM_DEBUG("cik_irq_set: vblank 5\n");
7467 crtc6 |= VBLANK_INTERRUPT_MASK;
7469 if (rdev->irq.hpd[0]) {
7470 DRM_DEBUG("cik_irq_set: hpd 1\n");
7471 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7473 if (rdev->irq.hpd[1]) {
7474 DRM_DEBUG("cik_irq_set: hpd 2\n");
7475 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7477 if (rdev->irq.hpd[2]) {
7478 DRM_DEBUG("cik_irq_set: hpd 3\n");
7479 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7481 if (rdev->irq.hpd[3]) {
7482 DRM_DEBUG("cik_irq_set: hpd 4\n");
7483 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7485 if (rdev->irq.hpd[4]) {
7486 DRM_DEBUG("cik_irq_set: hpd 5\n");
7487 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7489 if (rdev->irq.hpd[5]) {
7490 DRM_DEBUG("cik_irq_set: hpd 6\n");
7491 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
/* commit the accumulated enable masks to the hardware */
7494 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7496 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7497 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7499 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7501 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7503 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7504 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7505 if (rdev->num_crtc >= 4) {
7506 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7507 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7509 if (rdev->num_crtc >= 6) {
7510 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7511 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
/* page-flip interrupts stay unconditionally unmasked per crtc */
7514 if (rdev->num_crtc >= 2) {
7515 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7516 GRPH_PFLIP_INT_MASK);
7517 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7518 GRPH_PFLIP_INT_MASK);
7520 if (rdev->num_crtc >= 4) {
7521 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7522 GRPH_PFLIP_INT_MASK);
7523 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7524 GRPH_PFLIP_INT_MASK);
7526 if (rdev->num_crtc >= 6) {
7527 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7528 GRPH_PFLIP_INT_MASK);
7529 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7530 GRPH_PFLIP_INT_MASK);
7533 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7534 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7535 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7536 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7537 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7538 WREG32(DC_HPD6_INT_CONTROL, hpd6);
/* posting read to flush the register writes */
7541 RREG32(SRBM_STATUS);
7547 * cik_irq_ack - ack interrupt sources
7549 * @rdev: radeon_device pointer
7551 * Ack interrupt sources on the GPU (vblanks, hpd,
7552 * etc.) (CIK). Certain interrupt sources are sw
7553 * generated and do not require an explicit ack.
7555 static inline void cik_irq_ack(struct radeon_device *rdev)
/* latch all display interrupt status registers into rdev->irq.stat_regs
 * so cik_irq_process() can consume them after the hardware acks below */
7559 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7560 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7561 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7562 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7563 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7564 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7565 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7567 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7568 EVERGREEN_CRTC0_REGISTER_OFFSET);
7569 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7570 EVERGREEN_CRTC1_REGISTER_OFFSET);
7571 if (rdev->num_crtc >= 4) {
7572 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7573 EVERGREEN_CRTC2_REGISTER_OFFSET);
7574 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7575 EVERGREEN_CRTC3_REGISTER_OFFSET);
7577 if (rdev->num_crtc >= 6) {
7578 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7579 EVERGREEN_CRTC4_REGISTER_OFFSET);
7580 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7581 EVERGREEN_CRTC5_REGISTER_OFFSET);
/* ack page-flip and vblank/vline interrupts for crtc0/1 */
7584 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7585 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7586 GRPH_PFLIP_INT_CLEAR);
7587 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7588 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7589 GRPH_PFLIP_INT_CLEAR);
7590 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7591 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7592 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7593 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7594 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7595 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7596 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7597 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7599 if (rdev->num_crtc >= 4) {
7600 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7601 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7602 GRPH_PFLIP_INT_CLEAR);
7603 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7604 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7605 GRPH_PFLIP_INT_CLEAR);
7606 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7607 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7608 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7609 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7610 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7611 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7612 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7613 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7616 if (rdev->num_crtc >= 6) {
7617 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7618 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7619 GRPH_PFLIP_INT_CLEAR);
7620 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7621 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7622 GRPH_PFLIP_INT_CLEAR);
7623 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7624 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7625 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7626 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7627 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7628 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7629 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7630 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
/* ack HPD (hotplug detect) interrupts via read-modify-write of the
 * sticky ack bit in each pin's control register */
7633 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7634 tmp = RREG32(DC_HPD1_INT_CONTROL);
7635 tmp |= DC_HPDx_INT_ACK;
7636 WREG32(DC_HPD1_INT_CONTROL, tmp);
7638 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7639 tmp = RREG32(DC_HPD2_INT_CONTROL);
7640 tmp |= DC_HPDx_INT_ACK;
7641 WREG32(DC_HPD2_INT_CONTROL, tmp);
7643 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7644 tmp = RREG32(DC_HPD3_INT_CONTROL);
7645 tmp |= DC_HPDx_INT_ACK;
7646 WREG32(DC_HPD3_INT_CONTROL, tmp);
7648 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7649 tmp = RREG32(DC_HPD4_INT_CONTROL);
7650 tmp |= DC_HPDx_INT_ACK;
7651 WREG32(DC_HPD4_INT_CONTROL, tmp);
7653 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7654 tmp = RREG32(DC_HPD5_INT_CONTROL);
7655 tmp |= DC_HPDx_INT_ACK;
7656 WREG32(DC_HPD5_INT_CONTROL, tmp);
7658 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7659 tmp = RREG32(DC_HPD6_INT_CONTROL);
7660 tmp |= DC_HPDx_INT_ACK;
7661 WREG32(DC_HPD6_INT_CONTROL, tmp);
/* same for the HPD RX (DP short-pulse) interrupts */
7663 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7664 tmp = RREG32(DC_HPD1_INT_CONTROL);
7665 tmp |= DC_HPDx_RX_INT_ACK;
7666 WREG32(DC_HPD1_INT_CONTROL, tmp);
7668 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7669 tmp = RREG32(DC_HPD2_INT_CONTROL);
7670 tmp |= DC_HPDx_RX_INT_ACK;
7671 WREG32(DC_HPD2_INT_CONTROL, tmp);
7673 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7674 tmp = RREG32(DC_HPD3_INT_CONTROL);
7675 tmp |= DC_HPDx_RX_INT_ACK;
7676 WREG32(DC_HPD3_INT_CONTROL, tmp);
7678 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7679 tmp = RREG32(DC_HPD4_INT_CONTROL);
7680 tmp |= DC_HPDx_RX_INT_ACK;
7681 WREG32(DC_HPD4_INT_CONTROL, tmp);
7683 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7684 tmp = RREG32(DC_HPD5_INT_CONTROL);
7685 tmp |= DC_HPDx_RX_INT_ACK;
7686 WREG32(DC_HPD5_INT_CONTROL, tmp);
7688 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7689 tmp = RREG32(DC_HPD6_INT_CONTROL);
7690 tmp |= DC_HPDx_RX_INT_ACK;
7691 WREG32(DC_HPD6_INT_CONTROL, tmp);
7696 * cik_irq_disable - disable interrupts
7698 * @rdev: radeon_device pointer
7700 * Disable interrupts on the hw (CIK).
/* Fully quiesce the interrupt controller: stop the IH ring first, then
 * mask every individual source. */
7702 static void cik_irq_disable(struct radeon_device *rdev)
7704 cik_disable_interrupts(rdev);
7705 /* Wait and acknowledge irq */
7708 cik_disable_interrupt_state(rdev);
7712 * cik_irq_suspend - disable interrupts for suspend
7714 * @rdev: radeon_device pointer
7716 * Disable interrupts and stop the RLC (CIK).
/* Suspend-time variant: disables interrupt delivery (and, per the
 * kerneldoc above, stops the RLC) before the device sleeps. */
7719 static void cik_irq_suspend(struct radeon_device *rdev)
7721 cik_irq_disable(rdev);
7726 * cik_irq_fini - tear down interrupt support
7728 * @rdev: radeon_device pointer
7730 * Disable interrupts on the hw and free the IH ring
7732 * Used for driver unload.
/* Teardown for driver unload: quiesce the hardware, then free the IH
 * ring allocated by cik_irq_init(). */
7734 static void cik_irq_fini(struct radeon_device *rdev)
7736 cik_irq_suspend(rdev);
7737 r600_ih_ring_fini(rdev);
7741 * cik_get_ih_wptr - get the IH ring buffer wptr
7743 * @rdev: radeon_device pointer
7745 * Get the IH ring buffer wptr from either the register
7746 * or the writeback memory buffer (CIK). Also check for
7747 * ring buffer overflow and deal with it.
7748 * Used by cik_irq_process().
7749 * Returns the value of the wptr.
7751 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
/* prefer the writeback copy of wptr to avoid an MMIO read */
7755 if (rdev->wb.enabled)
7756 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7758 wptr = RREG32(IH_RB_WPTR);
7760 if (wptr & RB_OVERFLOW) {
7761 wptr &= ~RB_OVERFLOW;
7762 /* When a ring buffer overflow happens, start parsing interrupts
7763 * from the last not-overwritten vector (wptr + 16). Hopefully
7764 * this should allow us to catch up.
7766 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7767 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7768 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
/* clear the sticky overflow flag in the ring control register */
7769 tmp = RREG32(IH_RB_CNTL);
7770 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7771 WREG32(IH_RB_CNTL, tmp);
7773 return (wptr & rdev->ih.ptr_mask);
7777 * Each IV ring entry is 128 bits:
7778 * [7:0] - interrupt source id
7780 * [59:32] - interrupt source data
7781 * [63:60] - reserved
7784 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7785 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7786 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7787 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7788 * PIPE_ID - ME0 0=3D
7789 * - ME1&2 compute dispatcher (4 pipes each)
7791 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7792 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7793 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7796 * [127:96] - reserved
7799 * cik_irq_process - interrupt handler
7801 * @rdev: radeon_device pointer
7803 * Interrupt handler (CIK). Walk the IH ring,
7804 * ack interrupts and schedule work to handle
7806 * Returns irq process return code.
/* Top-level IH walker: drains IV ring entries between rptr and wptr,
 * dispatching by src_id (display, hotplug, CP, SDMA, VM fault, thermal)
 * and deferring slow work (hotplug, DP, reset, thermal) to workqueues
 * after the loop. Serialized against itself via rdev->ih.lock.
 */
7808 int cik_irq_process(struct radeon_device *rdev)
7810 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7811 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7814 u32 src_id, src_data, ring_id;
7815 u8 me_id, pipe_id, queue_id;
7817 bool queue_hotplug = false;
7818 bool queue_dp = false;
7819 bool queue_reset = false;
7820 u32 addr, status, mc_client;
7821 bool queue_thermal = false;
7823 if (!rdev->ih.enabled || rdev->shutdown)
7826 wptr = cik_get_ih_wptr(rdev);
7829 /* is somebody else already processing irqs? */
7830 if (atomic_xchg(&rdev->ih.lock, 1))
7833 rptr = rdev->ih.rptr;
7834 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7836 /* Order reading of wptr vs. reading of IH ring data */
7839 /* display interrupts */
7842 while (rptr != wptr) {
7843 /* wptr/rptr are in bytes! */
7844 ring_index = rptr / 4;
/* forward every raw IV entry to the KFD (compute) driver first */
7846 radeon_kfd_interrupt(rdev,
7847 (const void *) &rdev->ih.ring[ring_index]);
7849 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7850 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7851 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7854 case 1: /* D1 vblank/vline */
7856 case 0: /* D1 vblank */
7857 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7858 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7860 if (rdev->irq.crtc_vblank_int[0]) {
7861 drm_handle_vblank(rdev->ddev, 0);
7862 rdev->pm.vblank_sync = true;
7863 wake_up(&rdev->irq.vblank_queue);
7865 if (atomic_read(&rdev->irq.pflip[0]))
7866 radeon_crtc_handle_vblank(rdev, 0);
7867 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7868 DRM_DEBUG("IH: D1 vblank\n");
7871 case 1: /* D1 vline */
7872 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7873 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7876 DRM_DEBUG("IH: D1 vline\n");
7880 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7884 case 2: /* D2 vblank/vline */
7886 case 0: /* D2 vblank */
7887 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7888 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890 if (rdev->irq.crtc_vblank_int[1]) {
7891 drm_handle_vblank(rdev->ddev, 1);
7892 rdev->pm.vblank_sync = true;
7893 wake_up(&rdev->irq.vblank_queue);
7895 if (atomic_read(&rdev->irq.pflip[1]))
7896 radeon_crtc_handle_vblank(rdev, 1);
7897 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7898 DRM_DEBUG("IH: D2 vblank\n");
7901 case 1: /* D2 vline */
7902 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7903 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7905 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7906 DRM_DEBUG("IH: D2 vline\n");
7910 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7914 case 3: /* D3 vblank/vline */
7916 case 0: /* D3 vblank */
7917 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7918 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7920 if (rdev->irq.crtc_vblank_int[2]) {
7921 drm_handle_vblank(rdev->ddev, 2);
7922 rdev->pm.vblank_sync = true;
7923 wake_up(&rdev->irq.vblank_queue);
7925 if (atomic_read(&rdev->irq.pflip[2]))
7926 radeon_crtc_handle_vblank(rdev, 2);
7927 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7928 DRM_DEBUG("IH: D3 vblank\n");
7931 case 1: /* D3 vline */
7932 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7933 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7935 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7936 DRM_DEBUG("IH: D3 vline\n");
7940 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7944 case 4: /* D4 vblank/vline */
7946 case 0: /* D4 vblank */
7947 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7948 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7950 if (rdev->irq.crtc_vblank_int[3]) {
7951 drm_handle_vblank(rdev->ddev, 3);
7952 rdev->pm.vblank_sync = true;
7953 wake_up(&rdev->irq.vblank_queue);
7955 if (atomic_read(&rdev->irq.pflip[3]))
7956 radeon_crtc_handle_vblank(rdev, 3);
7957 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7958 DRM_DEBUG("IH: D4 vblank\n");
7961 case 1: /* D4 vline */
7962 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7963 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7965 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7966 DRM_DEBUG("IH: D4 vline\n");
7970 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7974 case 5: /* D5 vblank/vline */
7976 case 0: /* D5 vblank */
7977 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7978 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7980 if (rdev->irq.crtc_vblank_int[4]) {
7981 drm_handle_vblank(rdev->ddev, 4);
7982 rdev->pm.vblank_sync = true;
7983 wake_up(&rdev->irq.vblank_queue);
7985 if (atomic_read(&rdev->irq.pflip[4]))
7986 radeon_crtc_handle_vblank(rdev, 4);
7987 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7988 DRM_DEBUG("IH: D5 vblank\n");
7991 case 1: /* D5 vline */
7992 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7993 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7995 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7996 DRM_DEBUG("IH: D5 vline\n");
8000 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8004 case 6: /* D6 vblank/vline */
8006 case 0: /* D6 vblank */
8007 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8008 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8010 if (rdev->irq.crtc_vblank_int[5]) {
8011 drm_handle_vblank(rdev->ddev, 5);
8012 rdev->pm.vblank_sync = true;
8013 wake_up(&rdev->irq.vblank_queue);
8015 if (atomic_read(&rdev->irq.pflip[5]))
8016 radeon_crtc_handle_vblank(rdev, 5);
8017 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8018 DRM_DEBUG("IH: D6 vblank\n");
8021 case 1: /* D6 vline */
8022 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8023 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8025 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8026 DRM_DEBUG("IH: D6 vline\n");
8030 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* page-flip src_ids are 8 + 2*crtc, hence the (src_id - 8) >> 1 below */
8034 case 8: /* D1 page flip */
8035 case 10: /* D2 page flip */
8036 case 12: /* D3 page flip */
8037 case 14: /* D4 page flip */
8038 case 16: /* D5 page flip */
8039 case 18: /* D6 page flip */
8040 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8041 if (radeon_use_pflipirq > 0)
8042 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8044 case 42: /* HPD hotplug */
8047 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8048 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8050 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8051 queue_hotplug = true;
8052 DRM_DEBUG("IH: HPD1\n");
8056 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8057 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8059 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8060 queue_hotplug = true;
8061 DRM_DEBUG("IH: HPD2\n");
8065 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8066 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8068 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8069 queue_hotplug = true;
8070 DRM_DEBUG("IH: HPD3\n");
8074 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8075 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8077 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8078 queue_hotplug = true;
8079 DRM_DEBUG("IH: HPD4\n");
8083 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8084 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8086 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8087 queue_hotplug = true;
8088 DRM_DEBUG("IH: HPD5\n");
8092 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8093 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8095 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8096 queue_hotplug = true;
8097 DRM_DEBUG("IH: HPD6\n");
/* HPD RX (DP short pulse) events schedule the dp_work handler */
8101 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8102 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8104 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8106 DRM_DEBUG("IH: HPD_RX 1\n");
8110 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8111 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8113 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8115 DRM_DEBUG("IH: HPD_RX 2\n");
8119 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8120 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8122 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8124 DRM_DEBUG("IH: HPD_RX 3\n");
8128 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8129 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8131 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8133 DRM_DEBUG("IH: HPD_RX 4\n");
8137 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8138 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8140 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8142 DRM_DEBUG("IH: HPD_RX 5\n");
8146 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8147 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8149 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8151 DRM_DEBUG("IH: HPD_RX 6\n");
8155 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8160 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8161 WREG32(SRBM_INT_ACK, 0x1);
8164 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8165 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* VM page fault: capture fault info, then clear it in the hardware */
8169 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8170 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8171 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8172 /* reset addr and status */
8173 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8174 if (addr == 0x0 && status == 0x0)
8176 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8177 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
8179 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8181 cik_vm_decode_fault(rdev, status, addr, mc_client);
8184 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8187 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8190 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8193 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8197 case 176: /* GFX RB CP_INT */
8198 case 177: /* GFX IB CP_INT */
8199 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8201 case 181: /* CP EOP event */
8202 DRM_DEBUG("IH: CP EOP\n");
8203 /* XXX check the bitfield order! */
8204 me_id = (ring_id & 0x60) >> 5;
8205 pipe_id = (ring_id & 0x18) >> 3;
8206 queue_id = (ring_id & 0x7) >> 0;
8209 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): bitwise '&' of two boolean comparisons below; result
 * is equivalent to '&&' here since == yields 0/1, but '&&' would be
 * the idiomatic form — left byte-identical in this doc-only pass */
8213 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8214 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8215 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8216 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8220 case 184: /* CP Privileged reg access */
8221 DRM_ERROR("Illegal register access in command stream\n");
8222 /* XXX check the bitfield order! */
8223 me_id = (ring_id & 0x60) >> 5;
8224 pipe_id = (ring_id & 0x18) >> 3;
8225 queue_id = (ring_id & 0x7) >> 0;
8228 /* This results in a full GPU reset, but all we need to do is soft
8229 * reset the CP for gfx
8243 case 185: /* CP Privileged inst */
8244 DRM_ERROR("Illegal instruction in command stream\n");
8245 /* XXX check the bitfield order! */
8246 me_id = (ring_id & 0x60) >> 5;
8247 pipe_id = (ring_id & 0x18) >> 3;
8248 queue_id = (ring_id & 0x7) >> 0;
8251 /* This results in a full GPU reset, but all we need to do is soft
8252 * reset the CP for gfx
8266 case 224: /* SDMA trap event */
8267 /* XXX check the bitfield order! */
8268 me_id = (ring_id & 0x3) >> 0;
8269 queue_id = (ring_id & 0xc) >> 2;
8270 DRM_DEBUG("IH: SDMA trap\n");
8275 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8288 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8300 case 230: /* thermal low to high */
8301 DRM_DEBUG("IH: thermal low to high\n");
8302 rdev->pm.dpm.thermal.high_to_low = false;
8303 queue_thermal = true;
8305 case 231: /* thermal high to low */
8306 DRM_DEBUG("IH: thermal high to low\n");
8307 rdev->pm.dpm.thermal.high_to_low = true;
8308 queue_thermal = true;
8310 case 233: /* GUI IDLE */
8311 DRM_DEBUG("IH: GUI idle\n");
8313 case 241: /* SDMA Privileged inst */
8314 case 247: /* SDMA Privileged inst */
8315 DRM_ERROR("Illegal instruction in SDMA command stream\n");
8316 /* XXX check the bitfield order! */
8317 me_id = (ring_id & 0x3) >> 0;
8318 queue_id = (ring_id & 0xc) >> 2;
8353 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8357 /* wptr/rptr are in bytes! */
8359 rptr &= rdev->ih.ptr_mask;
8360 WREG32(IH_RB_RPTR, rptr);
/* kick deferred work collected during the walk */
8363 schedule_work(&rdev->dp_work);
8365 schedule_delayed_work(&rdev->hotplug_work, 0);
8367 rdev->needs_reset = true;
8368 wake_up_all(&rdev->fence_queue);
8371 schedule_work(&rdev->pm.dpm.thermal.work);
8372 rdev->ih.rptr = rptr;
8373 atomic_set(&rdev->ih.lock, 0);
8375 /* make sure wptr hasn't changed while processing */
8376 wptr = cik_get_ih_wptr(rdev);
8384 * startup/shutdown callbacks
8387 * cik_startup - program the asic to a functional state
8389 * @rdev: radeon_device pointer
8391 * Programs the asic to a functional state (CIK).
8392 * Called by cik_init() and cik_resume().
8393 * Returns 0 for success, error for failure.
8395 static int cik_startup(struct radeon_device *rdev)
8397 	struct radeon_ring *ring;
/* Order matters below: link/clock setup -> MC/GART -> RLC/WB/MEC buffers ->
 * fence rings -> IRQs -> ring init -> CP/SDMA resume -> IB pool/VM/audio.
 */
8401 	/* enable pcie gen2/3 link */
8402 	cik_pcie_gen3_enable(rdev);
8404 	cik_program_aspm(rdev);
8406 	/* scratch needs to be initialized before MC */
8407 	r = r600_vram_scratch_init(rdev);
8411 	cik_mc_program(rdev);
/* dGPUs without DPM load the MC microcode here; IGPs have no MC ucode. */
8413 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8414 		r = ci_mc_load_microcode(rdev);
8416 			DRM_ERROR("Failed to load MC firmware!\n");
8421 	r = cik_pcie_gart_enable(rdev);
8426 	/* allocate rlc buffers */
8427 	if (rdev->flags & RADEON_IS_IGP) {
8428 		if (rdev->family == CHIP_KAVERI) {
8429 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8430 			rdev->rlc.reg_list_size =
8431 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8433 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8434 			rdev->rlc.reg_list_size =
8435 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8438 	rdev->rlc.cs_data = ci_cs_data;
8439 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8440 	r = sumo_rlc_init(rdev);
8442 		DRM_ERROR("Failed to init rlc BOs!\n");
8446 	/* allocate wb buffer */
8447 	r = radeon_wb_init(rdev);
8451 	/* allocate mec buffers */
8452 	r = cik_mec_init(rdev);
8454 		DRM_ERROR("Failed to init MEC BOs!\n");
/* Start the fence driver on every ring: GFX, two compute, two SDMA. */
8458 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8460 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8464 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8466 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8470 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8472 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8476 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8478 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8482 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8484 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
/* UVD/VCE are optional: on failure the ring size is zeroed so the ring
 * init below is skipped rather than failing the whole startup.
 */
8488 	r = radeon_uvd_resume(rdev);
8490 		r = uvd_v4_2_resume(rdev);
8492 			r = radeon_fence_driver_start_ring(rdev,
8493 							   R600_RING_TYPE_UVD_INDEX);
8495 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8499 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8501 	r = radeon_vce_resume(rdev);
8503 		r = vce_v2_0_resume(rdev);
8505 			r = radeon_fence_driver_start_ring(rdev,
8506 							   TN_RING_TYPE_VCE1_INDEX);
8508 			r = radeon_fence_driver_start_ring(rdev,
8509 							   TN_RING_TYPE_VCE2_INDEX);
8512 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8513 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8514 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8518 	if (!rdev->irq.installed) {
8519 		r = radeon_irq_kms_init(rdev);
8524 	r = cik_irq_init(rdev);
8526 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8527 		radeon_irq_kms_fini(rdev);
/* Hawaii uses type-3 NOPs on the GFX ring too; older CIK parts use
 * the legacy type-2 packet there.
 */
8532 	if (rdev->family == CHIP_HAWAII) {
8534 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8536 		nop = RADEON_CP_PACKET2;
8538 	nop = PACKET3(PACKET3_NOP, 0x3FFF);
8541 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8542 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8547 	/* set up the compute queues */
8548 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8549 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8550 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8554 	ring->me = 1; /* first MEC */
8555 	ring->pipe = 0; /* first pipe */
8556 	ring->queue = 0; /* first queue */
8557 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8559 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8560 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8561 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8565 	/* dGPU only have 1 MEC */
8566 	ring->me = 1; /* first MEC */
8567 	ring->pipe = 0; /* first pipe */
8568 	ring->queue = 1; /* second queue */
8569 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8571 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8572 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8573 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8577 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8578 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8579 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8583 	r = cik_cp_resume(rdev);
8587 	r = cik_sdma_resume(rdev);
/* UVD/VCE ring init only runs if the resume path above succeeded
 * (ring_size stays non-zero).
 */
8591 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8592 	if (ring->ring_size) {
8593 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8596 			r = uvd_v1_0_init(rdev);
8598 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8603 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8604 	if (ring->ring_size)
8605 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8608 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8609 	if (ring->ring_size)
8610 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8614 		r = vce_v1_0_init(rdev);
8615 	else if (r != -ENOENT)
8616 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8618 	r = radeon_ib_pool_init(rdev);
8620 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8624 	r = radeon_vm_manager_init(rdev);
8626 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8630 	r = radeon_audio_init(rdev);
8634 	r = radeon_kfd_resume(rdev);
8642 * cik_resume - resume the asic to a functional state
8644 * @rdev: radeon_device pointer
8646 * Programs the asic to a functional state (CIK).
8648 * Returns 0 for success, error for failure.
8650 int cik_resume(struct radeon_device *rdev)
/* Re-post the card via atombios before reprogramming anything. */
8655 	atom_asic_init(rdev->mode_info.atom_context);
8657 	/* init golden registers */
8658 	cik_init_golden_registers(rdev);
8660 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8661 		radeon_pm_resume(rdev);
/* accel_working is set optimistically and cleared if startup fails,
 * so the rest of the driver falls back to non-accelerated paths.
 */
8663 	rdev->accel_working = true;
8664 	r = cik_startup(rdev);
8666 		DRM_ERROR("cik startup failed on resume\n");
8667 		rdev->accel_working = false;
8676 * cik_suspend - suspend the asic
8678 * @rdev: radeon_device pointer
8680 * Bring the chip into a state suitable for suspend (CIK).
8681 * Called at suspend.
8682 * Returns 0 for success.
8684 int cik_suspend(struct radeon_device *rdev)
/* Teardown mirrors cik_startup() in reverse: clients (kfd/pm/audio/vm)
 * first, then engines (CP/SDMA/UVD/VCE), then IRQs, writeback and GART.
 */
8686 	radeon_kfd_suspend(rdev);
8687 	radeon_pm_suspend(rdev);
8688 	radeon_audio_fini(rdev);
8689 	radeon_vm_manager_fini(rdev);
8690 	cik_cp_enable(rdev, false);
8691 	cik_sdma_enable(rdev, false);
8692 	uvd_v1_0_fini(rdev);
8693 	radeon_uvd_suspend(rdev);
8694 	radeon_vce_suspend(rdev);
8697 	cik_irq_suspend(rdev);
8698 	radeon_wb_disable(rdev);
8699 	cik_pcie_gart_disable(rdev);
8703 /* Plan is to move initialization in that function and use
8704 * helper function so that radeon_device_init pretty much
8705 * do nothing more than calling asic specific function. This
8706 * should also allow to remove a bunch of callback function
8710 * cik_init - asic specific driver and hw init
8712 * @rdev: radeon_device pointer
8714 * Setup asic specific driver variables and program the hw
8715 * to a functional state (CIK).
8716 * Called at driver startup.
8717 * Returns 0 for success, errors for failure.
8719 int cik_init(struct radeon_device *rdev)
8721 	struct radeon_ring *ring;
/* CIK requires an ATOMBIOS; bail out early if we can't find one. */
8725 	if (!radeon_get_bios(rdev)) {
8726 		if (ASIC_IS_AVIVO(rdev))
8729 	/* Must be an ATOMBIOS */
8730 	if (!rdev->is_atom_bios) {
8731 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8734 	r = radeon_atombios_init(rdev);
8738 	/* Post card if necessary */
8739 	if (!radeon_card_posted(rdev)) {
8741 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8744 		DRM_INFO("GPU not posted. posting now...\n");
8745 		atom_asic_init(rdev->mode_info.atom_context);
8747 	/* init golden registers */
8748 	cik_init_golden_registers(rdev);
8749 	/* Initialize scratch registers */
8750 	cik_scratch_init(rdev);
8751 	/* Initialize surface registers */
8752 	radeon_surface_init(rdev);
8753 	/* Initialize clocks */
8754 	radeon_get_clock_info(rdev->ddev);
8757 	r = radeon_fence_driver_init(rdev);
8761 	/* initialize memory controller */
8762 	r = cik_mc_init(rdev);
8765 	/* Memory manager */
8766 	r = radeon_bo_init(rdev);
/* Fetch microcode only if some blob is still missing; IGPs need fewer
 * firmware images than dGPUs (no MC/SMC ucode).
 */
8770 	if (rdev->flags & RADEON_IS_IGP) {
8771 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8772 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8773 			r = cik_init_microcode(rdev);
8775 				DRM_ERROR("Failed to load firmware!\n");
8780 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8781 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8783 			r = cik_init_microcode(rdev);
8785 				DRM_ERROR("Failed to load firmware!\n");
8791 	/* Initialize power management */
8792 	radeon_pm_init(rdev);
/* Pre-size all rings; they are actually created in cik_startup().
 * Compute rings additionally reserve a doorbell each.
 */
8794 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8795 	ring->ring_obj = NULL;
8796 	r600_ring_init(rdev, ring, 1024 * 1024);
8798 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8799 	ring->ring_obj = NULL;
8800 	r600_ring_init(rdev, ring, 1024 * 1024);
8801 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8805 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8806 	ring->ring_obj = NULL;
8807 	r600_ring_init(rdev, ring, 1024 * 1024);
8808 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8812 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8813 	ring->ring_obj = NULL;
8814 	r600_ring_init(rdev, ring, 256 * 1024);
8816 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8817 	ring->ring_obj = NULL;
8818 	r600_ring_init(rdev, ring, 256 * 1024);
8820 	r = radeon_uvd_init(rdev);
8822 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8823 		ring->ring_obj = NULL;
8824 		r600_ring_init(rdev, ring, 4096);
8827 	r = radeon_vce_init(rdev);
8829 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8830 		ring->ring_obj = NULL;
8831 		r600_ring_init(rdev, ring, 4096);
8833 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8834 		ring->ring_obj = NULL;
8835 		r600_ring_init(rdev, ring, 4096);
8838 	rdev->ih.ring_obj = NULL;
8839 	r600_ih_ring_init(rdev, 64 * 1024);
8841 	r = r600_pcie_gart_init(rdev);
8845 	rdev->accel_working = true;
8846 	r = cik_startup(rdev);
/* First startup failure is not fatal: disable acceleration and tear
 * down everything cik_startup() may have set up.
 */
8848 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8850 		cik_sdma_fini(rdev);
8852 		sumo_rlc_fini(rdev);
8854 		radeon_wb_fini(rdev);
8855 		radeon_ib_pool_fini(rdev);
8856 		radeon_vm_manager_fini(rdev);
8857 		radeon_irq_kms_fini(rdev);
8858 		cik_pcie_gart_fini(rdev);
8859 		rdev->accel_working = false;
8862 	/* Don't start up if the MC ucode is missing.
8863 	 * The default clocks and voltages before the MC ucode
8864 	 * is loaded are not suffient for advanced operations.
8866 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8867 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8875 * cik_fini - asic specific driver and hw fini
8877 * @rdev: radeon_device pointer
8879 * Tear down the asic specific driver variables and program the hw
8880 * to an idle state (CIK).
8881 * Called at driver unload.
8883 void cik_fini(struct radeon_device *rdev)
/* Full driver-unload teardown: engines, helper objects, GART/VRAM
 * scratch, then the generic GEM/fence/BO/atombios layers.
 */
8885 	radeon_pm_fini(rdev);
8887 	cik_sdma_fini(rdev);
8891 	sumo_rlc_fini(rdev);
8893 	radeon_wb_fini(rdev);
8894 	radeon_vm_manager_fini(rdev);
8895 	radeon_ib_pool_fini(rdev);
8896 	radeon_irq_kms_fini(rdev);
8897 	uvd_v1_0_fini(rdev);
8898 	radeon_uvd_fini(rdev);
8899 	radeon_vce_fini(rdev);
8900 	cik_pcie_gart_fini(rdev);
8901 	r600_vram_scratch_fini(rdev);
8902 	radeon_gem_fini(rdev);
8903 	radeon_fence_driver_fini(rdev);
8904 	radeon_bo_fini(rdev);
8905 	radeon_atombios_fini(rdev);
8910 void dce8_program_fmt(struct drm_encoder *encoder)
/* Program the FMT (output bit-depth reduction) block for the CRTC
 * driving this encoder: dithering or truncation based on monitor bpc.
 */
8912 	struct drm_device *dev = encoder->dev;
8913 	struct radeon_device *rdev = dev->dev_private;
8914 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8915 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8916 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8919 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8922 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8923 		bpc = radeon_get_monitor_bpc(connector);
8924 		dither = radeon_connector->dither;
8927 	/* LVDS/eDP FMT is set up by atom */
8928 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8931 	/* not needed for analog */
8932 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8933 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
/* NOTE(review): the three branches below select dither/truncate depth 0,
 * 1 and 2 — presumably the 6/8/10-bpc cases of a switch (bpc) whose
 * labels are not visible here; confirm against upstream.
 */
8941 		if (dither == RADEON_FMT_DITHER_ENABLE)
8942 			/* XXX sort out optimal dither settings */
8943 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8944 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8946 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8949 		if (dither == RADEON_FMT_DITHER_ENABLE)
8950 			/* XXX sort out optimal dither settings */
8951 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8952 				FMT_RGB_RANDOM_ENABLE |
8953 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8955 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8958 		if (dither == RADEON_FMT_DITHER_ENABLE)
8959 			/* XXX sort out optimal dither settings */
8960 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8961 				FMT_RGB_RANDOM_ENABLE |
8962 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8964 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8971 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8974 /* display watermark setup */
8976 * dce8_line_buffer_adjust - Set up the line buffer
8978 * @rdev: radeon_device pointer
8979 * @radeon_crtc: the selected display controller
8980 * @mode: the current display mode on the selected display
8983 * Setup up the line buffer allocation for
8984 * the selected display controller (CIK).
8985 * Returns the line buffer size in pixels.
8987 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8988 				   struct radeon_crtc *radeon_crtc,
8989 				   struct drm_display_mode *mode)
8991 	u32 tmp, buffer_alloc, i;
8992 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8995 	 * There are 6 line buffers, one for each display controllers.
8996 	 * There are 3 partitions per LB. Select the number of partitions
8997 	 * to enable based on the display width. For display widths larger
8998 	 * than 4096, you need use to use 2 display controllers and combine
8999 	 * them using the stereo blender.
9001 	if (radeon_crtc->base.enabled && mode) {
/* Pick LB partition config and DMIF buffer count by hdisplay width;
 * IGPs get fewer DMIF buffers than dGPUs.
 */
9002 		if (mode->crtc_hdisplay < 1920) {
9005 		} else if (mode->crtc_hdisplay < 2560) {
9008 		} else if (mode->crtc_hdisplay < 4096) {
9010 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9012 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9014 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9021 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9022 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9024 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9025 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* Busy-wait (bounded by usec_timeout) for the DMIF allocation to land. */
9026 	for (i = 0; i < rdev->usec_timeout; i++) {
9027 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9028 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9033 	if (radeon_crtc->base.enabled && mode) {
9045 	/* controller not enabled, so no lb used */
9050 * cik_get_number_of_dram_channels - get the number of dram channels
9052 * @rdev: radeon_device pointer
9054 * Look up the number of video ram channels (CIK).
9055 * Used for display watermark bandwidth calculations
9056 * Returns the number of dram channels
9058 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
/* Decode the channel count from the NOOFCHAN field of MC_SHARED_CHMAP. */
9060 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9062 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
/* Input parameters for the DCE8 display watermark calculations below. */
9085 struct dce8_wm_params {
9086 	u32 dram_channels; /* number of dram channels */
9087 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9088 	u32 sclk;          /* engine clock in kHz */
9089 	u32 disp_clk;      /* display clock in kHz */
9090 	u32 src_width;     /* viewport width */
9091 	u32 active_time;   /* active display time in ns */
9092 	u32 blank_time;    /* blank time in ns */
9093 	bool interlaced;    /* mode is interlaced */
9094 	fixed20_12 vsc;    /* vertical scale ratio */
9095 	u32 num_heads;     /* number of active crtcs */
9096 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9097 	u32 lb_size;       /* line buffer allocated to pipe */
9098 	u32 vtaps;         /* vertical scaler taps */
9102 * dce8_dram_bandwidth - get the dram bandwidth
9104 * @wm: watermark calculation data
9106 * Calculate the raw dram bandwidth (CIK).
9107 * Used for display watermark bandwidth calculations
9108 * Returns the dram bandwidth in MBytes/s
9110 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9112 /* Calculate raw DRAM Bandwidth */
9113 fixed20_12 dram_efficiency; /* 0.7 */
9114 fixed20_12 yclk, dram_channels, bandwidth;
9117 a.full = dfixed_const(1000);
9118 yclk.full = dfixed_const(wm->yclk);
9119 yclk.full = dfixed_div(yclk, a);
9120 dram_channels.full = dfixed_const(wm->dram_channels * 4);
9121 a.full = dfixed_const(10);
9122 dram_efficiency.full = dfixed_const(7);
9123 dram_efficiency.full = dfixed_div(dram_efficiency, a);
9124 bandwidth.full = dfixed_mul(dram_channels, yclk);
9125 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9127 return dfixed_trunc(bandwidth);
9131 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9133 * @wm: watermark calculation data
9135 * Calculate the dram bandwidth used for display (CIK).
9136 * Used for display watermark bandwidth calculations
9137 * Returns the dram bandwidth for display in MBytes/s
9139 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9141 /* Calculate DRAM Bandwidth and the part allocated to display. */
9142 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9143 fixed20_12 yclk, dram_channels, bandwidth;
9146 a.full = dfixed_const(1000);
9147 yclk.full = dfixed_const(wm->yclk);
9148 yclk.full = dfixed_div(yclk, a);
9149 dram_channels.full = dfixed_const(wm->dram_channels * 4);
9150 a.full = dfixed_const(10);
9151 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9152 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9153 bandwidth.full = dfixed_mul(dram_channels, yclk);
9154 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9156 return dfixed_trunc(bandwidth);
9160 * dce8_data_return_bandwidth - get the data return bandwidth
9162 * @wm: watermark calculation data
9164 * Calculate the data return bandwidth used for display (CIK).
9165 * Used for display watermark bandwidth calculations
9166 * Returns the data return bandwidth in MBytes/s
9168 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9170 /* Calculate the display Data return Bandwidth */
9171 fixed20_12 return_efficiency; /* 0.8 */
9172 fixed20_12 sclk, bandwidth;
9175 a.full = dfixed_const(1000);
9176 sclk.full = dfixed_const(wm->sclk);
9177 sclk.full = dfixed_div(sclk, a);
9178 a.full = dfixed_const(10);
9179 return_efficiency.full = dfixed_const(8);
9180 return_efficiency.full = dfixed_div(return_efficiency, a);
9181 a.full = dfixed_const(32);
9182 bandwidth.full = dfixed_mul(a, sclk);
9183 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9185 return dfixed_trunc(bandwidth);
9189 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9191 * @wm: watermark calculation data
9193 * Calculate the dmif bandwidth used for display (CIK).
9194 * Used for display watermark bandwidth calculations
9195 * Returns the dmif bandwidth in MBytes/s
9197 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9199 /* Calculate the DMIF Request Bandwidth */
9200 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9201 fixed20_12 disp_clk, bandwidth;
9204 a.full = dfixed_const(1000);
9205 disp_clk.full = dfixed_const(wm->disp_clk);
9206 disp_clk.full = dfixed_div(disp_clk, a);
9207 a.full = dfixed_const(32);
9208 b.full = dfixed_mul(a, disp_clk);
9210 a.full = dfixed_const(10);
9211 disp_clk_request_efficiency.full = dfixed_const(8);
9212 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9214 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9216 return dfixed_trunc(bandwidth);
9220 * dce8_available_bandwidth - get the min available bandwidth
9222 * @wm: watermark calculation data
9224 * Calculate the min available bandwidth used for display (CIK).
9225 * Used for display watermark bandwidth calculations
9226 * Returns the min available bandwidth in MBytes/s
9228 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9230 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9231 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9232 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9233 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9235 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9239 * dce8_average_bandwidth - get the average available bandwidth
9241 * @wm: watermark calculation data
9243 * Calculate the average available bandwidth used for display (CIK).
9244 * Used for display watermark bandwidth calculations
9245 * Returns the average available bandwidth in MBytes/s
9247 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9249 /* Calculate the display mode Average Bandwidth
9250 * DisplayMode should contain the source and destination dimensions,
9254 fixed20_12 line_time;
9255 fixed20_12 src_width;
9256 fixed20_12 bandwidth;
9259 a.full = dfixed_const(1000);
9260 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9261 line_time.full = dfixed_div(line_time, a);
9262 bpp.full = dfixed_const(wm->bytes_per_pixel);
9263 src_width.full = dfixed_const(wm->src_width);
9264 bandwidth.full = dfixed_mul(src_width, bpp);
9265 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9266 bandwidth.full = dfixed_div(bandwidth, line_time);
9268 return dfixed_trunc(bandwidth);
9272 * dce8_latency_watermark - get the latency watermark
9274 * @wm: watermark calculation data
9276 * Calculate the latency watermark (CIK).
9277 * Used for display watermark bandwidth calculations
9278 * Returns the latency watermark in ns
9280 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9282 	/* First calculate the latency in ns */
9283 	u32 mc_latency = 2000; /* 2000 ns. */
9284 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9285 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9286 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9287 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9288 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9289 		(wm->num_heads * cursor_line_pair_return_time);
9290 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9291 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9292 	u32 tmp, dmif_size = 12288;
9295 	if (wm->num_heads == 0)
/* Scaling/interlacing may require up to 4 source lines per output line. */
9298 	a.full = dfixed_const(2);
9299 	b.full = dfixed_const(1);
9300 	if ((wm->vsc.full > a.full) ||
9301 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9303 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9304 		max_src_lines_per_dst_line = 4;
9306 		max_src_lines_per_dst_line = 2;
/* Line-buffer fill bandwidth: limited by this head's share of the
 * available bandwidth and by the DMIF-size/latency bound.
 */
9308 	a.full = dfixed_const(available_bandwidth);
9309 	b.full = dfixed_const(wm->num_heads);
9310 	a.full = dfixed_div(a, b);
9312 	b.full = dfixed_const(mc_latency + 512);
9313 	c.full = dfixed_const(wm->disp_clk);
9314 	b.full = dfixed_div(b, c);
9316 	c.full = dfixed_const(dmif_size);
9317 	b.full = dfixed_div(c, b);
9319 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9321 	b.full = dfixed_const(1000);
9322 	c.full = dfixed_const(wm->disp_clk);
9323 	b.full = dfixed_div(c, b);
9324 	c.full = dfixed_const(wm->bytes_per_pixel);
9325 	b.full = dfixed_mul(b, c);
9327 	lb_fill_bw = min(tmp, dfixed_trunc(b));
/* Time to fill one output line worth of source data at lb_fill_bw. */
9329 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9330 	b.full = dfixed_const(1000);
9331 	c.full = dfixed_const(lb_fill_bw);
9332 	b.full = dfixed_div(c, b);
9333 	a.full = dfixed_div(a, b);
9334 	line_fill_time = dfixed_trunc(a);
9336 	if (line_fill_time < wm->active_time)
/* Line fill slower than the active period: pad the watermark by the excess. */
9339 		return latency + (line_fill_time - wm->active_time);
9344 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9345 * average and available dram bandwidth
9347 * @wm: watermark calculation data
9349 * Check if the display average bandwidth fits in the display
9350 * dram bandwidth (CIK).
9351 * Used for display watermark bandwidth calculations
9352 * Returns true if the display fits, false if not.
9354 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9356 if (dce8_average_bandwidth(wm) <=
9357 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9364 * dce8_average_bandwidth_vs_available_bandwidth - check
9365 * average and available bandwidth
9367 * @wm: watermark calculation data
9369 * Check if the display average bandwidth fits in the display
9370 * available bandwidth (CIK).
9371 * Used for display watermark bandwidth calculations
9372 * Returns true if the display fits, false if not.
9374 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9376 if (dce8_average_bandwidth(wm) <=
9377 (dce8_available_bandwidth(wm) / wm->num_heads))
9384 * dce8_check_latency_hiding - check latency hiding
9386 * @wm: watermark calculation data
9388 * Check latency hiding (CIK).
9389 * Used for display watermark bandwidth calculations
9390 * Returns true if the display fits, false if not.
9392 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9394 u32 lb_partitions = wm->lb_size / wm->src_width;
9395 u32 line_time = wm->active_time + wm->blank_time;
9396 u32 latency_tolerant_lines;
9400 a.full = dfixed_const(1);
9401 if (wm->vsc.full > a.full)
9402 latency_tolerant_lines = 1;
9404 if (lb_partitions <= (wm->vtaps + 1))
9405 latency_tolerant_lines = 1;
9407 latency_tolerant_lines = 2;
9410 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9412 if (dce8_latency_watermark(wm) <= latency_hiding)
9419 * dce8_program_watermarks - program display watermarks
9421 * @rdev: radeon_device pointer
9422 * @radeon_crtc: the selected display controller
9423 * @lb_size: line buffer size
9424 * @num_heads: number of display controllers in use
9426 * Calculate and program the display watermarks for the
9427 * selected display controller (CIK).
9429 static void dce8_program_watermarks(struct radeon_device *rdev,
9430 				    struct radeon_crtc *radeon_crtc,
9431 				    u32 lb_size, u32 num_heads)
9433 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9434 	struct dce8_wm_params wm_low, wm_high;
9437 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9440 	if (radeon_crtc->base.enabled && num_heads && mode) {
9441 		pixel_period = 1000000 / (u32)mode->clock;
9442 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9444 		/* watermark for high clocks */
/* With DPM, use the highest (non-low) mclk/sclk levels; otherwise fall
 * back to the current clocks.  Clocks are *10 to convert units.
 */
9445 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9446 		    rdev->pm.dpm_enabled) {
9448 				radeon_dpm_get_mclk(rdev, false) * 10;
9450 				radeon_dpm_get_sclk(rdev, false) * 10;
9452 			wm_high.yclk = rdev->pm.current_mclk * 10;
9453 			wm_high.sclk = rdev->pm.current_sclk * 10;
9456 		wm_high.disp_clk = mode->clock;
9457 		wm_high.src_width = mode->crtc_hdisplay;
9458 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9459 		wm_high.blank_time = line_time - wm_high.active_time;
9460 		wm_high.interlaced = false;
9461 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9462 			wm_high.interlaced = true;
9463 		wm_high.vsc = radeon_crtc->vsc;
9465 		if (radeon_crtc->rmx_type != RMX_OFF)
9467 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9468 		wm_high.lb_size = lb_size;
9469 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9470 		wm_high.num_heads = num_heads;
9472 		/* set for high clocks */
9473 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9475 		/* possibly force display priority to high */
9476 		/* should really do this at mode validation time... */
9477 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9478 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9479 		    !dce8_check_latency_hiding(&wm_high) ||
9480 		    (rdev->disp_priority == 2)) {
9481 			DRM_DEBUG_KMS("force priority to high\n");
9484 		/* watermark for low clocks */
/* Same calculation again with the lowest DPM clocks. */
9485 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9486 		    rdev->pm.dpm_enabled) {
9488 				radeon_dpm_get_mclk(rdev, true) * 10;
9490 				radeon_dpm_get_sclk(rdev, true) * 10;
9492 			wm_low.yclk = rdev->pm.current_mclk * 10;
9493 			wm_low.sclk = rdev->pm.current_sclk * 10;
9496 		wm_low.disp_clk = mode->clock;
9497 		wm_low.src_width = mode->crtc_hdisplay;
9498 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9499 		wm_low.blank_time = line_time - wm_low.active_time;
9500 		wm_low.interlaced = false;
9501 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9502 			wm_low.interlaced = true;
9503 		wm_low.vsc = radeon_crtc->vsc;
9505 		if (radeon_crtc->rmx_type != RMX_OFF)
9507 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9508 		wm_low.lb_size = lb_size;
9509 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9510 		wm_low.num_heads = num_heads;
9512 		/* set for low clocks */
9513 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9515 		/* possibly force display priority to high */
9516 		/* should really do this at mode validation time... */
9517 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9518 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9519 		    !dce8_check_latency_hiding(&wm_low) ||
9520 		    (rdev->disp_priority == 2)) {
9521 			DRM_DEBUG_KMS("force priority to high\n");
9524 		/* Save number of lines the linebuffer leads before the scanout */
9525 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
/* Program watermark set A then B via the DPG mask register, then
 * restore the original watermark selection.
 */
9529 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9531 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9532 	tmp |= LATENCY_WATERMARK_MASK(1);
9533 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9534 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9535 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9536 		LATENCY_HIGH_WATERMARK(line_time)));
9538 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9539 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9540 	tmp |= LATENCY_WATERMARK_MASK(2);
9541 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9542 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9543 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9544 		LATENCY_HIGH_WATERMARK(line_time)));
9545 	/* restore original selection */
9546 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9548 	/* save values for DPM */
9549 	radeon_crtc->line_time = line_time;
9550 	radeon_crtc->wm_high = latency_watermark_a;
9551 	radeon_crtc->wm_low = latency_watermark_b;
9555 * dce8_bandwidth_update - program display watermarks
9557 * @rdev: radeon_device pointer
9559 * Calculate and program the display watermarks and line
9560 * buffer allocation (CIK).
9562 void dce8_bandwidth_update(struct radeon_device *rdev)
9564 struct drm_display_mode *mode = NULL;
9565 u32 num_heads = 0, lb_size;
9568 if (!rdev->mode_info.mode_config_initialized)
9571 radeon_update_display_priority(rdev);
9573 for (i = 0; i < rdev->num_crtc; i++) {
9574 if (rdev->mode_info.crtcs[i]->base.enabled)
9577 for (i = 0; i < rdev->num_crtc; i++) {
9578 mode = &rdev->mode_info.crtcs[i]->base.mode;
9579 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9580 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9585 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9587 * @rdev: radeon_device pointer
9589 * Fetches a GPU clock counter snapshot (SI).
9590 * Returns the 64 bit clock counter snapshot.
9592 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9596 mutex_lock(&rdev->gpu_clock_mutex);
9597 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9598 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9599 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9600 mutex_unlock(&rdev->gpu_clock_mutex);
9604 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9605 u32 cntl_reg, u32 status_reg)
9608 struct atom_clock_dividers dividers;
9611 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9612 clock, false, ÷rs);
9616 tmp = RREG32_SMC(cntl_reg);
9617 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9618 tmp |= dividers.post_divider;
9619 WREG32_SMC(cntl_reg, tmp);
9621 for (i = 0; i < 100; i++) {
9622 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9632 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9636 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9640 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9644 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9647 struct atom_clock_dividers dividers;
9650 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9651 ecclk, false, ÷rs);
9655 for (i = 0; i < 100; i++) {
9656 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9663 tmp = RREG32_SMC(CG_ECLK_CNTL);
9664 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9665 tmp |= dividers.post_divider;
9666 WREG32_SMC(CG_ECLK_CNTL, tmp);
9668 for (i = 0; i < 100; i++) {
9669 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9679 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9681 struct pci_dev *root = rdev->pdev->bus->self;
9682 int bridge_pos, gpu_pos;
9683 u32 speed_cntl, mask, current_data_rate;
9687 if (pci_is_root_bus(rdev->pdev->bus))
9690 if (radeon_pcie_gen2 == 0)
9693 if (rdev->flags & RADEON_IS_IGP)
9696 if (!(rdev->flags & RADEON_IS_PCIE))
9699 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9703 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9706 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9707 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9708 LC_CURRENT_DATA_RATE_SHIFT;
9709 if (mask & DRM_PCIE_SPEED_80) {
9710 if (current_data_rate == 2) {
9711 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9714 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9715 } else if (mask & DRM_PCIE_SPEED_50) {
9716 if (current_data_rate == 1) {
9717 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9720 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9723 bridge_pos = pci_pcie_cap(root);
9727 gpu_pos = pci_pcie_cap(rdev->pdev);
9731 if (mask & DRM_PCIE_SPEED_80) {
9732 /* re-try equalization if gen3 is not already enabled */
9733 if (current_data_rate != 2) {
9734 u16 bridge_cfg, gpu_cfg;
9735 u16 bridge_cfg2, gpu_cfg2;
9736 u32 max_lw, current_lw, tmp;
9738 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9739 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9741 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9742 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9744 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9745 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9747 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9748 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9749 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9751 if (current_lw < max_lw) {
9752 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9753 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9754 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9755 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9756 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9757 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9761 for (i = 0; i < 10; i++) {
9763 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9764 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9767 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9768 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9770 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9771 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9773 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9774 tmp |= LC_SET_QUIESCE;
9775 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9777 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9779 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9784 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9785 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9786 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9787 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9789 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9790 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9791 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9792 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9795 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9796 tmp16 &= ~((1 << 4) | (7 << 9));
9797 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9798 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9800 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9801 tmp16 &= ~((1 << 4) | (7 << 9));
9802 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9803 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9805 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9806 tmp &= ~LC_SET_QUIESCE;
9807 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9812 /* set the link speed */
9813 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9814 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9815 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9817 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9819 if (mask & DRM_PCIE_SPEED_80)
9820 tmp16 |= 3; /* gen3 */
9821 else if (mask & DRM_PCIE_SPEED_50)
9822 tmp16 |= 2; /* gen2 */
9824 tmp16 |= 1; /* gen1 */
9825 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9827 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9828 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9829 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9831 for (i = 0; i < rdev->usec_timeout; i++) {
9832 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9833 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9839 static void cik_program_aspm(struct radeon_device *rdev)
9842 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9843 bool disable_clkreq = false;
9845 if (radeon_aspm == 0)
9848 /* XXX double check IGPs */
9849 if (rdev->flags & RADEON_IS_IGP)
9852 if (!(rdev->flags & RADEON_IS_PCIE))
9855 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9856 data &= ~LC_XMIT_N_FTS_MASK;
9857 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9859 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9861 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9862 data |= LC_GO_TO_RECOVERY;
9864 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9866 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9867 data |= P_IGNORE_EDB_ERR;
9869 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9871 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9872 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9873 data |= LC_PMI_TO_L1_DIS;
9875 data |= LC_L0S_INACTIVITY(7);
9878 data |= LC_L1_INACTIVITY(7);
9879 data &= ~LC_PMI_TO_L1_DIS;
9881 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9883 if (!disable_plloff_in_l1) {
9884 bool clk_req_support;
9886 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9887 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9888 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9890 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9892 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9893 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9894 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9896 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9898 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9899 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9900 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9902 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9904 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9905 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9906 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9908 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9910 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9911 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9912 data |= LC_DYN_LANES_PWR_STATE(3);
9914 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9916 if (!disable_clkreq &&
9917 !pci_is_root_bus(rdev->pdev->bus)) {
9918 struct pci_dev *root = rdev->pdev->bus->self;
9921 clk_req_support = false;
9922 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9923 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9924 clk_req_support = true;
9926 clk_req_support = false;
9929 if (clk_req_support) {
9930 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9931 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9933 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9935 orig = data = RREG32_SMC(THM_CLK_CNTL);
9936 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9937 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9939 WREG32_SMC(THM_CLK_CNTL, data);
9941 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9942 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9943 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9945 WREG32_SMC(MISC_CLK_CTRL, data);
9947 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9948 data &= ~BCLK_AS_XCLK;
9950 WREG32_SMC(CG_CLKPIN_CNTL, data);
9952 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9953 data &= ~FORCE_BIF_REFCLK_EN;
9955 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9957 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9958 data &= ~MPLL_CLKOUT_SEL_MASK;
9959 data |= MPLL_CLKOUT_SEL(4);
9961 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9966 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9969 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9970 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9972 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9975 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9976 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9977 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9978 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9979 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9980 data &= ~LC_L0S_INACTIVITY_MASK;
9982 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);