GNU Linux-libre 4.4.290-gnu1
[releases.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 /*(DEBLOBBED)*/
39
40 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
41 extern void r600_ih_ring_fini(struct radeon_device *rdev);
42 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
43 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
44 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
45 extern void sumo_rlc_fini(struct radeon_device *rdev);
46 extern int sumo_rlc_init(struct radeon_device *rdev);
47 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
48 extern void si_rlc_reset(struct radeon_device *rdev);
49 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
50 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
51 extern int cik_sdma_resume(struct radeon_device *rdev);
52 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
53 extern void cik_sdma_fini(struct radeon_device *rdev);
54 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
55 static void cik_rlc_stop(struct radeon_device *rdev);
56 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
57 static void cik_program_aspm(struct radeon_device *rdev);
58 static void cik_init_pg(struct radeon_device *rdev);
59 static void cik_init_cg(struct radeon_device *rdev);
60 static void cik_fini_pg(struct radeon_device *rdev);
61 static void cik_fini_cg(struct radeon_device *rdev);
62 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
63                                           bool enable);
64
65 /**
66  * cik_get_allowed_info_register - fetch the register for the info ioctl
67  *
68  * @rdev: radeon_device pointer
69  * @reg: register offset in bytes
70  * @val: register value
71  *
72  * Returns 0 for success or -EINVAL for an invalid register
73  *
74  */
75 int cik_get_allowed_info_register(struct radeon_device *rdev,
76                                   u32 reg, u32 *val)
77 {
78         switch (reg) {
79         case GRBM_STATUS:
80         case GRBM_STATUS2:
81         case GRBM_STATUS_SE0:
82         case GRBM_STATUS_SE1:
83         case GRBM_STATUS_SE2:
84         case GRBM_STATUS_SE3:
85         case SRBM_STATUS:
86         case SRBM_STATUS2:
87         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
88         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
89         case UVD_STATUS:
90         /* TODO VCE */
91                 *val = RREG32(reg);
92                 return 0;
93         default:
94                 return -EINVAL;
95         }
96 }
97
98 /*
99  * Indirect registers accessor
100  */
101 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
102 {
103         unsigned long flags;
104         u32 r;
105
106         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
107         WREG32(CIK_DIDT_IND_INDEX, (reg));
108         r = RREG32(CIK_DIDT_IND_DATA);
109         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
110         return r;
111 }
112
113 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
114 {
115         unsigned long flags;
116
117         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
118         WREG32(CIK_DIDT_IND_INDEX, (reg));
119         WREG32(CIK_DIDT_IND_DATA, (v));
120         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
121 }
122
123 /* get temperature in millidegrees */
124 int ci_get_temp(struct radeon_device *rdev)
125 {
126         u32 temp;
127         int actual_temp = 0;
128
129         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
130                 CTF_TEMP_SHIFT;
131
132         if (temp & 0x200)
133                 actual_temp = 255;
134         else
135                 actual_temp = temp & 0x1ff;
136
137         actual_temp = actual_temp * 1000;
138
139         return actual_temp;
140 }
141
142 /* get temperature in millidegrees */
143 int kv_get_temp(struct radeon_device *rdev)
144 {
145         u32 temp;
146         int actual_temp = 0;
147
148         temp = RREG32_SMC(0xC0300E0C);
149
150         if (temp)
151                 actual_temp = (temp / 8) - 49;
152         else
153                 actual_temp = 0;
154
155         actual_temp = actual_temp * 1000;
156
157         return actual_temp;
158 }
159
160 /*
161  * Indirect registers accessor
162  */
163 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
164 {
165         unsigned long flags;
166         u32 r;
167
168         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
169         WREG32(PCIE_INDEX, reg);
170         (void)RREG32(PCIE_INDEX);
171         r = RREG32(PCIE_DATA);
172         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
173         return r;
174 }
175
176 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
177 {
178         unsigned long flags;
179
180         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
181         WREG32(PCIE_INDEX, reg);
182         (void)RREG32(PCIE_INDEX);
183         WREG32(PCIE_DATA, v);
184         (void)RREG32(PCIE_DATA);
185         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
186 }
187
/*
 * RLC save/restore register list for Spectre GFX parts.
 *
 * NOTE(review): entries appear to be pairs of
 *   ((grbm_gfx_index << 16) | (register_byte_offset >> 2)), default value
 * with the bare 0x3 / 0x5 words looking like section/count markers for the
 * RLC microcode — verify against the RLC save/restore programming code
 * before relying on this layout.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,	/* NOTE(review): presumably a count/section marker — confirm */
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc900 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc904 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc908 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xae00 << 16) | (0xc90c >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xae00 << 16) | (0xc910 >> 2), 0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc778 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc77c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc780 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc784 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc788 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc78c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xae00 << 16) | (0x31068 >> 2), 0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,	/* NOTE(review): presumably a count/section marker — confirm */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
634
/*
 * RLC save/restore register list for Kalindi GFX parts.
 *
 * NOTE(review): same apparent layout as the Spectre list — pairs of
 *   ((grbm_gfx_index << 16) | (register_byte_offset >> 2)), default value
 * with bare 0x3 / 0x5 words that look like section/count markers; verify
 * against the RLC save/restore programming code before relying on this.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x829c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x869c >> 2), 0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2), 0x00000000,
	0x3,	/* NOTE(review): presumably a count/section marker — confirm */
	(0x0e00 << 16) | (0xc130 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2), 0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2), 0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2), 0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2), 0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2), 0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc770 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc774 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc798 >> 2), 0x00000000,
	(0x0400 << 16) | (0xc79c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x970c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x971c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2), 0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2), 0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2), 0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2), 0x00000000,
	0x5,	/* NOTE(review): presumably a count/section marker — confirm */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
959
/*
 * Bonaire "golden" register tables, applied from cik_init_golden_registers().
 * Each row is a {register offset, mask, value} triple handed to
 * radeon_program_register_sequence().
 * NOTE(review): row semantics inferred from the 3-column layout and the
 * call sites below — confirm against radeon_program_register_sequence()
 * before editing any entry; these values are written to hardware.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

/* Clockgating (MGCG/CGCG) init sequence for Bonaire */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1103
/*
 * Spectre (Kaveri) "golden" register tables; {register offset, mask, value}
 * triples consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers().  Values are written to hardware — do not
 * edit without vendor-provided settings.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

/* Clockgating (MGCG/CGCG) init sequence for Spectre/Kaveri */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1236
/*
 * Kalindi (Kabini; also shared by Mullins/CHIP_MULLINS except for the
 * godavari golden table) "golden" register tables; {register offset, mask,
 * value} triples consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

/* Clockgating (MGCG/CGCG) init sequence for Kalindi/Godavari APUs */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1342
/*
 * Hawaii "golden" register tables; {register offset, mask, value} triples
 * consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers().  Values are written to hardware — do not
 * edit without vendor-provided settings.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

/* Clockgating (MGCG/CGCG) init sequence for Hawaii */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1507
/*
 * Godavari (Mullins) "golden" register table; {register offset, mask, value}
 * triples consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers() (CHIP_MULLINS also uses the kalindi_* tables
 * for its common/spm/mgcg sequences).
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): offset 0x98302 looks like a typo for 0x9834 (compare
	 * the identical {0xf00fffff, 0x00000400} row in kalindi/bonaire/
	 * spectre/hawaii tables, and the offset is not dword-aligned).
	 * Confirm against AMD-provided settings before changing — this is a
	 * live hardware write.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1543
1544
1545 static void cik_init_golden_registers(struct radeon_device *rdev)
1546 {
1547         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1548         mutex_lock(&rdev->grbm_idx_mutex);
1549         switch (rdev->family) {
1550         case CHIP_BONAIRE:
1551                 radeon_program_register_sequence(rdev,
1552                                                  bonaire_mgcg_cgcg_init,
1553                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554                 radeon_program_register_sequence(rdev,
1555                                                  bonaire_golden_registers,
1556                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557                 radeon_program_register_sequence(rdev,
1558                                                  bonaire_golden_common_registers,
1559                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560                 radeon_program_register_sequence(rdev,
1561                                                  bonaire_golden_spm_registers,
1562                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1563                 break;
1564         case CHIP_KABINI:
1565                 radeon_program_register_sequence(rdev,
1566                                                  kalindi_mgcg_cgcg_init,
1567                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568                 radeon_program_register_sequence(rdev,
1569                                                  kalindi_golden_registers,
1570                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571                 radeon_program_register_sequence(rdev,
1572                                                  kalindi_golden_common_registers,
1573                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574                 radeon_program_register_sequence(rdev,
1575                                                  kalindi_golden_spm_registers,
1576                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1577                 break;
1578         case CHIP_MULLINS:
1579                 radeon_program_register_sequence(rdev,
1580                                                  kalindi_mgcg_cgcg_init,
1581                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582                 radeon_program_register_sequence(rdev,
1583                                                  godavari_golden_registers,
1584                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_golden_common_registers,
1587                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_spm_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1591                 break;
1592         case CHIP_KAVERI:
1593                 radeon_program_register_sequence(rdev,
1594                                                  spectre_mgcg_cgcg_init,
1595                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596                 radeon_program_register_sequence(rdev,
1597                                                  spectre_golden_registers,
1598                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1599                 radeon_program_register_sequence(rdev,
1600                                                  spectre_golden_common_registers,
1601                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602                 radeon_program_register_sequence(rdev,
1603                                                  spectre_golden_spm_registers,
1604                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1605                 break;
1606         case CHIP_HAWAII:
1607                 radeon_program_register_sequence(rdev,
1608                                                  hawaii_mgcg_cgcg_init,
1609                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610                 radeon_program_register_sequence(rdev,
1611                                                  hawaii_golden_registers,
1612                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613                 radeon_program_register_sequence(rdev,
1614                                                  hawaii_golden_common_registers,
1615                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616                 radeon_program_register_sequence(rdev,
1617                                                  hawaii_golden_spm_registers,
1618                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1619                 break;
1620         default:
1621                 break;
1622         }
1623         mutex_unlock(&rdev->grbm_idx_mutex);
1624 }
1625
1626 /**
1627  * cik_get_xclk - get the xclk
1628  *
1629  * @rdev: radeon_device pointer
1630  *
1631  * Returns the reference clock used by the gfx engine
1632  * (CIK).
1633  */
1634 u32 cik_get_xclk(struct radeon_device *rdev)
1635 {
1636         u32 reference_clock = rdev->clock.spll.reference_freq;
1637
1638         if (rdev->flags & RADEON_IS_IGP) {
1639                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1640                         return reference_clock / 2;
1641         } else {
1642                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1643                         return reference_clock / 4;
1644         }
1645         return reference_clock;
1646 }
1647
1648 /**
1649  * cik_mm_rdoorbell - read a doorbell dword
1650  *
1651  * @rdev: radeon_device pointer
1652  * @index: doorbell index
1653  *
1654  * Returns the value in the doorbell aperture at the
1655  * requested doorbell index (CIK).
1656  */
1657 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1658 {
1659         if (index < rdev->doorbell.num_doorbells) {
1660                 return readl(rdev->doorbell.ptr + index);
1661         } else {
1662                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1663                 return 0;
1664         }
1665 }
1666
1667 /**
1668  * cik_mm_wdoorbell - write a doorbell dword
1669  *
1670  * @rdev: radeon_device pointer
1671  * @index: doorbell index
1672  * @v: value to write
1673  *
1674  * Writes @v to the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 writel(v, rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1683         }
1684 }
1685
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug register init table: {index, value} pairs written
 * through MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before loading legacy-format MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1727
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io debug register init table: {index, value} pairs written
 * through MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by
 * ci_mc_load_microcode() before loading legacy-format MC ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1755
1756
1757 /**
1758  * cik_srbm_select - select specific register instances
1759  *
1760  * @rdev: radeon_device pointer
1761  * @me: selected ME (micro engine)
1762  * @pipe: pipe
1763  * @queue: queue
1764  * @vmid: VMID
1765  *
1766  * Switches the currently active registers instances.  Some
1767  * registers are instanced per VMID, others are instanced per
1768  * me/pipe/queue combination.
1769  */
1770 static void cik_srbm_select(struct radeon_device *rdev,
1771                             u32 me, u32 pipe, u32 queue, u32 vmid)
1772 {
1773         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1774                              MEID(me & 0x3) |
1775                              VMID(vmid & 0xf) |
1776                              QUEUEID(queue & 0x7));
1777         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1778 }
1779
1780 /* ucode loading */
1781 /**
1782  * ci_mc_load_microcode - load MC ucode into the hw
1783  *
1784  * @rdev: radeon_device pointer
1785  *
1786  * Load the GDDR MC ucode into the hw (CIK).
1787  * Returns 0 on success, error on failure.
1788  */
1789 int ci_mc_load_microcode(struct radeon_device *rdev)
1790 {
1791         const __be32 *fw_data = NULL;
1792         const __le32 *new_fw_data = NULL;
1793         u32 running, blackout = 0, tmp;
1794         u32 *io_mc_regs = NULL;
1795         const __le32 *new_io_mc_regs = NULL;
1796         int i, regs_size, ucode_size;
1797
1798         if (!rdev->mc_fw)
1799                 return -EINVAL;
1800
1801         if (rdev->new_fw) {
1802                 const struct mc_firmware_header_v1_0 *hdr =
1803                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1804
1805                 radeon_ucode_print_mc_hdr(&hdr->header);
1806
1807                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1808                 new_io_mc_regs = (const __le32 *)
1809                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1810                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1811                 new_fw_data = (const __le32 *)
1812                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1813         } else {
1814                 ucode_size = rdev->mc_fw->size / 4;
1815
1816                 switch (rdev->family) {
1817                 case CHIP_BONAIRE:
1818                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1819                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1820                         break;
1821                 case CHIP_HAWAII:
1822                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1823                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1824                         break;
1825                 default:
1826                         return -EINVAL;
1827                 }
1828                 fw_data = (const __be32 *)rdev->mc_fw->data;
1829         }
1830
1831         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1832
1833         if (running == 0) {
1834                 if (running) {
1835                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1836                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1837                 }
1838
1839                 /* reset the engine and set to writable */
1840                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1841                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1842
1843                 /* load mc io regs */
1844                 for (i = 0; i < regs_size; i++) {
1845                         if (rdev->new_fw) {
1846                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1847                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1848                         } else {
1849                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1850                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1851                         }
1852                 }
1853
1854                 tmp = RREG32(MC_SEQ_MISC0);
1855                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1856                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1857                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1858                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1859                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1860                 }
1861
1862                 /* load the MC ucode */
1863                 for (i = 0; i < ucode_size; i++) {
1864                         if (rdev->new_fw)
1865                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1866                         else
1867                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1868                 }
1869
1870                 /* put the engine back into the active state */
1871                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1872                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1873                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1874
1875                 /* wait for training to complete */
1876                 for (i = 0; i < rdev->usec_timeout; i++) {
1877                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1878                                 break;
1879                         udelay(1);
1880                 }
1881                 for (i = 0; i < rdev->usec_timeout; i++) {
1882                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1883                                 break;
1884                         udelay(1);
1885                 }
1886
1887                 if (running)
1888                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1889         }
1890
1891         return 0;
1892 }
1893
1894 /**
1895  * cik_init_microcode - load ucode images from disk
1896  *
1897  * @rdev: radeon_device pointer
1898  *
1899  * Use the firmware interface to load the ucode images into
1900  * the driver (not loaded into hw).
1901  * Returns 0 on success, error on failure.
1902  */
1903 static int cik_init_microcode(struct radeon_device *rdev)
1904 {
1905         const char *chip_name;
1906         const char *new_chip_name;
1907         size_t pfp_req_size, me_req_size, ce_req_size,
1908                 mec_req_size, rlc_req_size, mc_req_size = 0,
1909                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1910         char fw_name[30];
1911         int new_fw = 0;
1912         int err;
1913         int num_fw;
1914
1915         DRM_DEBUG("\n");
1916
1917         switch (rdev->family) {
1918         case CHIP_BONAIRE:
1919                 chip_name = "BONAIRE";
1920                 new_chip_name = "bonaire";
1921                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1922                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1923                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1924                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1925                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1926                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1927                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1928                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1929                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1930                 num_fw = 8;
1931                 break;
1932         case CHIP_HAWAII:
1933                 chip_name = "HAWAII";
1934                 new_chip_name = "hawaii";
1935                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1936                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1937                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1938                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1939                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1940                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1941                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1942                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1943                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1944                 num_fw = 8;
1945                 break;
1946         case CHIP_KAVERI:
1947                 chip_name = "KAVERI";
1948                 new_chip_name = "kaveri";
1949                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1950                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1951                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1952                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1953                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1954                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1955                 num_fw = 7;
1956                 break;
1957         case CHIP_KABINI:
1958                 chip_name = "KABINI";
1959                 new_chip_name = "kabini";
1960                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1961                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1962                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1963                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1964                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1965                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1966                 num_fw = 6;
1967                 break;
1968         case CHIP_MULLINS:
1969                 chip_name = "MULLINS";
1970                 new_chip_name = "mullins";
1971                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1972                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1973                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1974                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1975                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1976                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1977                 num_fw = 6;
1978                 break;
1979         default: BUG();
1980         }
1981
1982         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1983
1984         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1985         err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1986         if (err) {
1987                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1988                 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1989                 if (err)
1990                         goto out;
1991                 if (rdev->pfp_fw->size != pfp_req_size) {
1992                         printk(KERN_ERR
1993                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1994                                rdev->pfp_fw->size, fw_name);
1995                         err = -EINVAL;
1996                         goto out;
1997                 }
1998         } else {
1999                 err = radeon_ucode_validate(rdev->pfp_fw);
2000                 if (err) {
2001                         printk(KERN_ERR
2002                                "cik_fw: validation failed for firmware \"%s\"\n",
2003                                fw_name);
2004                         goto out;
2005                 } else {
2006                         new_fw++;
2007                 }
2008         }
2009
2010         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2011         err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2012         if (err) {
2013                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2014                 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
2015                 if (err)
2016                         goto out;
2017                 if (rdev->me_fw->size != me_req_size) {
2018                         printk(KERN_ERR
2019                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2020                                rdev->me_fw->size, fw_name);
2021                         err = -EINVAL;
2022                 }
2023         } else {
2024                 err = radeon_ucode_validate(rdev->me_fw);
2025                 if (err) {
2026                         printk(KERN_ERR
2027                                "cik_fw: validation failed for firmware \"%s\"\n",
2028                                fw_name);
2029                         goto out;
2030                 } else {
2031                         new_fw++;
2032                 }
2033         }
2034
2035         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2036         err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2037         if (err) {
2038                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2039                 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2040                 if (err)
2041                         goto out;
2042                 if (rdev->ce_fw->size != ce_req_size) {
2043                         printk(KERN_ERR
2044                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2045                                rdev->ce_fw->size, fw_name);
2046                         err = -EINVAL;
2047                 }
2048         } else {
2049                 err = radeon_ucode_validate(rdev->ce_fw);
2050                 if (err) {
2051                         printk(KERN_ERR
2052                                "cik_fw: validation failed for firmware \"%s\"\n",
2053                                fw_name);
2054                         goto out;
2055                 } else {
2056                         new_fw++;
2057                 }
2058         }
2059
2060         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2061         err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2062         if (err) {
2063                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2064                 err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2065                 if (err)
2066                         goto out;
2067                 if (rdev->mec_fw->size != mec_req_size) {
2068                         printk(KERN_ERR
2069                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2070                                rdev->mec_fw->size, fw_name);
2071                         err = -EINVAL;
2072                 }
2073         } else {
2074                 err = radeon_ucode_validate(rdev->mec_fw);
2075                 if (err) {
2076                         printk(KERN_ERR
2077                                "cik_fw: validation failed for firmware \"%s\"\n",
2078                                fw_name);
2079                         goto out;
2080                 } else {
2081                         new_fw++;
2082                 }
2083         }
2084
2085         if (rdev->family == CHIP_KAVERI) {
2086                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2087                 err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2088                 if (err) {
2089                         goto out;
2090                 } else {
2091                         err = radeon_ucode_validate(rdev->mec2_fw);
2092                         if (err) {
2093                                 goto out;
2094                         } else {
2095                                 new_fw++;
2096                         }
2097                 }
2098         }
2099
2100         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2101         err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2102         if (err) {
2103                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2104                 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2105                 if (err)
2106                         goto out;
2107                 if (rdev->rlc_fw->size != rlc_req_size) {
2108                         printk(KERN_ERR
2109                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2110                                rdev->rlc_fw->size, fw_name);
2111                         err = -EINVAL;
2112                 }
2113         } else {
2114                 err = radeon_ucode_validate(rdev->rlc_fw);
2115                 if (err) {
2116                         printk(KERN_ERR
2117                                "cik_fw: validation failed for firmware \"%s\"\n",
2118                                fw_name);
2119                         goto out;
2120                 } else {
2121                         new_fw++;
2122                 }
2123         }
2124
2125         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2126         err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2127         if (err) {
2128                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2129                 err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2130                 if (err)
2131                         goto out;
2132                 if (rdev->sdma_fw->size != sdma_req_size) {
2133                         printk(KERN_ERR
2134                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2135                                rdev->sdma_fw->size, fw_name);
2136                         err = -EINVAL;
2137                 }
2138         } else {
2139                 err = radeon_ucode_validate(rdev->sdma_fw);
2140                 if (err) {
2141                         printk(KERN_ERR
2142                                "cik_fw: validation failed for firmware \"%s\"\n",
2143                                fw_name);
2144                         goto out;
2145                 } else {
2146                         new_fw++;
2147                 }
2148         }
2149
2150         /* No SMC, MC ucode on APUs */
2151         if (!(rdev->flags & RADEON_IS_IGP)) {
2152                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2153                 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2154                 if (err) {
2155                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2156                         err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2157                         if (err) {
2158                                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2159                                 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2160                                 if (err)
2161                                         goto out;
2162                         }
2163                         if ((rdev->mc_fw->size != mc_req_size) &&
2164                             (rdev->mc_fw->size != mc2_req_size)){
2165                                 printk(KERN_ERR
2166                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2167                                        rdev->mc_fw->size, fw_name);
2168                                 err = -EINVAL;
2169                         }
2170                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2171                 } else {
2172                         err = radeon_ucode_validate(rdev->mc_fw);
2173                         if (err) {
2174                                 printk(KERN_ERR
2175                                        "cik_fw: validation failed for firmware \"%s\"\n",
2176                                        fw_name);
2177                                 goto out;
2178                         } else {
2179                                 new_fw++;
2180                         }
2181                 }
2182
2183                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
2184                 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2185                 if (err) {
2186                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
2187                         err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2188                         if (err) {
2189                                 printk(KERN_ERR
2190                                        "smc: error loading firmware \"%s\"\n",
2191                                        fw_name);
2192                                 release_firmware(rdev->smc_fw);
2193                                 rdev->smc_fw = NULL;
2194                                 err = 0;
2195                         } else if (rdev->smc_fw->size != smc_req_size) {
2196                                 printk(KERN_ERR
2197                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2198                                        rdev->smc_fw->size, fw_name);
2199                                 err = -EINVAL;
2200                         }
2201                 } else {
2202                         err = radeon_ucode_validate(rdev->smc_fw);
2203                         if (err) {
2204                                 printk(KERN_ERR
2205                                        "cik_fw: validation failed for firmware \"%s\"\n",
2206                                        fw_name);
2207                                 goto out;
2208                         } else {
2209                                 new_fw++;
2210                         }
2211                 }
2212         }
2213
2214         if (new_fw == 0) {
2215                 rdev->new_fw = false;
2216         } else if (new_fw < num_fw) {
2217                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2218                 err = -EINVAL;
2219         } else {
2220                 rdev->new_fw = true;
2221         }
2222
2223 out:
2224         if (err) {
2225                 if (err != -EINVAL)
2226                         printk(KERN_ERR
2227                                "cik_cp: Failed to load firmware \"%s\"\n",
2228                                fw_name);
2229                 release_firmware(rdev->pfp_fw);
2230                 rdev->pfp_fw = NULL;
2231                 release_firmware(rdev->me_fw);
2232                 rdev->me_fw = NULL;
2233                 release_firmware(rdev->ce_fw);
2234                 rdev->ce_fw = NULL;
2235                 release_firmware(rdev->mec_fw);
2236                 rdev->mec_fw = NULL;
2237                 release_firmware(rdev->mec2_fw);
2238                 rdev->mec2_fw = NULL;
2239                 release_firmware(rdev->rlc_fw);
2240                 rdev->rlc_fw = NULL;
2241                 release_firmware(rdev->sdma_fw);
2242                 rdev->sdma_fw = NULL;
2243                 release_firmware(rdev->mc_fw);
2244                 rdev->mc_fw = NULL;
2245                 release_firmware(rdev->smc_fw);
2246                 rdev->smc_fw = NULL;
2247         }
2248         return err;
2249 }
2250
2251 /*
2252  * Core functions
2253  */
2254 /**
2255  * cik_tiling_mode_table_init - init the hw tiling table
2256  *
2257  * @rdev: radeon_device pointer
2258  *
2259  * Starting with SI, the tiling setup is done globally in a
2260  * set of 32 tiling modes.  Rather than selecting each set of
2261  * parameters per surface as on older asics, we just select
2262  * which index in the tiling table we want to use, and the
2263  * surface uses those parameters (CIK).
2264  */
2265 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2266 {
2267         const u32 num_tile_mode_states = 32;
2268         const u32 num_secondary_tile_mode_states = 16;
2269         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2270         u32 num_pipe_configs;
2271         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2272                 rdev->config.cik.max_shader_engines;
2273
2274         switch (rdev->config.cik.mem_row_size_in_kb) {
2275         case 1:
2276                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2277                 break;
2278         case 2:
2279         default:
2280                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2281                 break;
2282         case 4:
2283                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2284                 break;
2285         }
2286
2287         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2288         if (num_pipe_configs > 8)
2289                 num_pipe_configs = 16;
2290
2291         if (num_pipe_configs == 16) {
2292                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2293                         switch (reg_offset) {
2294                         case 0:
2295                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2297                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2299                                 break;
2300                         case 1:
2301                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2305                                 break;
2306                         case 2:
2307                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2309                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2311                                 break;
2312                         case 3:
2313                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2314                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2315                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				/* 2D depth surface, tile split equal to the DRAM row size */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				/* 1D tiled depth */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				/* PRT (partially resident texture) 2D tiled depth */
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				/* linear aligned */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
				break;
			case 9:
				/* display micro tiling entries start here */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				/* NOTE(review): PRT_TILED_THIN1 entries use the 8x16 pipe
				 * config while the non-PRT entries use 16x16 — this asymmetry
				 * is consistent across all mode groups below, so it appears
				 * intentional (per the hw addressing tables); do not "fix".
				 */
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				/* thin micro tiling entries */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				/* rotated micro tiling entries */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				/* tile mode indices with no table entry (15, 18-26, 31)
				 * are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			/* mirror the programmed value in the driver-side copy (used
			 * elsewhere to validate/compute surface layouts), then write
			 * the hw register */
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		/* secondary (macrotile) table: bank width/height, macro tile
		 * aspect ratio and bank count for each macrotile mode index */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				/* NOTE(review): identical to case 2 — duplicate entries are
				 * normal in these hw tables (one per mode index), not a bug */
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				/* indices 7 and 15 have no entry and are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			/* mirror the programmed value, then write the hw register */
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 8) {
		/* same table layout as the 16-pipe case above, but with the
		 * 8-pipe (P8) pipe configs */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				/* depth micro tiling entries, increasing tile split */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				/* tile split equal to the DRAM row size */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				/* linear aligned */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				/* display micro tiling entries */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				/* PRT entries use the narrower 8x16 pipe config */
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				/* thin micro tiling entries */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				/* rotated micro tiling entries */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				/* tile mode indices with no table entry (15, 18-26, 31)
				 * are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			/* mirror the programmed value in the driver-side copy,
			 * then write the hw register */
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		/* secondary (macrotile) table for the 8-pipe config: bank
		 * width/height, macro tile aspect and bank count per index */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				/* NOTE(review): identical to case 2 — one entry per hw
				 * mode index, duplicates are expected */
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				/* indices 7 and 15 have no entry and are programmed to 0 */
				gb_tile_moden = 0;
				break;
			}
			/* mirror the programmed value, then write the hw register */
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		/* 4-pipe parts: the table additionally depends on the number of
		 * render backends (num_rbs) */
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					/* depth micro tiling entries, increasing tile split */
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					/* tile split equal to the DRAM row size */
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					/* linear aligned */
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					/* display micro tiling entries */
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2796                                         break;
2797                                 case 10:
2798                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2800                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802                                         break;
2803                                 case 11:
2804                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2805                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2806                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2807                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808                                         break;
2809                                 case 12:
2810                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                                         break;
2815                                 case 13:
2816                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2819                                         break;
2820                                 case 14:
2821                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2823                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2825                                         break;
2826                                 case 16:
2827                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831                                         break;
2832                                 case 17:
2833                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2834                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837                                         break;
2838                                 case 27:
2839                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2840                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2841                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2842                                         break;
2843                                 case 28:
2844                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2845                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2846                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848                                         break;
2849                                 case 29:
2850                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2851                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2852                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2853                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2854                                         break;
2855                                 case 30:
2856                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2858                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2860                                         break;
2861                                 default:
2862                                         gb_tile_moden = 0;
2863                                         break;
2864                                 }
2865                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2866                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2867                         }
2868                 } else if (num_rbs < 4) {
2869                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2870                                 switch (reg_offset) {
2871                                 case 0:
2872                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2875                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2876                                         break;
2877                                 case 1:
2878                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2880                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2881                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2882                                         break;
2883                                 case 2:
2884                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2887                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2888                                         break;
2889                                 case 3:
2890                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2892                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2893                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2894                                         break;
2895                                 case 4:
2896                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2899                                                          TILE_SPLIT(split_equal_to_row_size));
2900                                         break;
2901                                 case 5:
2902                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2903                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2904                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                                         break;
2906                                 case 6:
2907                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2908                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2910                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2911                                         break;
2912                                 case 7:
2913                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2916                                                          TILE_SPLIT(split_equal_to_row_size));
2917                                         break;
2918                                 case 8:
2919                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920                                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2921                                         break;
2922                                 case 9:
2923                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2925                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2926                                         break;
2927                                 case 10:
2928                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2931                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932                                         break;
2933                                 case 11:
2934                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                                         break;
2939                                 case 12:
2940                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2943                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                                         break;
2945                                 case 13:
2946                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2948                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2949                                         break;
2950                                 case 14:
2951                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                                         break;
2956                                 case 16:
2957                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961                                         break;
2962                                 case 17:
2963                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967                                         break;
2968                                 case 27:
2969                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2971                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2972                                         break;
2973                                 case 28:
2974                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2975                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2977                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2978                                         break;
2979                                 case 29:
2980                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2981                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2982                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2984                                         break;
2985                                 case 30:
2986                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2990                                         break;
2991                                 default:
2992                                         gb_tile_moden = 0;
2993                                         break;
2994                                 }
2995                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2996                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997                         }
2998                 }
2999                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3000                         switch (reg_offset) {
3001                         case 0:
3002                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3004                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3006                                 break;
3007                         case 1:
3008                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3010                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3012                                 break;
3013                         case 2:
3014                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3018                                 break;
3019                         case 3:
3020                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3022                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3023                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3024                                 break;
3025                         case 4:
3026                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3028                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3029                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3030                                 break;
3031                         case 5:
3032                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3035                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3036                                 break;
3037                         case 6:
3038                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3042                                 break;
3043                         case 8:
3044                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3045                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3046                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3047                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3048                                 break;
3049                         case 9:
3050                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3051                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3054                                 break;
3055                         case 10:
3056                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3058                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3060                                 break;
3061                         case 11:
3062                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3064                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3066                                 break;
3067                         case 12:
3068                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3070                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3072                                 break;
3073                         case 13:
3074                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3078                                 break;
3079                         case 14:
3080                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3083                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3084                                 break;
3085                         default:
3086                                 gb_tile_moden = 0;
3087                                 break;
3088                         }
3089                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3090                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3091                 }
3092         } else if (num_pipe_configs == 2) {
3093                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3094                         switch (reg_offset) {
3095                         case 0:
3096                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3097                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3098                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3099                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3100                                 break;
3101                         case 1:
3102                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3104                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3105                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3106                                 break;
3107                         case 2:
3108                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3110                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3111                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3112                                 break;
3113                         case 3:
3114                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3116                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3118                                 break;
3119                         case 4:
3120                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3122                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3123                                                  TILE_SPLIT(split_equal_to_row_size));
3124                                 break;
3125                         case 5:
3126                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3128                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129                                 break;
3130                         case 6:
3131                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3132                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3133                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3134                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3135                                 break;
3136                         case 7:
3137                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3138                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3139                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3140                                                  TILE_SPLIT(split_equal_to_row_size));
3141                                 break;
3142                         case 8:
3143                                 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3144                                                 PIPE_CONFIG(ADDR_SURF_P2);
3145                                 break;
3146                         case 9:
3147                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3149                                                  PIPE_CONFIG(ADDR_SURF_P2));
3150                                 break;
3151                         case 10:
3152                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3153                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3154                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156                                 break;
3157                         case 11:
3158                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3159                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3160                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3162                                 break;
3163                         case 12:
3164                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3165                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3166                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3168                                 break;
3169                         case 13:
3170                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3171                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3172                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3173                                 break;
3174                         case 14:
3175                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3177                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3179                                 break;
3180                         case 16:
3181                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3182                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3184                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3185                                 break;
3186                         case 17:
3187                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3188                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3189                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191                                 break;
3192                         case 27:
3193                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                                  PIPE_CONFIG(ADDR_SURF_P2));
3196                                 break;
3197                         case 28:
3198                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3199                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3202                                 break;
3203                         case 29:
3204                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3205                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3208                                 break;
3209                         case 30:
3210                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3212                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3214                                 break;
3215                         default:
3216                                 gb_tile_moden = 0;
3217                                 break;
3218                         }
3219                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3220                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3221                 }
3222                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3223                         switch (reg_offset) {
3224                         case 0:
3225                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3226                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3227                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3228                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3229                                 break;
3230                         case 1:
3231                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3232                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3233                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3235                                 break;
3236                         case 2:
3237                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3238                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3239                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3241                                 break;
3242                         case 3:
3243                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3244                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3245                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3247                                 break;
3248                         case 4:
3249                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                                 break;
3254                         case 5:
3255                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3258                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3259                                 break;
3260                         case 6:
3261                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3263                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3264                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3265                                 break;
3266                         case 8:
3267                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3268                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3269                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3270                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3271                                 break;
3272                         case 9:
3273                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3274                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3275                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3276                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3277                                 break;
3278                         case 10:
3279                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3280                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3281                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3282                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3283                                 break;
3284                         case 11:
3285                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3286                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3289                                 break;
3290                         case 12:
3291                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3292                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3293                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3294                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3295                                 break;
3296                         case 13:
3297                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3301                                 break;
3302                         case 14:
3303                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3304                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3305                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3306                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3307                                 break;
3308                         default:
3309                                 gb_tile_moden = 0;
3310                                 break;
3311                         }
3312                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3313                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3314                 }
3315         } else
3316                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3317 }
3318
3319 /**
3320  * cik_select_se_sh - select which SE, SH to address
3321  *
3322  * @rdev: radeon_device pointer
3323  * @se_num: shader engine to address
3324  * @sh_num: sh block to address
3325  *
3326  * Select which SE, SH combinations to address. Certain
3327  * registers are instanced per SE or SH.  0xffffffff means
3328  * broadcast to all SEs or SHs (CIK).
3329  */
3330 static void cik_select_se_sh(struct radeon_device *rdev,
3331                              u32 se_num, u32 sh_num)
3332 {
3333         u32 data = INSTANCE_BROADCAST_WRITES;
3334
3335         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3336                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3337         else if (se_num == 0xffffffff)
3338                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3339         else if (sh_num == 0xffffffff)
3340                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3341         else
3342                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3343         WREG32(GRBM_GFX_INDEX, data);
3344 }
3345
3346 /**
3347  * cik_create_bitmask - create a bitmask
3348  *
3349  * @bit_width: length of the mask
3350  *
3351  * create a variable length bit mask (CIK).
3352  * Returns the bitmask.
3353  */
3354 static u32 cik_create_bitmask(u32 bit_width)
3355 {
3356         u32 i, mask = 0;
3357
3358         for (i = 0; i < bit_width; i++) {
3359                 mask <<= 1;
3360                 mask |= 1;
3361         }
3362         return mask;
3363 }
3364
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* Hardware (fuse/strap) disabled RBs.  NOTE(review): bit 0 appears
	 * to gate validity of the disable field — if it is clear the field
	 * is ignored entirely; confirm against the CIK register spec.
	 */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* OR in the driver/user requested RB disable bits. */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* Limit the result to the RBs that can exist on one SH. */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3395
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: query each SE/SH for its disabled-RB bits and pack them
	 * into one global bitmap.  Hawaii uses a wider per-SH field than
	 * the other CIK parts.  GRBM_GFX_INDEX selection must be
	 * serialized via grbm_idx_mutex.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing before dropping the lock. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled bitmap into an enabled-RB bitmap. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE based on which RBs
	 * (2 bits per SH, consumed low-to-high from enabled_rbs) are alive.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled on this SH: steer packers away */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3468
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family hardware limits and the "golden" GB_ADDR_CONFIG. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below;
	 * the read itself is harmless.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* Derive memory geometry used by the tiling setup below. */
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the final address config to all blocks that need it. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count the active compute units across all SE/SH pairs. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* Read-modify-write of the DB/CB debug and control registers;
	 * magic masks/values are hardware defaults from AMD.
	 */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Write back HDP_HOST_PATH_CNTL unchanged (read/write cycle). */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the hardware settle */
	udelay(50);
}
3724
3725 /*
3726  * GPU scratch registers helpers function.
3727  */
3728 /**
3729  * cik_scratch_init - setup driver info for CP scratch regs
3730  *
3731  * @rdev: radeon_device pointer
3732  *
3733  * Set up the number and offset of the CP scratch registers.
3734  * NOTE: use of CP scratch registers is a legacy inferface and
3735  * is not used by default on newer asics (r6xx+).  On newer asics,
3736  * memory buffers are used for fences rather than scratch regs.
3737  */
3738 static void cik_scratch_init(struct radeon_device *rdev)
3739 {
3740         int i;
3741
3742         rdev->scratch.num_reg = 7;
3743         rdev->scratch.reg_base = SCRATCH_REG0;
3744         for (i = 0; i < rdev->scratch.num_reg; i++) {
3745                 rdev->scratch.free[i] = true;
3746                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3747         }
3748 }
3749
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Seed the scratch reg so we can tell whether the ring wrote it. */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Emit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to scratch. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll (up to usec_timeout microseconds) for the CP to execute it. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3801
/**
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emits an hdp flush on the cp.
 */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_REQ/DONE bit for this queue.  The compute
	 * rings (and the default label) share one arm keyed off the ME and
	 * pipe; the GFX ring uses CP0.  An unknown compute ME emits nothing.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3846
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(0): the dummy event raises no interrupt. */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): write the seq and signal the fence interrupt. */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3887
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): raise an interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3915
/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* high address bits share a dword with the signal/wait select */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3947
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet moves at most 0x1fffff bytes (cap below) */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus 18 dwords of sync/fence overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* serialize against fences attached to @resv and other rings */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* request CP sync only on the final packet of the copy */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4019
4020 /*
4021  * IB stuff
4022  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG packet below
			 * plus 4 for the IB packet emitted at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below
			 * plus 4 for the IB packet emitted at the end */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4074
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell whether the IB wrote it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* the IB is a single SET_UCONFIG_REG writing 0xDEADBEEF to scratch */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* poll the scratch reg until the IB's write becomes visible */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4140
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
4164 /**
4165  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4166  *
4167  * @rdev: radeon_device pointer
4168  * @enable: enable or disable the MEs
4169  *
4170  * Halts or unhalts the gfx MEs.
4171  */
4172 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4173 {
4174         if (enable)
4175                 WREG32(CP_ME_CNTL, 0);
4176         else {
4177                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4178                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4179                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4180                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4181         }
4182         udelay(50);
4183 }
4184
4185 /**
4186  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4187  *
4188  * @rdev: radeon_device pointer
4189  *
4190  * Loads the gfx PFP, ME, and CE ucode.
4191  * Returns 0 for success, -EINVAL if the ucode is not available.
4192  */
4193 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4194 {
4195         int i;
4196
4197         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4198                 return -EINVAL;
4199
4200         cik_cp_gfx_enable(rdev, false);
4201
4202         if (rdev->new_fw) {
4203                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4204                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4205                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4206                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4207                 const struct gfx_firmware_header_v1_0 *me_hdr =
4208                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4209                 const __le32 *fw_data;
4210                 u32 fw_size;
4211
4212                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4213                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4214                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4215
4216                 /* PFP */
4217                 fw_data = (const __le32 *)
4218                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4219                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4220                 WREG32(CP_PFP_UCODE_ADDR, 0);
4221                 for (i = 0; i < fw_size; i++)
4222                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4223                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4224
4225                 /* CE */
4226                 fw_data = (const __le32 *)
4227                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4228                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4229                 WREG32(CP_CE_UCODE_ADDR, 0);
4230                 for (i = 0; i < fw_size; i++)
4231                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4232                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4233
4234                 /* ME */
4235                 fw_data = (const __be32 *)
4236                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4237                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4238                 WREG32(CP_ME_RAM_WADDR, 0);
4239                 for (i = 0; i < fw_size; i++)
4240                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4241                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4242                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4243         } else {
4244                 const __be32 *fw_data;
4245
4246                 /* PFP */
4247                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4248                 WREG32(CP_PFP_UCODE_ADDR, 0);
4249                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4250                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4251                 WREG32(CP_PFP_UCODE_ADDR, 0);
4252
4253                 /* CE */
4254                 fw_data = (const __be32 *)rdev->ce_fw->data;
4255                 WREG32(CP_CE_UCODE_ADDR, 0);
4256                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4257                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4258                 WREG32(CP_CE_UCODE_ADDR, 0);
4259
4260                 /* ME */
4261                 fw_data = (const __be32 *)rdev->me_fw->data;
4262                 WREG32(CP_ME_RAM_WADDR, 0);
4263                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4264                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4265                 WREG32(CP_ME_RAM_WADDR, 0);
4266         }
4267
4268         return 0;
4269 }
4270
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* cik_default_size dwords of clear state + 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state between the preamble markers */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4331
4332 /**
4333  * cik_cp_gfx_fini - stop the gfx ring
4334  *
4335  * @rdev: radeon_device pointer
4336  *
4337  * Stop the gfx ring and tear down the driver ring
4338  * info.
4339  */
4340 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4341 {
4342         cik_cp_gfx_enable(rdev, false);
4343         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4344 }
4345
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* re-write CNTL without RB_RPTR_WR_ENA to latch the pointers */
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base address (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4421
4422 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4423                      struct radeon_ring *ring)
4424 {
4425         u32 rptr;
4426
4427         if (rdev->wb.enabled)
4428                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4429         else
4430                 rptr = RREG32(CP_RB0_RPTR);
4431
4432         return rptr;
4433 }
4434
4435 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4436                      struct radeon_ring *ring)
4437 {
4438         u32 wptr;
4439
4440         wptr = RREG32(CP_RB0_WPTR);
4441
4442         return wptr;
4443 }
4444
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* discarded read-back — presumably a posting read to flush the
	 * MMIO write; confirm against the register-access conventions */
	(void)RREG32(CP_RB0_WPTR);
}
4451
4452 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4453                          struct radeon_ring *ring)
4454 {
4455         u32 rptr;
4456
4457         if (rdev->wb.enabled) {
4458                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4459         } else {
4460                 mutex_lock(&rdev->srbm_mutex);
4461                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4462                 rptr = RREG32(CP_HQD_PQ_RPTR);
4463                 cik_srbm_select(rdev, 0, 0, 0, 0);
4464                 mutex_unlock(&rdev->srbm_mutex);
4465         }
4466
4467         return rptr;
4468 }
4469
4470 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4471                          struct radeon_ring *ring)
4472 {
4473         u32 wptr;
4474
4475         if (rdev->wb.enabled) {
4476                 /* XXX check if swapping is necessary on BE */
4477                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4478         } else {
4479                 mutex_lock(&rdev->srbm_mutex);
4480                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4481                 wptr = RREG32(CP_HQD_PQ_WPTR);
4482                 cik_srbm_select(rdev, 0, 0, 0, 0);
4483                 mutex_unlock(&rdev->srbm_mutex);
4484         }
4485
4486         return wptr;
4487 }
4488
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the writeback copy, then kick the queue via its doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4496
/* Quiesce one compute queue: disable wptr polling, dequeue the HQD and
 * clear its ring pointers.  Callers hold rdev->srbm_mutex around this
 * (see cik_cp_compute_enable).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait up to usec_timeout microseconds for the HQD to go idle */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4521
4522 /**
4523  * cik_cp_compute_enable - enable/disable the compute CP MEs
4524  *
4525  * @rdev: radeon_device pointer
4526  * @enable: enable or disable the MEs
4527  *
4528  * Halts or unhalts the compute MEs.
4529  */
4530 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4531 {
4532         if (enable)
4533                 WREG32(CP_MEC_CNTL, 0);
4534         else {
4535                 /*
4536                  * To make hibernation reliable we need to clear compute ring
4537                  * configuration before halting the compute ring.
4538                  */
4539                 mutex_lock(&rdev->srbm_mutex);
4540                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4541                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4542                 mutex_unlock(&rdev->srbm_mutex);
4543
4544                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4545                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4546                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4547         }
4548         udelay(50);
4549 }
4550
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only Kaveri has a second MEC here */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* NOTE(review): the legacy path reuses mec_fw (not
			 * mec2_fw) for MEC2 — appears intentional for the old
			 * single-image ucode layout; confirm before changing */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4621
4622 /**
4623  * cik_cp_compute_start - start the compute queues
4624  *
4625  * @rdev: radeon_device pointer
4626  *
4627  * Enable the compute queues.
4628  * Returns 0 for success, error for failure.
4629  */
4630 static int cik_cp_compute_start(struct radeon_device *rdev)
4631 {
4632         cik_cp_compute_enable(rdev, true);
4633
4634         return 0;
4635 }
4636
4637 /**
4638  * cik_cp_compute_fini - stop the compute queues
4639  *
4640  * @rdev: radeon_device pointer
4641  *
4642  * Stop the compute queues and tear down the driver queue
4643  * info.
4644  */
4645 static void cik_cp_compute_fini(struct radeon_device *rdev)
4646 {
4647         int i, idx, r;
4648
4649         cik_cp_compute_enable(rdev, false);
4650
4651         for (i = 0; i < 2; i++) {
4652                 if (i == 0)
4653                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4654                 else
4655                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4656
4657                 if (rdev->ring[idx].mqd_obj) {
4658                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4659                         if (unlikely(r != 0))
4660                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4661
4662                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4663                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4664
4665                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4666                         rdev->ring[idx].mqd_obj = NULL;
4667                 }
4668         }
4669 }
4670
4671 static void cik_mec_fini(struct radeon_device *rdev)
4672 {
4673         int r;
4674
4675         if (rdev->mec.hpd_eop_obj) {
4676                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4677                 if (unlikely(r != 0))
4678                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4679                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4680                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4681
4682                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4683                 rdev->mec.hpd_eop_obj = NULL;
4684         }
4685 }
4686
4687 #define MEC_HPD_SIZE 2048
4688
4689 static int cik_mec_init(struct radeon_device *rdev)
4690 {
4691         int r;
4692         u32 *hpd;
4693
4694         /*
4695          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4696          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4697          * Nonetheless, we assign only 1 pipe because all other pipes will
4698          * be handled by KFD
4699          */
4700         rdev->mec.num_mec = 1;
4701         rdev->mec.num_pipe = 1;
4702         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4703
4704         if (rdev->mec.hpd_eop_obj == NULL) {
4705                 r = radeon_bo_create(rdev,
4706                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4707                                      PAGE_SIZE, true,
4708                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4709                                      &rdev->mec.hpd_eop_obj);
4710                 if (r) {
4711                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4712                         return r;
4713                 }
4714         }
4715
4716         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4717         if (unlikely(r != 0)) {
4718                 cik_mec_fini(rdev);
4719                 return r;
4720         }
4721         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4722                           &rdev->mec.hpd_eop_gpu_addr);
4723         if (r) {
4724                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4725                 cik_mec_fini(rdev);
4726                 return r;
4727         }
4728         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4729         if (r) {
4730                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4731                 cik_mec_fini(rdev);
4732                 return r;
4733         }
4734
4735         /* clear memory.  Not sure if this is required or not */
4736         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4737
4738         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4739         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4740
4741         return 0;
4742 }
4743
/*
 * CPU-side shadow of the CP HQD (hardware queue descriptor) register
 * block, embedded in struct bonaire_mqd below.  cik_cp_compute_resume()
 * fills these fields and mirrors them into the matching CP_HQD_* /
 * CP_MQD_* registers while the queue's SRBM context is selected.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4782
/*
 * Memory queue descriptor (MQD) layout for CIK compute queues.  One of
 * these lives in a GTT BO per compute ring (see cik_cp_compute_resume());
 * the hardware reads it via CP_MQD_BASE_ADDR.  Field meanings beyond what
 * cik_cp_compute_resume() writes (header, static_thread_mgmt*,
 * queue_state) follow the hardware MQD layout — the remaining fields are
 * left zeroed by the driver.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* register shadow, see above */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4810
4811 /**
4812  * cik_cp_compute_resume - setup the compute queue registers
4813  *
4814  * @rdev: radeon_device pointer
4815  *
4816  * Program the compute queues and test them to make sure they
4817  * are working.
4818  * Returns 0 for success, error for failure.
4819  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;	/* both kernel queues use doorbells */
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes; SRBM access must be serialized via srbm_mutex */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;	/* from cik_mec_init() */

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr (256-byte aligned, hence the >> 8) */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD BO; it persists across resumes */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* all CUs enabled for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* select this ring's me/pipe/queue before touching HQD regs */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the queue to go idle */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is encoded as log2 of the size in dwords */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		/* restore the default SRBM context before dropping the lock */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* ring must be marked ready before the test can submit to it */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5051
/* Enable or disable both command processors (gfx first, then compute). */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5057
/*
 * Load the CP microcode: gfx (PFP/CE/ME) first, then the compute MEC
 * firmware.  Returns 0 on success or the first loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	/* propagate the compute loader's status directly */
	return cik_cp_compute_load_microcode(rdev);
}
5071
/* Tear down both command processors (gfx ring, then compute queues). */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5077
/*
 * Bring up both command processors: load ucode, then resume the gfx
 * ring and the compute queues.  GUI idle interrupts are masked for the
 * duration of the bring-up and re-enabled only on full success.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
5099
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Pure diagnostics: used by cik_gpu_soft_reset() before and after a
 * reset to show which blocks are busy/stalled.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5139
5140 /**
5141  * cik_gpu_check_soft_reset - check which blocks are busy
5142  *
5143  * @rdev: radeon_device pointer
5144  *
5145  * Check which blocks are busy and return the relevant reset
5146  * mask to be used by cik_gpu_soft_reset().
5147  * Returns a mask of the blocks to be reset.
5148  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: any gfx pipeline block busy -> gfx reset */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: note SDMA_IDLE is active-high idle, hence the ! */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5220
5221 /**
5222  * cik_gpu_soft_reset - soft reset GPU
5223  *
5224  * @rdev: radeon_device pointer
5225  * @reset_mask: mask of which blocks to reset
5226  *
5227  * Soft reset the blocks specified in @reset_mask.
5228  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing flagged busy -> nothing to reset */
	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* halt the SDMA engines that are about to be reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before touching the reset registers */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* on IGPs the MC is never soft-reset here */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert, hold ~50us, then deassert; the readback after each
		 * write presumably posts it — driver-wide pattern */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/deassert sequence for the SRBM blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5351
/* GMCON register state saved across a KV/KB pci config reset
 * (see kv_save_regs_for_reset()/kv_restore_regs_for_reset()).
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5357
/*
 * Save the GMCON registers that kv_restore_regs_for_reset() puts back,
 * then mask off register-engine execution on power-up / register-update
 * and stutter mode so they stay quiet across the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5369
/**
 * kv_restore_regs_for_reset - restore GMCON state after a reset (KV/KB APUs)
 *
 * @rdev: radeon_device pointer
 * @save: GMCON register state captured by kv_save_regs_for_reset()
 *
 * Replays the GMCON PGFSM (power-gating state machine) programming
 * sequence and then restores the GMCON registers saved before the
 * reset.  Used around a PCI config reset on IGP parts, where this
 * state presumably does not survive the reset.
 *
 * NOTE(review): the PGFSM write/config value pairs below are an opaque,
 * hardware-mandated sequence; each stage is padded with five dummy
 * writes to GMCON_PGFSM_WRITE.  Do not reorder.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* finally restore the registers captured before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5442
/**
 * cik_gpu_pci_config_reset - full GPU reset via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines (CP gfx, MEC compute, both SDMA instances, RLC),
 * stops memory access, then performs a PCI config space reset and
 * waits for the asic to come back out of reset.  On IGP parts the
 * GMCON state is saved before and restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* save GMCON state on APUs; restored after the reset below */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still held in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5505
5506 /**
5507  * cik_asic_reset - soft reset GPU
5508  *
5509  * @rdev: radeon_device pointer
5510  *
5511  * Look up which blocks are hung and attempt
5512  * to reset them.
5513  * Returns 0 for success.
5514  */
5515 int cik_asic_reset(struct radeon_device *rdev)
5516 {
5517         u32 reset_mask;
5518
5519         reset_mask = cik_gpu_check_soft_reset(rdev);
5520
5521         if (reset_mask)
5522                 r600_set_bios_scratch_engine_hung(rdev, true);
5523
5524         /* try soft reset */
5525         cik_gpu_soft_reset(rdev, reset_mask);
5526
5527         reset_mask = cik_gpu_check_soft_reset(rdev);
5528
5529         /* try pci config reset */
5530         if (reset_mask && radeon_hard_reset)
5531                 cik_gpu_pci_config_reset(rdev);
5532
5533         reset_mask = cik_gpu_check_soft_reset(rdev);
5534
5535         if (!reset_mask)
5536                 r600_set_bios_scratch_engine_hung(rdev, false);
5537
5538         return 0;
5539 }
5540
5541 /**
5542  * cik_gfx_is_lockup - check if the 3D engine is locked up
5543  *
5544  * @rdev: radeon_device pointer
5545  * @ring: radeon_ring structure holding ring information
5546  *
5547  * Check if the 3D engine is locked up (CIK).
5548  * Returns true if the engine is locked, false if not.
5549  */
5550 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5551 {
5552         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5553
5554         if (!(reset_mask & (RADEON_RESET_GFX |
5555                             RADEON_RESET_COMPUTE |
5556                             RADEON_RESET_CP))) {
5557                 radeon_ring_lockup_update(rdev, ring);
5558                 return false;
5559         }
5560         return radeon_ring_test_lockup(rdev, ring);
5561 }
5562
5563 /* MC */
5564 /**
5565  * cik_mc_program - program the GPU memory controller
5566  *
5567  * @rdev: radeon_device pointer
5568  *
5569  * Set the location of vram, gart, and AGP in the GPU's
5570  * physical address space (CIK).
5571  */
5572 static void cik_mc_program(struct radeon_device *rdev)
5573 {
5574         struct evergreen_mc_save save;
5575         u32 tmp;
5576         int i, j;
5577
5578         /* Initialize HDP */
5579         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5580                 WREG32((0x2c14 + j), 0x00000000);
5581                 WREG32((0x2c18 + j), 0x00000000);
5582                 WREG32((0x2c1c + j), 0x00000000);
5583                 WREG32((0x2c20 + j), 0x00000000);
5584                 WREG32((0x2c24 + j), 0x00000000);
5585         }
5586         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5587
5588         evergreen_mc_stop(rdev, &save);
5589         if (radeon_mc_wait_for_idle(rdev)) {
5590                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5591         }
5592         /* Lockout access through VGA aperture*/
5593         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5594         /* Update configuration */
5595         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5596                rdev->mc.vram_start >> 12);
5597         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5598                rdev->mc.vram_end >> 12);
5599         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5600                rdev->vram_scratch.gpu_addr >> 12);
5601         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5602         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5603         WREG32(MC_VM_FB_LOCATION, tmp);
5604         /* XXX double check these! */
5605         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5606         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5607         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5608         WREG32(MC_VM_AGP_BASE, 0);
5609         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5610         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5611         if (radeon_mc_wait_for_idle(rdev)) {
5612                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5613         }
5614         evergreen_mc_resume(rdev, &save);
5615         /* we need to own VRAM, so turn off the VGA renderer here
5616          * to stop it overwriting our objects */
5617         rv515_vga_render_disable(rdev);
5618 }
5619
5620 /**
5621  * cik_mc_init - initialize the memory controller driver params
5622  *
5623  * @rdev: radeon_device pointer
5624  *
5625  * Look up the amount of vram, vram width, and decide how to place
5626  * vram and gart within the GPU's physical address space (CIK).
5627  * Returns 0 for success.
5628  */
5629 static int cik_mc_init(struct radeon_device *rdev)
5630 {
5631         u32 tmp;
5632         int chansize, numchan;
5633
5634         /* Get VRAM informations */
5635         rdev->mc.vram_is_ddr = true;
5636         tmp = RREG32(MC_ARB_RAMCFG);
5637         if (tmp & CHANSIZE_MASK) {
5638                 chansize = 64;
5639         } else {
5640                 chansize = 32;
5641         }
5642         tmp = RREG32(MC_SHARED_CHMAP);
5643         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5644         case 0:
5645         default:
5646                 numchan = 1;
5647                 break;
5648         case 1:
5649                 numchan = 2;
5650                 break;
5651         case 2:
5652                 numchan = 4;
5653                 break;
5654         case 3:
5655                 numchan = 8;
5656                 break;
5657         case 4:
5658                 numchan = 3;
5659                 break;
5660         case 5:
5661                 numchan = 6;
5662                 break;
5663         case 6:
5664                 numchan = 10;
5665                 break;
5666         case 7:
5667                 numchan = 12;
5668                 break;
5669         case 8:
5670                 numchan = 16;
5671                 break;
5672         }
5673         rdev->mc.vram_width = numchan * chansize;
5674         /* Could aper size report 0 ? */
5675         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5676         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5677         /* size in MB on si */
5678         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5679         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5681         si_vram_gtt_location(rdev, &rdev->mc);
5682         radeon_update_bandwidth_info(rdev);
5683
5684         return 0;
5685 }
5686
5687 /*
5688  * GART
5689  * VMID 0 is the physical GPU addresses as used by the kernel.
5690  * VMIDs 1-15 are used for userspace clients and are handled
5691  * by the radeon vm/hsa code.
5692  */
5693 /**
5694  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5695  *
5696  * @rdev: radeon_device pointer
5697  *
5698  * Flush the TLB for the VMID 0 page table (CIK).
5699  */
5700 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5701 {
5702         /* flush hdp cache */
5703         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5704
5705         /* bits 0-15 are the VM contexts0-15 */
5706         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5707 }
5708
5709 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5710 {
5711         int i;
5712         uint32_t sh_mem_bases, sh_mem_config;
5713
5714         sh_mem_bases = 0x6000 | 0x6000 << 16;
5715         sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5716         sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5717
5718         mutex_lock(&rdev->srbm_mutex);
5719         for (i = 8; i < 16; i++) {
5720                 cik_srbm_select(rdev, 0, 0, 0, i);
5721                 /* CP and shaders */
5722                 WREG32(SH_MEM_CONFIG, sh_mem_config);
5723                 WREG32(SH_MEM_APE1_BASE, 1);
5724                 WREG32(SH_MEM_APE1_LIMIT, 0);
5725                 WREG32(SH_MEM_BASES, sh_mem_bases);
5726         }
5727         cik_srbm_select(rdev, 0, 0, 0, 0);
5728         mutex_unlock(&rdev->srbm_mutex);
5729 }
5730
5731 /**
5732  * cik_pcie_gart_enable - gart enable
5733  *
5734  * @rdev: radeon_device pointer
5735  *
5736  * This sets up the TLBs, programs the page tables for VMID0,
5737  * sets up the hw for VMIDs 1-15 which are allocated on
5738  * demand, and sets up the global locations for the LDS, GDS,
5739  * and GPUVM for FSA64 clients (CIK).
5740  * Returns 0 for success, errors for failure.
5741  */
5742 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5743 {
5744         int r, i;
5745
5746         if (rdev->gart.robj == NULL) {
5747                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5748                 return -EINVAL;
5749         }
5750         r = radeon_gart_table_vram_pin(rdev);
5751         if (r)
5752                 return r;
5753         /* Setup TLB control */
5754         WREG32(MC_VM_MX_L1_TLB_CNTL,
5755                (0xA << 7) |
5756                ENABLE_L1_TLB |
5757                ENABLE_L1_FRAGMENT_PROCESSING |
5758                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5759                ENABLE_ADVANCED_DRIVER_MODEL |
5760                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5761         /* Setup L2 cache */
5762         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5763                ENABLE_L2_FRAGMENT_PROCESSING |
5764                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5765                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5766                EFFECTIVE_L2_QUEUE_SIZE(7) |
5767                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5768         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5769         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5770                BANK_SELECT(4) |
5771                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5772         /* setup context0 */
5773         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5774         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5775         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5776         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5777                         (u32)(rdev->dummy_page.addr >> 12));
5778         WREG32(VM_CONTEXT0_CNTL2, 0);
5779         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5780                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5781
5782         WREG32(0x15D4, 0);
5783         WREG32(0x15D8, 0);
5784         WREG32(0x15DC, 0);
5785
5786         /* restore context1-15 */
5787         /* set vm size, must be a multiple of 4 */
5788         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5789         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5790         for (i = 1; i < 16; i++) {
5791                 if (i < 8)
5792                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5793                                rdev->vm_manager.saved_table_addr[i]);
5794                 else
5795                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5796                                rdev->vm_manager.saved_table_addr[i]);
5797         }
5798
5799         /* enable context1-15 */
5800         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5801                (u32)(rdev->dummy_page.addr >> 12));
5802         WREG32(VM_CONTEXT1_CNTL2, 4);
5803         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5804                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5805                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5806                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5807                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5808                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5809                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5810                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5811                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5812                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5813                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5814                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5815                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5816                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5817
5818         if (rdev->family == CHIP_KAVERI) {
5819                 u32 tmp = RREG32(CHUB_CONTROL);
5820                 tmp &= ~BYPASS_VM;
5821                 WREG32(CHUB_CONTROL, tmp);
5822         }
5823
5824         /* XXX SH_MEM regs */
5825         /* where to put LDS, scratch, GPUVM in FSA64 space */
5826         mutex_lock(&rdev->srbm_mutex);
5827         for (i = 0; i < 16; i++) {
5828                 cik_srbm_select(rdev, 0, 0, 0, i);
5829                 /* CP and shaders */
5830                 WREG32(SH_MEM_CONFIG, 0);
5831                 WREG32(SH_MEM_APE1_BASE, 1);
5832                 WREG32(SH_MEM_APE1_LIMIT, 0);
5833                 WREG32(SH_MEM_BASES, 0);
5834                 /* SDMA GFX */
5835                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5836                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5837                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5838                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5839                 /* XXX SDMA RLC - todo */
5840         }
5841         cik_srbm_select(rdev, 0, 0, 0, 0);
5842         mutex_unlock(&rdev->srbm_mutex);
5843
5844         cik_pcie_init_compute_vmid(rdev);
5845
5846         cik_pcie_gart_tlb_flush(rdev);
5847         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5848                  (unsigned)(rdev->mc.gtt_size >> 20),
5849                  (unsigned long long)rdev->gart.table_addr);
5850         rdev->gart.ready = true;
5851         return 0;
5852 }
5853
5854 /**
5855  * cik_pcie_gart_disable - gart disable
5856  *
5857  * @rdev: radeon_device pointer
5858  *
5859  * This disables all VM page table (CIK).
5860  */
5861 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5862 {
5863         unsigned i;
5864
5865         for (i = 1; i < 16; ++i) {
5866                 uint32_t reg;
5867                 if (i < 8)
5868                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5869                 else
5870                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5871                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5872         }
5873
5874         /* Disable all tables */
5875         WREG32(VM_CONTEXT0_CNTL, 0);
5876         WREG32(VM_CONTEXT1_CNTL, 0);
5877         /* Setup TLB control */
5878         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5879                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5880         /* Setup L2 cache */
5881         WREG32(VM_L2_CNTL,
5882                ENABLE_L2_FRAGMENT_PROCESSING |
5883                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5884                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5885                EFFECTIVE_L2_QUEUE_SIZE(7) |
5886                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5887         WREG32(VM_L2_CNTL2, 0);
5888         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5889                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5890         radeon_gart_table_vram_unpin(rdev);
5891 }
5892
5893 /**
5894  * cik_pcie_gart_fini - vm fini callback
5895  *
5896  * @rdev: radeon_device pointer
5897  *
5898  * Tears down the driver GART/VM setup (CIK).
5899  */
5900 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5901 {
5902         cik_pcie_gart_disable(rdev);
5903         radeon_gart_table_vram_free(rdev);
5904         radeon_gart_fini(rdev);
5905 }
5906
5907 /* vm parser */
5908 /**
5909  * cik_ib_parse - vm ib_parse callback
5910  *
5911  * @rdev: radeon_device pointer
5912  * @ib: indirect buffer pointer
5913  *
5914  * CIK uses hw IB checking so this is a nop (CIK).
5915  */
5916 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5917 {
5918         return 0;
5919 }
5920
5921 /*
5922  * vm
5923  * VMID 0 is the physical GPU addresses as used by the kernel.
5924  * VMIDs 1-15 are used for userspace clients and are handled
5925  * by the radeon vm/hsa code.
5926  */
5927 /**
5928  * cik_vm_init - cik vm init callback
5929  *
5930  * @rdev: radeon_device pointer
5931  *
5932  * Inits cik specific vm parameters (number of VMs, base of vram for
5933  * VMIDs 1-15) (CIK).
5934  * Returns 0 for success.
5935  */
5936 int cik_vm_init(struct radeon_device *rdev)
5937 {
5938         /*
5939          * number of VMs
5940          * VMID 0 is reserved for System
5941          * radeon graphics/compute will use VMIDs 1-7
5942          * amdkfd will use VMIDs 8-15
5943          */
5944         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5945         /* base offset of vram pages */
5946         if (rdev->flags & RADEON_IS_IGP) {
5947                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5948                 tmp <<= 22;
5949                 rdev->vm_manager.vram_base_offset = tmp;
5950         } else
5951                 rdev->vm_manager.vram_base_offset = 0;
5952
5953         return 0;
5954 }
5955
5956 /**
5957  * cik_vm_fini - cik vm fini callback
5958  *
5959  * @rdev: radeon_device pointer
5960  *
5961  * Tear down any asic specific VM setup (CIK).
5962  */
5963 void cik_vm_fini(struct radeon_device *rdev)
5964 {
5965 }
5966
5967 /**
5968  * cik_vm_decode_fault - print human readable fault info
5969  *
5970  * @rdev: radeon_device pointer
5971  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5972  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5973  *
5974  * Print human readable fault information (CIK).
5975  */
5976 static void cik_vm_decode_fault(struct radeon_device *rdev,
5977                                 u32 status, u32 addr, u32 mc_client)
5978 {
5979         u32 mc_id;
5980         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5981         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5982         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5983                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5984
5985         if (rdev->family == CHIP_HAWAII)
5986                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5987         else
5988                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5989
5990         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5991                protections, vmid, addr,
5992                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5993                block, mc_client, mc_id);
5994 }
5995
5996 /**
5997  * cik_vm_flush - cik vm flush using the CP
5998  *
5999  * @rdev: radeon_device pointer
6000  *
6001  * Update the page table base and flush the VM TLB
6002  * using the CP (CIK).
6003  */
6004 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6005                   unsigned vm_id, uint64_t pd_addr)
6006 {
6007         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6008
6009         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6010         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6011                                  WRITE_DATA_DST_SEL(0)));
6012         if (vm_id < 8) {
6013                 radeon_ring_write(ring,
6014                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6015         } else {
6016                 radeon_ring_write(ring,
6017                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6018         }
6019         radeon_ring_write(ring, 0);
6020         radeon_ring_write(ring, pd_addr >> 12);
6021
6022         /* update SH_MEM_* regs */
6023         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6024         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6025                                  WRITE_DATA_DST_SEL(0)));
6026         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6027         radeon_ring_write(ring, 0);
6028         radeon_ring_write(ring, VMID(vm_id));
6029
6030         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6031         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6032                                  WRITE_DATA_DST_SEL(0)));
6033         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6034         radeon_ring_write(ring, 0);
6035
6036         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6037         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6038         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6039         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6040
6041         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6042         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6043                                  WRITE_DATA_DST_SEL(0)));
6044         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6045         radeon_ring_write(ring, 0);
6046         radeon_ring_write(ring, VMID(0));
6047
6048         /* HDP flush */
6049         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6050
6051         /* bits 0-15 are the VM contexts0-15 */
6052         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6053         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6054                                  WRITE_DATA_DST_SEL(0)));
6055         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6056         radeon_ring_write(ring, 0);
6057         radeon_ring_write(ring, 1 << vm_id);
6058
6059         /* wait for the invalidate to complete */
6060         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6061         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6062                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6063                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6064         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6065         radeon_ring_write(ring, 0);
6066         radeon_ring_write(ring, 0); /* ref */
6067         radeon_ring_write(ring, 0); /* mask */
6068         radeon_ring_write(ring, 0x20); /* poll interval */
6069
6070         /* compute doesn't have PFP */
6071         if (usepfp) {
6072                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6073                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6074                 radeon_ring_write(ring, 0x0);
6075         }
6076 }
6077
6078 /*
6079  * RLC
6080  * The RLC is a multi-purpose microengine that handles a
6081  * variety of functions, the most important of which is
6082  * the interrupt controller.
6083  */
6084 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6085                                           bool enable)
6086 {
6087         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6088
6089         if (enable)
6090                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6091         else
6092                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6093         WREG32(CP_INT_CNTL_RING0, tmp);
6094 }
6095
6096 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6097 {
6098         u32 tmp;
6099
6100         tmp = RREG32(RLC_LB_CNTL);
6101         if (enable)
6102                 tmp |= LOAD_BALANCE_ENABLE;
6103         else
6104                 tmp &= ~LOAD_BALANCE_ENABLE;
6105         WREG32(RLC_LB_CNTL, tmp);
6106 }
6107
6108 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6109 {
6110         u32 i, j, k;
6111         u32 mask;
6112
6113         mutex_lock(&rdev->grbm_idx_mutex);
6114         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6115                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6116                         cik_select_se_sh(rdev, i, j);
6117                         for (k = 0; k < rdev->usec_timeout; k++) {
6118                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6119                                         break;
6120                                 udelay(1);
6121                         }
6122                 }
6123         }
6124         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6125         mutex_unlock(&rdev->grbm_idx_mutex);
6126
6127         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6128         for (k = 0; k < rdev->usec_timeout; k++) {
6129                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6130                         break;
6131                 udelay(1);
6132         }
6133 }
6134
6135 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6136 {
6137         u32 tmp;
6138
6139         tmp = RREG32(RLC_CNTL);
6140         if (tmp != rlc)
6141                 WREG32(RLC_CNTL, rlc);
6142 }
6143
6144 static u32 cik_halt_rlc(struct radeon_device *rdev)
6145 {
6146         u32 data, orig;
6147
6148         orig = data = RREG32(RLC_CNTL);
6149
6150         if (data & RLC_ENABLE) {
6151                 u32 i;
6152
6153                 data &= ~RLC_ENABLE;
6154                 WREG32(RLC_CNTL, data);
6155
6156                 for (i = 0; i < rdev->usec_timeout; i++) {
6157                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6158                                 break;
6159                         udelay(1);
6160                 }
6161
6162                 cik_wait_for_rlc_serdes(rdev);
6163         }
6164
6165         return orig;
6166 }
6167
/**
 * cik_enter_rlc_safe_mode - request RLC safe mode and wait for it
 *
 * @rdev: radeon_device pointer
 *
 * Writes the ENTER_RLC_SAFE_MODE message to RLC_GPR_REG2, waits for
 * GFX power and clock status to be up, then waits for the RLC to
 * acknowledge by clearing the REQ bit.  Both polls give up silently
 * after rdev->usec_timeout iterations.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for GFX power and clocks to be reported up */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6188
6189 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6190 {
6191         u32 tmp;
6192
6193         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6194         WREG32(RLC_GPR_REG2, tmp);
6195 }
6196
6197 /**
6198  * cik_rlc_stop - stop the RLC ME
6199  *
6200  * @rdev: radeon_device pointer
6201  *
6202  * Halt the RLC ME (MicroEngine) (CIK).
6203  */
6204 static void cik_rlc_stop(struct radeon_device *rdev)
6205 {
6206         WREG32(RLC_CNTL, 0);
6207
6208         cik_enable_gui_idle_interrupt(rdev, false);
6209
6210         cik_wait_for_rlc_serdes(rdev);
6211 }
6212
6213 /**
6214  * cik_rlc_start - start the RLC ME
6215  *
6216  * @rdev: radeon_device pointer
6217  *
6218  * Unhalt the RLC ME (MicroEngine) (CIK).
6219  */
6220 static void cik_rlc_start(struct radeon_device *rdev)
6221 {
6222         WREG32(RLC_CNTL, RLC_ENABLE);
6223
6224         cik_enable_gui_idle_interrupt(rdev, true);
6225
6226         udelay(50);
6227 }
6228
6229 /**
6230  * cik_rlc_resume - setup the RLC hw
6231  *
6232  * @rdev: radeon_device pointer
6233  *
6234  * Initialize the RLC registers, load the ucode,
6235  * and start the RLC (CIK).
6236  * Returns 0 for success, -EINVAL if the ucode is not available.
6237  */
6238 static int cik_rlc_resume(struct radeon_device *rdev)
6239 {
6240         u32 i, size, tmp;
6241
6242         if (!rdev->rlc_fw)
6243                 return -EINVAL;
6244
6245         cik_rlc_stop(rdev);
6246
6247         /* disable CG */
6248         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6249         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6250
6251         si_rlc_reset(rdev);
6252
6253         cik_init_pg(rdev);
6254
6255         cik_init_cg(rdev);
6256
6257         WREG32(RLC_LB_CNTR_INIT, 0);
6258         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6259
6260         mutex_lock(&rdev->grbm_idx_mutex);
6261         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6262         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6263         WREG32(RLC_LB_PARAMS, 0x00600408);
6264         WREG32(RLC_LB_CNTL, 0x80000004);
6265         mutex_unlock(&rdev->grbm_idx_mutex);
6266
6267         WREG32(RLC_MC_CNTL, 0);
6268         WREG32(RLC_UCODE_CNTL, 0);
6269
6270         if (rdev->new_fw) {
6271                 const struct rlc_firmware_header_v1_0 *hdr =
6272                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6273                 const __le32 *fw_data = (const __le32 *)
6274                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6275
6276                 radeon_ucode_print_rlc_hdr(&hdr->header);
6277
6278                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6279                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6280                 for (i = 0; i < size; i++)
6281                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6282                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6283         } else {
6284                 const __be32 *fw_data;
6285
6286                 switch (rdev->family) {
6287                 case CHIP_BONAIRE:
6288                 case CHIP_HAWAII:
6289                 default:
6290                         size = BONAIRE_RLC_UCODE_SIZE;
6291                         break;
6292                 case CHIP_KAVERI:
6293                         size = KV_RLC_UCODE_SIZE;
6294                         break;
6295                 case CHIP_KABINI:
6296                         size = KB_RLC_UCODE_SIZE;
6297                         break;
6298                 case CHIP_MULLINS:
6299                         size = ML_RLC_UCODE_SIZE;
6300                         break;
6301                 }
6302
6303                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6304                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6305                 for (i = 0; i < size; i++)
6306                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6307                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6308         }
6309
6310         /* XXX - find out what chips support lbpw */
6311         cik_enable_lbpw(rdev, false);
6312
6313         if (rdev->family == CHIP_BONAIRE)
6314                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6315
6316         cik_rlc_start(rdev);
6317
6318         return 0;
6319 }
6320
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		/* broadcast the serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restart the RLC with its previous state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated dummy reads; presumably needed to settle the CB
		 * sclk gating state before disabling CGCG — TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if the value actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6358
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * including CP/RLC memory light sleep and CGTS shader-memory gating
 * where the cg_flags allow it.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1), keep bit 0 set */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		/* broadcast the serdes write to all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS shader-memory clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set both override bits to force MGCG off */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* override CGTS gating off */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes masks */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6442
/* Memory-controller registers that each carry MC_CG_ENABLE/MC_LS_ENABLE
 * bits; toggled as a group by cik_enable_mc_mgcg() and cik_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6455
6456 static void cik_enable_mc_ls(struct radeon_device *rdev,
6457                              bool enable)
6458 {
6459         int i;
6460         u32 orig, data;
6461
6462         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6463                 orig = data = RREG32(mc_cg_registers[i]);
6464                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6465                         data |= MC_LS_ENABLE;
6466                 else
6467                         data &= ~MC_LS_ENABLE;
6468                 if (data != orig)
6469                         WREG32(mc_cg_registers[i], data);
6470         }
6471 }
6472
6473 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6474                                bool enable)
6475 {
6476         int i;
6477         u32 orig, data;
6478
6479         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6480                 orig = data = RREG32(mc_cg_registers[i]);
6481                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6482                         data |= MC_CG_ENABLE;
6483                 else
6484                         data &= ~MC_CG_ENABLE;
6485                 if (data != orig)
6486                         WREG32(mc_cg_registers[i], data);
6487         }
6488 }
6489
6490 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6491                                  bool enable)
6492 {
6493         u32 orig, data;
6494
6495         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6496                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6497                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6498         } else {
6499                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6500                 data |= 0xff000000;
6501                 if (data != orig)
6502                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6503
6504                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6505                 data |= 0xff000000;
6506                 if (data != orig)
6507                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6508         }
6509 }
6510
6511 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6512                                  bool enable)
6513 {
6514         u32 orig, data;
6515
6516         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6517                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6518                 data |= 0x100;
6519                 if (orig != data)
6520                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6521
6522                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6523                 data |= 0x100;
6524                 if (orig != data)
6525                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6526         } else {
6527                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6528                 data &= ~0x100;
6529                 if (orig != data)
6530                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6531
6532                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6533                 data &= ~0x100;
6534                 if (orig != data)
6535                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6536         }
6537 }
6538
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the read result is immediately discarded and
		 * replaced with 0xfff (all mem-gating bits on); the read may
		 * be a deliberate latch/flush — TODO confirm before removing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 mem-gating bits */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6564
6565 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6566                                bool enable)
6567 {
6568         u32 orig, data;
6569
6570         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6571
6572         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6573                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6574                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6575         else
6576                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6577                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6578
6579         if (orig != data)
6580                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6581 }
6582
6583 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6584                                 bool enable)
6585 {
6586         u32 orig, data;
6587
6588         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6589
6590         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6591                 data &= ~CLOCK_GATING_DIS;
6592         else
6593                 data |= CLOCK_GATING_DIS;
6594
6595         if (orig != data)
6596                 WREG32(HDP_HOST_PATH_CNTL, data);
6597 }
6598
6599 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6600                               bool enable)
6601 {
6602         u32 orig, data;
6603
6604         orig = data = RREG32(HDP_MEM_POWER_LS);
6605
6606         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6607                 data |= HDP_LS_ENABLE;
6608         else
6609                 data &= ~HDP_LS_ENABLE;
6610
6611         if (orig != data)
6612                 WREG32(HDP_MEM_POWER_LS, data);
6613 }
6614
/* Enable or disable clock gating for the hardware blocks selected by
 * the @block bitmask.  GFX enable/disable ordering is significant.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* quiesce GUI-idle interrupts while reprogramming GFX CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			/* MGCG before CGCG when enabling */
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			/* reverse order when disabling */
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* IGPs have no discrete MC to gate */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6662
/* Enable clock gating on all supported blocks: GFX first, then the rest. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6677
/* Disable clock gating everywhere, in the reverse order of cik_init_cg():
 * non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6688
6689 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6690                                           bool enable)
6691 {
6692         u32 data, orig;
6693
6694         orig = data = RREG32(RLC_PG_CNTL);
6695         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6696                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6697         else
6698                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6699         if (orig != data)
6700                 WREG32(RLC_PG_CNTL, data);
6701 }
6702
6703 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6704                                           bool enable)
6705 {
6706         u32 data, orig;
6707
6708         orig = data = RREG32(RLC_PG_CNTL);
6709         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6710                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6711         else
6712                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6713         if (orig != data)
6714                 WREG32(RLC_PG_CNTL, data);
6715 }
6716
6717 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6718 {
6719         u32 data, orig;
6720
6721         orig = data = RREG32(RLC_PG_CNTL);
6722         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6723                 data &= ~DISABLE_CP_PG;
6724         else
6725                 data |= DISABLE_CP_PG;
6726         if (orig != data)
6727                 WREG32(RLC_PG_CNTL, data);
6728 }
6729
6730 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6731 {
6732         u32 data, orig;
6733
6734         orig = data = RREG32(RLC_PG_CNTL);
6735         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6736                 data &= ~DISABLE_GDS_PG;
6737         else
6738                 data |= DISABLE_GDS_PG;
6739         if (orig != data)
6740                 WREG32(RLC_PG_CNTL, data);
6741 }
6742
6743 #define CP_ME_TABLE_SIZE    96
6744 #define CP_ME_TABLE_OFFSET  2048
6745 #define CP_MEC_TABLE_OFFSET 4096
6746
/* Copy the per-micro-engine jump tables out of the CP firmware images
 * into the RLC's cp_table BO so the RLC can restore CP state after
 * power gating.  Tables are packed back to back in engine order:
 * CE, PFP, ME, MEC (+ MEC2 on KAVERI).
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* KAVERI has a second MEC */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	/* table BO not mapped, nothing to do */
	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: per-image header gives the jump
			 * table offset/size in little-endian words */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				/* me == 4: MEC2 — only reached on KAVERI;
				 * assumes mec2_fw was loaded there — TODO confirm */
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table offset/size, big-endian
			 * words; me >= 3 (MEC and, on KAVERI, MEC2) reuses the
			 * MEC image */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6830
/* Enable/disable GFX power gating plus the RLC's automatic
 * power-gating control.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result discarded; presumably a flush/wake read needed
		 * after disabling PG — TODO confirm against hw docs */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6860
/* Return a bitmap of the active CUs in the given shader engine/array:
 * a set bit means the CU is present and enabled.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);	/* hw-disabled CUs */
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);	/* user-disabled CUs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);	/* back to broadcast */
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* inactive-CU bits live in the upper 16 bits */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask with one bit per possible CU */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* registers hold *disabled* CUs; invert to get the active set */
	return (~tmp) & mask;
}
6885
6886 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6887 {
6888         u32 i, j, k, active_cu_number = 0;
6889         u32 mask, counter, cu_bitmap;
6890         u32 tmp = 0;
6891
6892         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6893                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6894                         mask = 1;
6895                         cu_bitmap = 0;
6896                         counter = 0;
6897                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6898                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6899                                         if (counter < 2)
6900                                                 cu_bitmap |= mask;
6901                                         counter ++;
6902                                 }
6903                                 mask <<= 1;
6904                         }
6905
6906                         active_cu_number += counter;
6907                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6908                 }
6909         }
6910
6911         WREG32(RLC_PG_AO_CU_MASK, tmp);
6912
6913         tmp = RREG32(RLC_MAX_PG_CU);
6914         tmp &= ~MAX_PU_CU_MASK;
6915         tmp |= MAX_PU_CU(active_cu_number);
6916         WREG32(RLC_MAX_PG_CU, tmp);
6917 }
6918
6919 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6920                                        bool enable)
6921 {
6922         u32 data, orig;
6923
6924         orig = data = RREG32(RLC_PG_CNTL);
6925         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6926                 data |= STATIC_PER_CU_PG_ENABLE;
6927         else
6928                 data &= ~STATIC_PER_CU_PG_ENABLE;
6929         if (orig != data)
6930                 WREG32(RLC_PG_CNTL, data);
6931 }
6932
6933 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6934                                         bool enable)
6935 {
6936         u32 data, orig;
6937
6938         orig = data = RREG32(RLC_PG_CNTL);
6939         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6940                 data |= DYN_PER_CU_PG_ENABLE;
6941         else
6942                 data &= ~DYN_PER_CU_PG_ENABLE;
6943         if (orig != data)
6944                 WREG32(RLC_PG_CNTL, data);
6945 }
6946
6947 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6948 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6949
/* Program the RLC scratch area with the clear-state descriptor and
 * save/restore register list, then set up power-gating timing params.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor: GPU address (hi, lo)
		 * and size, at a fixed scratch offset */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* set the CP ring-buffer write-pointer poll interval */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* power-gating delay values */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM register save/restore gating idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6998
/* Enable/disable all GFX power-gating modes together:
 * coarse-grain, static per-CU, and dynamic per-CU.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7005
7006 u32 cik_get_csb_size(struct radeon_device *rdev)
7007 {
7008         u32 count = 0;
7009         const struct cs_section_def *sect = NULL;
7010         const struct cs_extent_def *ext = NULL;
7011
7012         if (rdev->rlc.cs_data == NULL)
7013                 return 0;
7014
7015         /* begin clear state */
7016         count += 2;
7017         /* context control state */
7018         count += 3;
7019
7020         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7021                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7022                         if (sect->id == SECT_CONTEXT)
7023                                 count += 2 + ext->reg_count;
7024                         else
7025                                 return 0;
7026                 }
7027         }
7028         /* pa_sc_raster_config/pa_sc_raster_config1 */
7029         count += 4;
7030         /* end clear state */
7031         count += 2;
7032         /* clear state */
7033         count += 2;
7034
7035         return count;
7036 }
7037
/* Fill @buffer with the clear-state command stream (little-endian PM4
 * packets).  The layout here must stay in sync with cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state preamble */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context registers are offset from 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* unsupported section: bail out (buffer left
				 * short of the size cik_get_csb_size() would
				 * have reported, which also returns 0 here) */
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state preamble */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7102
7103 static void cik_init_pg(struct radeon_device *rdev)
7104 {
7105         if (rdev->pg_flags) {
7106                 cik_enable_sck_slowdown_on_pu(rdev, true);
7107                 cik_enable_sck_slowdown_on_pd(rdev, true);
7108                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7109                         cik_init_gfx_cgpg(rdev);
7110                         cik_enable_cp_pg(rdev, true);
7111                         cik_enable_gds_pg(rdev, true);
7112                 }
7113                 cik_init_ao_cu_mask(rdev);
7114                 cik_update_gfx_pg(rdev, true);
7115         }
7116 }
7117
7118 static void cik_fini_pg(struct radeon_device *rdev)
7119 {
7120         if (rdev->pg_flags) {
7121                 cik_update_gfx_pg(rdev, false);
7122                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7123                         cik_enable_cp_pg(rdev, false);
7124                         cik_enable_gds_pg(rdev, false);
7125                 }
7126         }
7127 }
7128
7129 /*
7130  * Interrupts
7131  * Starting with r6xx, interrupts are handled via a ring buffer.
7132  * Ring buffers are areas of GPU accessible memory that the GPU
7133  * writes interrupt vectors into and the host reads vectors out of.
7134  * There is a rptr (read pointer) that determines where the
7135  * host is currently reading, and a wptr (write pointer)
7136  * which determines where the GPU has written.  When the
7137  * pointers are equal, the ring is idle.  When the GPU
7138  * writes vectors to the ring buffer, it increments the
7139  * wptr.  When there is an interrupt, the host then starts
7140  * fetching commands and processing them until the pointers are
7141  * equal again at which point it updates the rptr.
7142  */
7143
7144 /**
7145  * cik_enable_interrupts - Enable the interrupt ring buffer
7146  *
7147  * @rdev: radeon_device pointer
7148  *
7149  * Enable the interrupt ring buffer (CIK).
7150  */
7151 static void cik_enable_interrupts(struct radeon_device *rdev)
7152 {
7153         u32 ih_cntl = RREG32(IH_CNTL);
7154         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7155
7156         ih_cntl |= ENABLE_INTR;
7157         ih_rb_cntl |= IH_RB_ENABLE;
7158         WREG32(IH_CNTL, ih_cntl);
7159         WREG32(IH_RB_CNTL, ih_rb_cntl);
7160         rdev->ih.enabled = true;
7161 }
7162
7163 /**
7164  * cik_disable_interrupts - Disable the interrupt ring buffer
7165  *
7166  * @rdev: radeon_device pointer
7167  *
7168  * Disable the interrupt ring buffer (CIK).
7169  */
7170 static void cik_disable_interrupts(struct radeon_device *rdev)
7171 {
7172         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7173         u32 ih_cntl = RREG32(IH_CNTL);
7174
7175         ih_rb_cntl &= ~IH_RB_ENABLE;
7176         ih_cntl &= ~ENABLE_INTR;
7177         WREG32(IH_RB_CNTL, ih_rb_cntl);
7178         WREG32(IH_CNTL, ih_cntl);
7179         /* set rptr, wptr to 0 */
7180         WREG32(IH_RB_RPTR, 0);
7181         WREG32(IH_RB_WPTR, 0);
7182         rdev->ih.enabled = false;
7183         rdev->ih.rptr = 0;
7184 }
7185
7186 /**
7187  * cik_disable_interrupt_state - Disable all interrupt sources
7188  *
7189  * @rdev: radeon_device pointer
7190  *
7191  * Clear all interrupt enable bits used by the driver (CIK).
7192  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: clear all enables but keep the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: mask the trap interrupt on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes of both compute micro engines */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch crtcs the asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables but keep the configured polarity */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7262
7263 /**
7264  * cik_irq_init - init and enable the interrupt ring
7265  *
7266  * @rdev: radeon_device pointer
7267  *
7268  * Allocate a ring buffer for the interrupt controller,
7269  * enable the RLC, disable interrupts, enable the IH
7270  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7272  * Returns 0 for success, errors for failure.
7273  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reprogram the controller */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in 4-dword units */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7344
7345 /**
7346  * cik_irq_set - enable/disable interrupt sources
7347  *
7348  * @rdev: radeon_device pointer
7349  *
7350  * Enable interrupt sources on the GPU (vblanks, hpd,
7351  * etc.) (CIK).
7352  * Returns 0 for success, errors for failure.
7353  */
7354 int cik_irq_set(struct radeon_device *rdev)
7355 {
7356         u32 cp_int_cntl;
7357         u32 cp_m1p0;
7358         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7359         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7360         u32 grbm_int_cntl = 0;
7361         u32 dma_cntl, dma_cntl1;
7362
7363         if (!rdev->irq.installed) {
7364                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7365                 return -EINVAL;
7366         }
7367         /* don't enable anything if the ih is disabled */
7368         if (!rdev->ih.enabled) {
7369                 cik_disable_interrupts(rdev);
7370                 /* force the active interrupt state to all disabled */
7371                 cik_disable_interrupt_state(rdev);
7372                 return 0;
7373         }
7374
7375         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7376                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7377         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7378
7379         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7380         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7381         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7382         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7383         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7384         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7385
7386         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7387         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7388
7389         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7390
7391         /* enable CP interrupts on all rings */
7392         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7393                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7394                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7395         }
7396         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7397                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7398                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7399                 if (ring->me == 1) {
7400                         switch (ring->pipe) {
7401                         case 0:
7402                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7403                                 break;
7404                         default:
7405                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7406                                 break;
7407                         }
7408                 } else {
7409                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7410                 }
7411         }
7412         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7413                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7414                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7415                 if (ring->me == 1) {
7416                         switch (ring->pipe) {
7417                         case 0:
7418                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7419                                 break;
7420                         default:
7421                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7422                                 break;
7423                         }
7424                 } else {
7425                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7426                 }
7427         }
7428
7429         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7430                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7431                 dma_cntl |= TRAP_ENABLE;
7432         }
7433
7434         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7435                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7436                 dma_cntl1 |= TRAP_ENABLE;
7437         }
7438
7439         if (rdev->irq.crtc_vblank_int[0] ||
7440             atomic_read(&rdev->irq.pflip[0])) {
7441                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7442                 crtc1 |= VBLANK_INTERRUPT_MASK;
7443         }
7444         if (rdev->irq.crtc_vblank_int[1] ||
7445             atomic_read(&rdev->irq.pflip[1])) {
7446                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7447                 crtc2 |= VBLANK_INTERRUPT_MASK;
7448         }
7449         if (rdev->irq.crtc_vblank_int[2] ||
7450             atomic_read(&rdev->irq.pflip[2])) {
7451                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7452                 crtc3 |= VBLANK_INTERRUPT_MASK;
7453         }
7454         if (rdev->irq.crtc_vblank_int[3] ||
7455             atomic_read(&rdev->irq.pflip[3])) {
7456                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7457                 crtc4 |= VBLANK_INTERRUPT_MASK;
7458         }
7459         if (rdev->irq.crtc_vblank_int[4] ||
7460             atomic_read(&rdev->irq.pflip[4])) {
7461                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7462                 crtc5 |= VBLANK_INTERRUPT_MASK;
7463         }
7464         if (rdev->irq.crtc_vblank_int[5] ||
7465             atomic_read(&rdev->irq.pflip[5])) {
7466                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7467                 crtc6 |= VBLANK_INTERRUPT_MASK;
7468         }
7469         if (rdev->irq.hpd[0]) {
7470                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7471                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7472         }
7473         if (rdev->irq.hpd[1]) {
7474                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7475                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7476         }
7477         if (rdev->irq.hpd[2]) {
7478                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7479                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7480         }
7481         if (rdev->irq.hpd[3]) {
7482                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7483                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7484         }
7485         if (rdev->irq.hpd[4]) {
7486                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7487                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7488         }
7489         if (rdev->irq.hpd[5]) {
7490                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7491                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7492         }
7493
7494         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7495
7496         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7497         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7498
7499         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7500
7501         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7502
7503         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7504         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7505         if (rdev->num_crtc >= 4) {
7506                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7507                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7508         }
7509         if (rdev->num_crtc >= 6) {
7510                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7511                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7512         }
7513
7514         if (rdev->num_crtc >= 2) {
7515                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7516                        GRPH_PFLIP_INT_MASK);
7517                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7518                        GRPH_PFLIP_INT_MASK);
7519         }
7520         if (rdev->num_crtc >= 4) {
7521                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7522                        GRPH_PFLIP_INT_MASK);
7523                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7524                        GRPH_PFLIP_INT_MASK);
7525         }
7526         if (rdev->num_crtc >= 6) {
7527                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7528                        GRPH_PFLIP_INT_MASK);
7529                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7530                        GRPH_PFLIP_INT_MASK);
7531         }
7532
7533         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7534         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7535         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7536         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7537         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7538         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7539
7540         /* posting read */
7541         RREG32(SRBM_STATUS);
7542
7543         return 0;
7544 }
7545
7546 /**
7547  * cik_irq_ack - ack interrupt sources
7548  *
7549  * @rdev: radeon_device pointer
7550  *
7551  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7553  * generated and do not require an explicit ack.
7554  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for later use
	 * by cik_irq_process() */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* latch the pflip status for the crtcs this asic has */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip/vblank/vline interrupts on crtc0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* same for crtc2/3 when present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* same for crtc4/5 when present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug (connect/disconnect) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hotplug RX (sink request) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7694
7695 /**
7696  * cik_irq_disable - disable interrupts
7697  *
7698  * @rdev: radeon_device pointer
7699  *
7700  * Disable interrupts on the hw (CIK).
7701  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: give any in-flight interrupt time to
	 * land before acking and clearing the source enables */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7710
7711 /**
 * cik_irq_suspend - disable interrupts for suspend
7713  *
7714  * @rdev: radeon_device pointer
7715  *
7716  * Disable interrupts and stop the RLC (CIK).
7717  * Used for suspend.
7718  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* quiesce the interrupt controller, then stop the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7724
7725 /**
7726  * cik_irq_fini - tear down interrupt support
7727  *
7728  * @rdev: radeon_device pointer
7729  *
7730  * Disable interrupts on the hw and free the IH ring
7731  * buffer (CIK).
7732  * Used for driver unload.
7733  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* disable irqs and the RLC first, then free the IH ring memory */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7739
7740 /**
7741  * cik_get_ih_wptr - get the IH ring buffer wptr
7742  *
7743  * @rdev: radeon_device pointer
7744  *
7745  * Get the IH ring buffer wptr from either the register
7746  * or the writeback memory buffer (CIK).  Also check for
7747  * ring buffer overflow and deal with it.
7748  * Used by cik_irq_process().
7749  * Returns the value of the wptr.
7750  */
7751 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7752 {
7753         u32 wptr, tmp;
7754
7755         if (rdev->wb.enabled)
7756                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7757         else
7758                 wptr = RREG32(IH_RB_WPTR);
7759
7760         if (wptr & RB_OVERFLOW) {
7761                 wptr &= ~RB_OVERFLOW;
7762                 /* When a ring buffer overflow happen start parsing interrupt
7763                  * from the last not overwritten vector (wptr + 16). Hopefully
7764                  * this should allow us to catchup.
7765                  */
7766                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7767                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7768                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7769                 tmp = RREG32(IH_RB_CNTL);
7770                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7771                 WREG32(IH_RB_CNTL, tmp);
7772         }
7773         return (wptr & rdev->ih.ptr_mask);
7774 }
7775
7776 /*        CIK IV Ring
7777  * Each IV ring entry is 128 bits:
7778  * [7:0]    - interrupt source id
7779  * [31:8]   - reserved
7780  * [59:32]  - interrupt source data
7781  * [63:60]  - reserved
7782  * [71:64]  - RINGID
7783  *            CP:
7784  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7785  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7786  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7787  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7788  *            PIPE_ID - ME0 0=3D
7789  *                    - ME1&2 compute dispatcher (4 pipes each)
7790  *            SDMA:
7791  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7792  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7793  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7794  * [79:72]  - VMID
7795  * [95:80]  - PASID
7796  * [127:96] - reserved
7797  */
7798 /**
7799  * cik_irq_process - interrupt handler
7800  *
7801  * @rdev: radeon_device pointer
7802  *
7803  * Interrupt hander (CIK).  Walk the IH ring,
7804  * ack interrupts and schedule work to handle
7805  * interrupt events.
7806  * Returns irq process return code.
7807  */
7808 int cik_irq_process(struct radeon_device *rdev)
7809 {
7810         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7811         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7812         u32 wptr;
7813         u32 rptr;
7814         u32 src_id, src_data, ring_id;
7815         u8 me_id, pipe_id, queue_id;
7816         u32 ring_index;
7817         bool queue_hotplug = false;
7818         bool queue_dp = false;
7819         bool queue_reset = false;
7820         u32 addr, status, mc_client;
7821         bool queue_thermal = false;
7822
7823         if (!rdev->ih.enabled || rdev->shutdown)
7824                 return IRQ_NONE;
7825
7826         wptr = cik_get_ih_wptr(rdev);
7827
7828 restart_ih:
7829         /* is somebody else already processing irqs? */
7830         if (atomic_xchg(&rdev->ih.lock, 1))
7831                 return IRQ_NONE;
7832
7833         rptr = rdev->ih.rptr;
7834         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7835
7836         /* Order reading of wptr vs. reading of IH ring data */
7837         rmb();
7838
7839         /* display interrupts */
7840         cik_irq_ack(rdev);
7841
7842         while (rptr != wptr) {
7843                 /* wptr/rptr are in bytes! */
7844                 ring_index = rptr / 4;
7845
7846                 radeon_kfd_interrupt(rdev,
7847                                 (const void *) &rdev->ih.ring[ring_index]);
7848
7849                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7850                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7851                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7852
7853                 switch (src_id) {
7854                 case 1: /* D1 vblank/vline */
7855                         switch (src_data) {
7856                         case 0: /* D1 vblank */
7857                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7858                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859
7860                                 if (rdev->irq.crtc_vblank_int[0]) {
7861                                         drm_handle_vblank(rdev->ddev, 0);
7862                                         rdev->pm.vblank_sync = true;
7863                                         wake_up(&rdev->irq.vblank_queue);
7864                                 }
7865                                 if (atomic_read(&rdev->irq.pflip[0]))
7866                                         radeon_crtc_handle_vblank(rdev, 0);
7867                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7868                                 DRM_DEBUG("IH: D1 vblank\n");
7869
7870                                 break;
7871                         case 1: /* D1 vline */
7872                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7873                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7874
7875                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7876                                 DRM_DEBUG("IH: D1 vline\n");
7877
7878                                 break;
7879                         default:
7880                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7881                                 break;
7882                         }
7883                         break;
7884                 case 2: /* D2 vblank/vline */
7885                         switch (src_data) {
7886                         case 0: /* D2 vblank */
7887                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7888                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7889
7890                                 if (rdev->irq.crtc_vblank_int[1]) {
7891                                         drm_handle_vblank(rdev->ddev, 1);
7892                                         rdev->pm.vblank_sync = true;
7893                                         wake_up(&rdev->irq.vblank_queue);
7894                                 }
7895                                 if (atomic_read(&rdev->irq.pflip[1]))
7896                                         radeon_crtc_handle_vblank(rdev, 1);
7897                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7898                                 DRM_DEBUG("IH: D2 vblank\n");
7899
7900                                 break;
7901                         case 1: /* D2 vline */
7902                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7903                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7904
7905                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7906                                 DRM_DEBUG("IH: D2 vline\n");
7907
7908                                 break;
7909                         default:
7910                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7911                                 break;
7912                         }
7913                         break;
7914                 case 3: /* D3 vblank/vline */
7915                         switch (src_data) {
7916                         case 0: /* D3 vblank */
7917                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7918                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7919
7920                                 if (rdev->irq.crtc_vblank_int[2]) {
7921                                         drm_handle_vblank(rdev->ddev, 2);
7922                                         rdev->pm.vblank_sync = true;
7923                                         wake_up(&rdev->irq.vblank_queue);
7924                                 }
7925                                 if (atomic_read(&rdev->irq.pflip[2]))
7926                                         radeon_crtc_handle_vblank(rdev, 2);
7927                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7928                                 DRM_DEBUG("IH: D3 vblank\n");
7929
7930                                 break;
7931                         case 1: /* D3 vline */
7932                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7933                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7934
7935                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7936                                 DRM_DEBUG("IH: D3 vline\n");
7937
7938                                 break;
7939                         default:
7940                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7941                                 break;
7942                         }
7943                         break;
7944                 case 4: /* D4 vblank/vline */
7945                         switch (src_data) {
7946                         case 0: /* D4 vblank */
7947                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7948                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7949
7950                                 if (rdev->irq.crtc_vblank_int[3]) {
7951                                         drm_handle_vblank(rdev->ddev, 3);
7952                                         rdev->pm.vblank_sync = true;
7953                                         wake_up(&rdev->irq.vblank_queue);
7954                                 }
7955                                 if (atomic_read(&rdev->irq.pflip[3]))
7956                                         radeon_crtc_handle_vblank(rdev, 3);
7957                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7958                                 DRM_DEBUG("IH: D4 vblank\n");
7959
7960                                 break;
7961                         case 1: /* D4 vline */
7962                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7963                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7964
7965                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7966                                 DRM_DEBUG("IH: D4 vline\n");
7967
7968                                 break;
7969                         default:
7970                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7971                                 break;
7972                         }
7973                         break;
7974                 case 5: /* D5 vblank/vline */
7975                         switch (src_data) {
7976                         case 0: /* D5 vblank */
7977                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7978                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7979
7980                                 if (rdev->irq.crtc_vblank_int[4]) {
7981                                         drm_handle_vblank(rdev->ddev, 4);
7982                                         rdev->pm.vblank_sync = true;
7983                                         wake_up(&rdev->irq.vblank_queue);
7984                                 }
7985                                 if (atomic_read(&rdev->irq.pflip[4]))
7986                                         radeon_crtc_handle_vblank(rdev, 4);
7987                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7988                                 DRM_DEBUG("IH: D5 vblank\n");
7989
7990                                 break;
7991                         case 1: /* D5 vline */
7992                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7993                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7994
7995                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7996                                 DRM_DEBUG("IH: D5 vline\n");
7997
7998                                 break;
7999                         default:
8000                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8001                                 break;
8002                         }
8003                         break;
8004                 case 6: /* D6 vblank/vline */
8005                         switch (src_data) {
8006                         case 0: /* D6 vblank */
8007                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8008                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8009
8010                                 if (rdev->irq.crtc_vblank_int[5]) {
8011                                         drm_handle_vblank(rdev->ddev, 5);
8012                                         rdev->pm.vblank_sync = true;
8013                                         wake_up(&rdev->irq.vblank_queue);
8014                                 }
8015                                 if (atomic_read(&rdev->irq.pflip[5]))
8016                                         radeon_crtc_handle_vblank(rdev, 5);
8017                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8018                                 DRM_DEBUG("IH: D6 vblank\n");
8019
8020                                 break;
8021                         case 1: /* D6 vline */
8022                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8023                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8024
8025                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8026                                 DRM_DEBUG("IH: D6 vline\n");
8027
8028                                 break;
8029                         default:
8030                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8031                                 break;
8032                         }
8033                         break;
8034                 case 8: /* D1 page flip */
8035                 case 10: /* D2 page flip */
8036                 case 12: /* D3 page flip */
8037                 case 14: /* D4 page flip */
8038                 case 16: /* D5 page flip */
8039                 case 18: /* D6 page flip */
8040                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8041                         if (radeon_use_pflipirq > 0)
8042                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8043                         break;
8044                 case 42: /* HPD hotplug */
8045                         switch (src_data) {
8046                         case 0:
8047                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8048                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8049
8050                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8051                                 queue_hotplug = true;
8052                                 DRM_DEBUG("IH: HPD1\n");
8053
8054                                 break;
8055                         case 1:
8056                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8057                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8058
8059                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8060                                 queue_hotplug = true;
8061                                 DRM_DEBUG("IH: HPD2\n");
8062
8063                                 break;
8064                         case 2:
8065                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8066                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8067
8068                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8069                                 queue_hotplug = true;
8070                                 DRM_DEBUG("IH: HPD3\n");
8071
8072                                 break;
8073                         case 3:
8074                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8075                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8076
8077                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8078                                 queue_hotplug = true;
8079                                 DRM_DEBUG("IH: HPD4\n");
8080
8081                                 break;
8082                         case 4:
8083                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8084                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8085
8086                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8087                                 queue_hotplug = true;
8088                                 DRM_DEBUG("IH: HPD5\n");
8089
8090                                 break;
8091                         case 5:
8092                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8093                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8094
8095                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8096                                 queue_hotplug = true;
8097                                 DRM_DEBUG("IH: HPD6\n");
8098
8099                                 break;
8100                         case 6:
8101                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8102                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8103
8104                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8105                                 queue_dp = true;
8106                                 DRM_DEBUG("IH: HPD_RX 1\n");
8107
8108                                 break;
8109                         case 7:
8110                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8111                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8112
8113                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8114                                 queue_dp = true;
8115                                 DRM_DEBUG("IH: HPD_RX 2\n");
8116
8117                                 break;
8118                         case 8:
8119                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8120                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8121
8122                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8123                                 queue_dp = true;
8124                                 DRM_DEBUG("IH: HPD_RX 3\n");
8125
8126                                 break;
8127                         case 9:
8128                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8129                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8130
8131                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8132                                 queue_dp = true;
8133                                 DRM_DEBUG("IH: HPD_RX 4\n");
8134
8135                                 break;
8136                         case 10:
8137                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8138                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8139
8140                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8141                                 queue_dp = true;
8142                                 DRM_DEBUG("IH: HPD_RX 5\n");
8143
8144                                 break;
8145                         case 11:
8146                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8147                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8148
8149                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8150                                 queue_dp = true;
8151                                 DRM_DEBUG("IH: HPD_RX 6\n");
8152
8153                                 break;
8154                         default:
8155                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8156                                 break;
8157                         }
8158                         break;
8159                 case 96:
8160                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8161                         WREG32(SRBM_INT_ACK, 0x1);
8162                         break;
8163                 case 124: /* UVD */
8164                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8165                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8166                         break;
8167                 case 146:
8168                 case 147:
8169                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8170                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8171                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8172                         /* reset addr and status */
8173                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8174                         if (addr == 0x0 && status == 0x0)
8175                                 break;
8176                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8177                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8178                                 addr);
8179                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8180                                 status);
8181                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8182                         break;
8183                 case 167: /* VCE */
8184                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8185                         switch (src_data) {
8186                         case 0:
8187                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8188                                 break;
8189                         case 1:
8190                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8191                                 break;
8192                         default:
8193                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8194                                 break;
8195                         }
8196                         break;
8197                 case 176: /* GFX RB CP_INT */
8198                 case 177: /* GFX IB CP_INT */
8199                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8200                         break;
8201                 case 181: /* CP EOP event */
8202                         DRM_DEBUG("IH: CP EOP\n");
8203                         /* XXX check the bitfield order! */
8204                         me_id = (ring_id & 0x60) >> 5;
8205                         pipe_id = (ring_id & 0x18) >> 3;
8206                         queue_id = (ring_id & 0x7) >> 0;
8207                         switch (me_id) {
8208                         case 0:
8209                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8210                                 break;
8211                         case 1:
8212                         case 2:
8213                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8214                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8215                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8216                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8217                                 break;
8218                         }
8219                         break;
8220                 case 184: /* CP Privileged reg access */
8221                         DRM_ERROR("Illegal register access in command stream\n");
8222                         /* XXX check the bitfield order! */
8223                         me_id = (ring_id & 0x60) >> 5;
8224                         pipe_id = (ring_id & 0x18) >> 3;
8225                         queue_id = (ring_id & 0x7) >> 0;
8226                         switch (me_id) {
8227                         case 0:
8228                                 /* This results in a full GPU reset, but all we need to do is soft
8229                                  * reset the CP for gfx
8230                                  */
8231                                 queue_reset = true;
8232                                 break;
8233                         case 1:
8234                                 /* XXX compute */
8235                                 queue_reset = true;
8236                                 break;
8237                         case 2:
8238                                 /* XXX compute */
8239                                 queue_reset = true;
8240                                 break;
8241                         }
8242                         break;
8243                 case 185: /* CP Privileged inst */
8244                         DRM_ERROR("Illegal instruction in command stream\n");
8245                         /* XXX check the bitfield order! */
8246                         me_id = (ring_id & 0x60) >> 5;
8247                         pipe_id = (ring_id & 0x18) >> 3;
8248                         queue_id = (ring_id & 0x7) >> 0;
8249                         switch (me_id) {
8250                         case 0:
8251                                 /* This results in a full GPU reset, but all we need to do is soft
8252                                  * reset the CP for gfx
8253                                  */
8254                                 queue_reset = true;
8255                                 break;
8256                         case 1:
8257                                 /* XXX compute */
8258                                 queue_reset = true;
8259                                 break;
8260                         case 2:
8261                                 /* XXX compute */
8262                                 queue_reset = true;
8263                                 break;
8264                         }
8265                         break;
8266                 case 224: /* SDMA trap event */
8267                         /* XXX check the bitfield order! */
8268                         me_id = (ring_id & 0x3) >> 0;
8269                         queue_id = (ring_id & 0xc) >> 2;
8270                         DRM_DEBUG("IH: SDMA trap\n");
8271                         switch (me_id) {
8272                         case 0:
8273                                 switch (queue_id) {
8274                                 case 0:
8275                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8276                                         break;
8277                                 case 1:
8278                                         /* XXX compute */
8279                                         break;
8280                                 case 2:
8281                                         /* XXX compute */
8282                                         break;
8283                                 }
8284                                 break;
8285                         case 1:
8286                                 switch (queue_id) {
8287                                 case 0:
8288                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8289                                         break;
8290                                 case 1:
8291                                         /* XXX compute */
8292                                         break;
8293                                 case 2:
8294                                         /* XXX compute */
8295                                         break;
8296                                 }
8297                                 break;
8298                         }
8299                         break;
8300                 case 230: /* thermal low to high */
8301                         DRM_DEBUG("IH: thermal low to high\n");
8302                         rdev->pm.dpm.thermal.high_to_low = false;
8303                         queue_thermal = true;
8304                         break;
8305                 case 231: /* thermal high to low */
8306                         DRM_DEBUG("IH: thermal high to low\n");
8307                         rdev->pm.dpm.thermal.high_to_low = true;
8308                         queue_thermal = true;
8309                         break;
8310                 case 233: /* GUI IDLE */
8311                         DRM_DEBUG("IH: GUI idle\n");
8312                         break;
8313                 case 241: /* SDMA Privileged inst */
8314                 case 247: /* SDMA Privileged inst */
8315                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8316                         /* XXX check the bitfield order! */
8317                         me_id = (ring_id & 0x3) >> 0;
8318                         queue_id = (ring_id & 0xc) >> 2;
8319                         switch (me_id) {
8320                         case 0:
8321                                 switch (queue_id) {
8322                                 case 0:
8323                                         queue_reset = true;
8324                                         break;
8325                                 case 1:
8326                                         /* XXX compute */
8327                                         queue_reset = true;
8328                                         break;
8329                                 case 2:
8330                                         /* XXX compute */
8331                                         queue_reset = true;
8332                                         break;
8333                                 }
8334                                 break;
8335                         case 1:
8336                                 switch (queue_id) {
8337                                 case 0:
8338                                         queue_reset = true;
8339                                         break;
8340                                 case 1:
8341                                         /* XXX compute */
8342                                         queue_reset = true;
8343                                         break;
8344                                 case 2:
8345                                         /* XXX compute */
8346                                         queue_reset = true;
8347                                         break;
8348                                 }
8349                                 break;
8350                         }
8351                         break;
8352                 default:
8353                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8354                         break;
8355                 }
8356
8357                 /* wptr/rptr are in bytes! */
8358                 rptr += 16;
8359                 rptr &= rdev->ih.ptr_mask;
8360                 WREG32(IH_RB_RPTR, rptr);
8361         }
8362         if (queue_dp)
8363                 schedule_work(&rdev->dp_work);
8364         if (queue_hotplug)
8365                 schedule_delayed_work(&rdev->hotplug_work, 0);
8366         if (queue_reset) {
8367                 rdev->needs_reset = true;
8368                 wake_up_all(&rdev->fence_queue);
8369         }
8370         if (queue_thermal)
8371                 schedule_work(&rdev->pm.dpm.thermal.work);
8372         rdev->ih.rptr = rptr;
8373         atomic_set(&rdev->ih.lock, 0);
8374
8375         /* make sure wptr hasn't changed while processing */
8376         wptr = cik_get_ih_wptr(rdev);
8377         if (wptr != rptr)
8378                 goto restart_ih;
8379
8380         return IRQ_HANDLED;
8381 }
8382
8383 /*
8384  * startup/shutdown callbacks
8385  */
8386 /**
8387  * cik_startup - program the asic to a functional state
8388  *
8389  * @rdev: radeon_device pointer
8390  *
8391  * Programs the asic to a functional state (CIK).
8392  * Called by cik_init() and cik_resume().
8393  * Returns 0 for success, error for failure.
8394  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* MC ucode load is dGPU-only (only dGPUs require mc_fw, see
	 * cik_init()) and is skipped here when DPM is already enabled.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore register list per IGP variant */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* Start the fence driver on every core ring (GFX, two compute,
	 * two SDMA); a failure on any of these is fatal.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on any failure just disable its ring
	 * (ring_size = 0) instead of failing the whole startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is optional too: on failure disable both VCE rings */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Choose the ring NOP packet: type-3 everywhere except Hawaii
	 * with old firmware, which still uses the type-2 packet.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring init is skipped when its resume above failed
	 * (ring_size was zeroed); a failure here is only logged.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT marks "no VCE ring present" so that the error print
	 * below is suppressed when both VCE rings are disabled.
	 */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8640
8641 /**
8642  * cik_resume - resume the asic to a functional state
8643  *
8644  * @rdev: radeon_device pointer
8645  *
8646  * Programs the asic to a functional state (CIK).
8647  * Called at resume.
8648  * Returns 0 for success, error for failure.
8649  */
8650 int cik_resume(struct radeon_device *rdev)
8651 {
8652         int r;
8653
8654         /* post card */
8655         atom_asic_init(rdev->mode_info.atom_context);
8656
8657         /* init golden registers */
8658         cik_init_golden_registers(rdev);
8659
8660         if (rdev->pm.pm_method == PM_METHOD_DPM)
8661                 radeon_pm_resume(rdev);
8662
8663         rdev->accel_working = true;
8664         r = cik_startup(rdev);
8665         if (r) {
8666                 DRM_ERROR("cik startup failed on resume\n");
8667                 rdev->accel_working = false;
8668                 return r;
8669         }
8670
8671         return r;
8672
8673 }
8674
8675 /**
8676  * cik_suspend - suspend the asic
8677  *
8678  * @rdev: radeon_device pointer
8679  *
8680  * Bring the chip into a state suitable for suspend (CIK).
8681  * Called at suspend.
8682  * Returns 0 for success.
8683  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Teardown order matters: quiesce the clients (KFD, PM, audio,
	 * VM manager) before disabling the engines they depend on.
	 */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* shut down UVD/VCE and save their state */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* tear down PG/CG state, then interrupts, writeback and GART last */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8702
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic-specific functions. This should also allow
 * us to remove a number of callback functions, such as vram_info.
 */
8709 /**
8710  * cik_init - asic specific driver and hw init
8711  *
8712  * @rdev: radeon_device pointer
8713  *
8714  * Setup asic specific driver variables and program the hw
8715  * to a functional state (CIK).
8716  * Called at driver startup.
8717  * Returns 0 for success, errors for failure.
8718  */
8719 int cik_init(struct radeon_device *rdev)
8720 {
8721         struct radeon_ring *ring;
8722         int r;
8723
8724         /* Read BIOS */
8725         if (!radeon_get_bios(rdev)) {
8726                 if (ASIC_IS_AVIVO(rdev))
8727                         return -EINVAL;
8728         }
8729         /* Must be an ATOMBIOS */
8730         if (!rdev->is_atom_bios) {
8731                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8732                 return -EINVAL;
8733         }
8734         r = radeon_atombios_init(rdev);
8735         if (r)
8736                 return r;
8737
8738         /* Post card if necessary */
8739         if (!radeon_card_posted(rdev)) {
8740                 if (!rdev->bios) {
8741                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8742                         return -EINVAL;
8743                 }
8744                 DRM_INFO("GPU not posted. posting now...\n");
8745                 atom_asic_init(rdev->mode_info.atom_context);
8746         }
8747         /* init golden registers */
8748         cik_init_golden_registers(rdev);
8749         /* Initialize scratch registers */
8750         cik_scratch_init(rdev);
8751         /* Initialize surface registers */
8752         radeon_surface_init(rdev);
8753         /* Initialize clocks */
8754         radeon_get_clock_info(rdev->ddev);
8755
8756         /* Fence driver */
8757         r = radeon_fence_driver_init(rdev);
8758         if (r)
8759                 return r;
8760
8761         /* initialize memory controller */
8762         r = cik_mc_init(rdev);
8763         if (r)
8764                 return r;
8765         /* Memory manager */
8766         r = radeon_bo_init(rdev);
8767         if (r)
8768                 return r;
8769
8770         if (rdev->flags & RADEON_IS_IGP) {
8771                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8772                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8773                         r = cik_init_microcode(rdev);
8774                         if (r) {
8775                                 DRM_ERROR("Failed to load firmware!\n");
8776                                 return r;
8777                         }
8778                 }
8779         } else {
8780                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8781                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8782                     !rdev->mc_fw) {
8783                         r = cik_init_microcode(rdev);
8784                         if (r) {
8785                                 DRM_ERROR("Failed to load firmware!\n");
8786                                 return r;
8787                         }
8788                 }
8789         }
8790
8791         /* Initialize power management */
8792         radeon_pm_init(rdev);
8793
8794         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8795         ring->ring_obj = NULL;
8796         r600_ring_init(rdev, ring, 1024 * 1024);
8797
8798         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8799         ring->ring_obj = NULL;
8800         r600_ring_init(rdev, ring, 1024 * 1024);
8801         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8802         if (r)
8803                 return r;
8804
8805         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8806         ring->ring_obj = NULL;
8807         r600_ring_init(rdev, ring, 1024 * 1024);
8808         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8809         if (r)
8810                 return r;
8811
8812         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8813         ring->ring_obj = NULL;
8814         r600_ring_init(rdev, ring, 256 * 1024);
8815
8816         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8817         ring->ring_obj = NULL;
8818         r600_ring_init(rdev, ring, 256 * 1024);
8819
8820         r = radeon_uvd_init(rdev);
8821         if (!r) {
8822                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8823                 ring->ring_obj = NULL;
8824                 r600_ring_init(rdev, ring, 4096);
8825         }
8826
8827         r = radeon_vce_init(rdev);
8828         if (!r) {
8829                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8830                 ring->ring_obj = NULL;
8831                 r600_ring_init(rdev, ring, 4096);
8832
8833                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8834                 ring->ring_obj = NULL;
8835                 r600_ring_init(rdev, ring, 4096);
8836         }
8837
8838         rdev->ih.ring_obj = NULL;
8839         r600_ih_ring_init(rdev, 64 * 1024);
8840
8841         r = r600_pcie_gart_init(rdev);
8842         if (r)
8843                 return r;
8844
8845         rdev->accel_working = true;
8846         r = cik_startup(rdev);
8847         if (r) {
8848                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8849                 cik_cp_fini(rdev);
8850                 cik_sdma_fini(rdev);
8851                 cik_irq_fini(rdev);
8852                 sumo_rlc_fini(rdev);
8853                 cik_mec_fini(rdev);
8854                 radeon_wb_fini(rdev);
8855                 radeon_ib_pool_fini(rdev);
8856                 radeon_vm_manager_fini(rdev);
8857                 radeon_irq_kms_fini(rdev);
8858                 cik_pcie_gart_fini(rdev);
8859                 rdev->accel_working = false;
8860         }
8861
8862         /* Don't start up if the MC ucode is missing.
8863          * The default clocks and voltages before the MC ucode
8864          * is loaded are not suffient for advanced operations.
8865          */
8866         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8867                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8868                 return -EINVAL;
8869         }
8870
8871         return 0;
8872 }
8873
8874 /**
8875  * cik_fini - asic specific driver and hw fini
8876  *
8877  * @rdev: radeon_device pointer
8878  *
8879  * Tear down the asic specific driver variables and program the hw
8880  * to an idle state (CIK).
8881  * Called at driver unload.
8882  */
void cik_fini(struct radeon_device *rdev)
{
	/* Tear everything down roughly in the reverse order of
	 * cik_init()/cik_startup(): engines first, then shared
	 * driver state.
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* tear down PG/CG state before the interrupt handling */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the cached BIOS copy and clear the pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8909
8910 void dce8_program_fmt(struct drm_encoder *encoder)
8911 {
8912         struct drm_device *dev = encoder->dev;
8913         struct radeon_device *rdev = dev->dev_private;
8914         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8915         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8916         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8917         int bpc = 0;
8918         u32 tmp = 0;
8919         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8920
8921         if (connector) {
8922                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8923                 bpc = radeon_get_monitor_bpc(connector);
8924                 dither = radeon_connector->dither;
8925         }
8926
8927         /* LVDS/eDP FMT is set up by atom */
8928         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8929                 return;
8930
8931         /* not needed for analog */
8932         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8933             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8934                 return;
8935
8936         if (bpc == 0)
8937                 return;
8938
8939         switch (bpc) {
8940         case 6:
8941                 if (dither == RADEON_FMT_DITHER_ENABLE)
8942                         /* XXX sort out optimal dither settings */
8943                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8944                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8945                 else
8946                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8947                 break;
8948         case 8:
8949                 if (dither == RADEON_FMT_DITHER_ENABLE)
8950                         /* XXX sort out optimal dither settings */
8951                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8952                                 FMT_RGB_RANDOM_ENABLE |
8953                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8954                 else
8955                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8956                 break;
8957         case 10:
8958                 if (dither == RADEON_FMT_DITHER_ENABLE)
8959                         /* XXX sort out optimal dither settings */
8960                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8961                                 FMT_RGB_RANDOM_ENABLE |
8962                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8963                 else
8964                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8965                 break;
8966         default:
8967                 /* not needed */
8968                 break;
8969         }
8970
8971         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8972 }
8973
8974 /* display watermark setup */
8975 /**
8976  * dce8_line_buffer_adjust - Set up the line buffer
8977  *
8978  * @rdev: radeon_device pointer
8979  * @radeon_crtc: the selected display controller
8980  * @mode: the current display mode on the selected display
8981  * controller
8982  *
8983  * Setup up the line buffer allocation for
8984  * the selected display controller (CIK).
8985  * Returns the line buffer size in pixels.
8986  */
8987 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8988                                    struct radeon_crtc *radeon_crtc,
8989                                    struct drm_display_mode *mode)
8990 {
8991         u32 tmp, buffer_alloc, i;
8992         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8993         /*
8994          * Line Buffer Setup
8995          * There are 6 line buffers, one for each display controllers.
8996          * There are 3 partitions per LB. Select the number of partitions
8997          * to enable based on the display width.  For display widths larger
8998          * than 4096, you need use to use 2 display controllers and combine
8999          * them using the stereo blender.
9000          */
9001         if (radeon_crtc->base.enabled && mode) {
9002                 if (mode->crtc_hdisplay < 1920) {
9003                         tmp = 1;
9004                         buffer_alloc = 2;
9005                 } else if (mode->crtc_hdisplay < 2560) {
9006                         tmp = 2;
9007                         buffer_alloc = 2;
9008                 } else if (mode->crtc_hdisplay < 4096) {
9009                         tmp = 0;
9010                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9011                 } else {
9012                         DRM_DEBUG_KMS("Mode too big for LB!\n");
9013                         tmp = 0;
9014                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9015                 }
9016         } else {
9017                 tmp = 1;
9018                 buffer_alloc = 0;
9019         }
9020
9021         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9022                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9023
9024         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9025                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9026         for (i = 0; i < rdev->usec_timeout; i++) {
9027                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9028                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
9029                         break;
9030                 udelay(1);
9031         }
9032
9033         if (radeon_crtc->base.enabled && mode) {
9034                 switch (tmp) {
9035                 case 0:
9036                 default:
9037                         return 4096 * 2;
9038                 case 1:
9039                         return 1920 * 2;
9040                 case 2:
9041                         return 2560 * 2;
9042                 }
9043         }
9044
9045         /* controller not enabled, so no lb used */
9046         return 0;
9047 }
9048
9049 /**
9050  * cik_get_number_of_dram_channels - get the number of dram channels
9051  *
9052  * @rdev: radeon_device pointer
9053  *
9054  * Look up the number of video ram channels (CIK).
9055  * Used for display watermark bandwidth calculations
9056  * Returns the number of dram channels
9057  */
9058 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9059 {
9060         u32 tmp = RREG32(MC_SHARED_CHMAP);
9061
9062         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9063         case 0:
9064         default:
9065                 return 1;
9066         case 1:
9067                 return 2;
9068         case 2:
9069                 return 4;
9070         case 3:
9071                 return 8;
9072         case 4:
9073                 return 3;
9074         case 5:
9075                 return 6;
9076         case 6:
9077                 return 10;
9078         case 7:
9079                 return 12;
9080         case 8:
9081                 return 16;
9082         }
9083 }
9084
/* Input parameters for the DCE8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9100
9101 /**
9102  * dce8_dram_bandwidth - get the dram bandwidth
9103  *
9104  * @wm: watermark calculation data
9105  *
9106  * Calculate the raw dram bandwidth (CIK).
9107  * Used for display watermark bandwidth calculations
9108  * Returns the dram bandwidth in MBytes/s
9109  */
9110 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9111 {
9112         /* Calculate raw DRAM Bandwidth */
9113         fixed20_12 dram_efficiency; /* 0.7 */
9114         fixed20_12 yclk, dram_channels, bandwidth;
9115         fixed20_12 a;
9116
9117         a.full = dfixed_const(1000);
9118         yclk.full = dfixed_const(wm->yclk);
9119         yclk.full = dfixed_div(yclk, a);
9120         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9121         a.full = dfixed_const(10);
9122         dram_efficiency.full = dfixed_const(7);
9123         dram_efficiency.full = dfixed_div(dram_efficiency, a);
9124         bandwidth.full = dfixed_mul(dram_channels, yclk);
9125         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9126
9127         return dfixed_trunc(bandwidth);
9128 }
9129
9130 /**
9131  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Calculate the dram bandwidth used for display (CIK).
9136  * Used for display watermark bandwidth calculations
9137  * Returns the dram bandwidth for display in MBytes/s
9138  */
9139 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9140 {
9141         /* Calculate DRAM Bandwidth and the part allocated to display. */
9142         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9143         fixed20_12 yclk, dram_channels, bandwidth;
9144         fixed20_12 a;
9145
9146         a.full = dfixed_const(1000);
9147         yclk.full = dfixed_const(wm->yclk);
9148         yclk.full = dfixed_div(yclk, a);
9149         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9150         a.full = dfixed_const(10);
9151         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9152         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9153         bandwidth.full = dfixed_mul(dram_channels, yclk);
9154         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9155
9156         return dfixed_trunc(bandwidth);
9157 }
9158
9159 /**
9160  * dce8_data_return_bandwidth - get the data return bandwidth
9161  *
9162  * @wm: watermark calculation data
9163  *
9164  * Calculate the data return bandwidth used for display (CIK).
9165  * Used for display watermark bandwidth calculations
9166  * Returns the data return bandwidth in MBytes/s
9167  */
9168 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9169 {
9170         /* Calculate the display Data return Bandwidth */
9171         fixed20_12 return_efficiency; /* 0.8 */
9172         fixed20_12 sclk, bandwidth;
9173         fixed20_12 a;
9174
9175         a.full = dfixed_const(1000);
9176         sclk.full = dfixed_const(wm->sclk);
9177         sclk.full = dfixed_div(sclk, a);
9178         a.full = dfixed_const(10);
9179         return_efficiency.full = dfixed_const(8);
9180         return_efficiency.full = dfixed_div(return_efficiency, a);
9181         a.full = dfixed_const(32);
9182         bandwidth.full = dfixed_mul(a, sclk);
9183         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9184
9185         return dfixed_trunc(bandwidth);
9186 }
9187
9188 /**
9189  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9190  *
9191  * @wm: watermark calculation data
9192  *
9193  * Calculate the dmif bandwidth used for display (CIK).
9194  * Used for display watermark bandwidth calculations
9195  * Returns the dmif bandwidth in MBytes/s
9196  */
9197 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9198 {
9199         /* Calculate the DMIF Request Bandwidth */
9200         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9201         fixed20_12 disp_clk, bandwidth;
9202         fixed20_12 a, b;
9203
9204         a.full = dfixed_const(1000);
9205         disp_clk.full = dfixed_const(wm->disp_clk);
9206         disp_clk.full = dfixed_div(disp_clk, a);
9207         a.full = dfixed_const(32);
9208         b.full = dfixed_mul(a, disp_clk);
9209
9210         a.full = dfixed_const(10);
9211         disp_clk_request_efficiency.full = dfixed_const(8);
9212         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9213
9214         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9215
9216         return dfixed_trunc(bandwidth);
9217 }
9218
9219 /**
9220  * dce8_available_bandwidth - get the min available bandwidth
9221  *
9222  * @wm: watermark calculation data
9223  *
9224  * Calculate the min available bandwidth used for display (CIK).
9225  * Used for display watermark bandwidth calculations
9226  * Returns the min available bandwidth in MBytes/s
9227  */
9228 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9229 {
9230         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9231         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9232         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9233         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9234
9235         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9236 }
9237
9238 /**
9239  * dce8_average_bandwidth - get the average available bandwidth
9240  *
9241  * @wm: watermark calculation data
9242  *
9243  * Calculate the average available bandwidth used for display (CIK).
9244  * Used for display watermark bandwidth calculations
9245  * Returns the average available bandwidth in MBytes/s
9246  */
9247 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9248 {
9249         /* Calculate the display mode Average Bandwidth
9250          * DisplayMode should contain the source and destination dimensions,
9251          * timing, etc.
9252          */
9253         fixed20_12 bpp;
9254         fixed20_12 line_time;
9255         fixed20_12 src_width;
9256         fixed20_12 bandwidth;
9257         fixed20_12 a;
9258
9259         a.full = dfixed_const(1000);
9260         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9261         line_time.full = dfixed_div(line_time, a);
9262         bpp.full = dfixed_const(wm->bytes_per_pixel);
9263         src_width.full = dfixed_const(wm->src_width);
9264         bandwidth.full = dfixed_mul(src_width, bpp);
9265         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9266         bandwidth.full = dfixed_div(bandwidth, line_time);
9267
9268         return dfixed_trunc(bandwidth);
9269 }
9270
9271 /**
9272  * dce8_latency_watermark - get the latency watermark
9273  *
9274  * @wm: watermark calculation data
9275  *
9276  * Calculate the latency watermark (CIK).
9277  * Used for display watermark bandwidth calculations
9278  * Returns the latency watermark in ns
9279  */
9280 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9281 {
9282         /* First calculate the latency in ns */
9283         u32 mc_latency = 2000; /* 2000 ns. */
9284         u32 available_bandwidth = dce8_available_bandwidth(wm);
9285         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9286         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9287         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9288         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9289                 (wm->num_heads * cursor_line_pair_return_time);
9290         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9291         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9292         u32 tmp, dmif_size = 12288;
9293         fixed20_12 a, b, c;
9294
9295         if (wm->num_heads == 0)
9296                 return 0;
9297
9298         a.full = dfixed_const(2);
9299         b.full = dfixed_const(1);
9300         if ((wm->vsc.full > a.full) ||
9301             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9302             (wm->vtaps >= 5) ||
9303             ((wm->vsc.full >= a.full) && wm->interlaced))
9304                 max_src_lines_per_dst_line = 4;
9305         else
9306                 max_src_lines_per_dst_line = 2;
9307
9308         a.full = dfixed_const(available_bandwidth);
9309         b.full = dfixed_const(wm->num_heads);
9310         a.full = dfixed_div(a, b);
9311
9312         b.full = dfixed_const(mc_latency + 512);
9313         c.full = dfixed_const(wm->disp_clk);
9314         b.full = dfixed_div(b, c);
9315
9316         c.full = dfixed_const(dmif_size);
9317         b.full = dfixed_div(c, b);
9318
9319         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9320
9321         b.full = dfixed_const(1000);
9322         c.full = dfixed_const(wm->disp_clk);
9323         b.full = dfixed_div(c, b);
9324         c.full = dfixed_const(wm->bytes_per_pixel);
9325         b.full = dfixed_mul(b, c);
9326
9327         lb_fill_bw = min(tmp, dfixed_trunc(b));
9328
9329         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9330         b.full = dfixed_const(1000);
9331         c.full = dfixed_const(lb_fill_bw);
9332         b.full = dfixed_div(c, b);
9333         a.full = dfixed_div(a, b);
9334         line_fill_time = dfixed_trunc(a);
9335
9336         if (line_fill_time < wm->active_time)
9337                 return latency;
9338         else
9339                 return latency + (line_fill_time - wm->active_time);
9340
9341 }
9342
9343 /**
9344  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9345  * average and available dram bandwidth
9346  *
9347  * @wm: watermark calculation data
9348  *
9349  * Check if the display average bandwidth fits in the display
9350  * dram bandwidth (CIK).
9351  * Used for display watermark bandwidth calculations
9352  * Returns true if the display fits, false if not.
9353  */
9354 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9355 {
9356         if (dce8_average_bandwidth(wm) <=
9357             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9358                 return true;
9359         else
9360                 return false;
9361 }
9362
9363 /**
9364  * dce8_average_bandwidth_vs_available_bandwidth - check
9365  * average and available bandwidth
9366  *
9367  * @wm: watermark calculation data
9368  *
9369  * Check if the display average bandwidth fits in the display
9370  * available bandwidth (CIK).
9371  * Used for display watermark bandwidth calculations
9372  * Returns true if the display fits, false if not.
9373  */
9374 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9375 {
9376         if (dce8_average_bandwidth(wm) <=
9377             (dce8_available_bandwidth(wm) / wm->num_heads))
9378                 return true;
9379         else
9380                 return false;
9381 }
9382
9383 /**
9384  * dce8_check_latency_hiding - check latency hiding
9385  *
9386  * @wm: watermark calculation data
9387  *
9388  * Check latency hiding (CIK).
9389  * Used for display watermark bandwidth calculations
9390  * Returns true if the display fits, false if not.
9391  */
9392 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9393 {
9394         u32 lb_partitions = wm->lb_size / wm->src_width;
9395         u32 line_time = wm->active_time + wm->blank_time;
9396         u32 latency_tolerant_lines;
9397         u32 latency_hiding;
9398         fixed20_12 a;
9399
9400         a.full = dfixed_const(1);
9401         if (wm->vsc.full > a.full)
9402                 latency_tolerant_lines = 1;
9403         else {
9404                 if (lb_partitions <= (wm->vtaps + 1))
9405                         latency_tolerant_lines = 1;
9406                 else
9407                         latency_tolerant_lines = 2;
9408         }
9409
9410         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9411
9412         if (dce8_latency_watermark(wm) <= latency_hiding)
9413                 return true;
9414         else
9415                 return false;
9416 }
9417
9418 /**
9419  * dce8_program_watermarks - program display watermarks
9420  *
9421  * @rdev: radeon_device pointer
9422  * @radeon_crtc: the selected display controller
9423  * @lb_size: line buffer size
9424  * @num_heads: number of display controllers in use
9425  *
9426  * Calculate and program the display watermarks for the
9427  * selected display controller (CIK).
9428  */
9429 static void dce8_program_watermarks(struct radeon_device *rdev,
9430                                     struct radeon_crtc *radeon_crtc,
9431                                     u32 lb_size, u32 num_heads)
9432 {
9433         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9434         struct dce8_wm_params wm_low, wm_high;
9435         u32 pixel_period;
9436         u32 line_time = 0;
9437         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9438         u32 tmp, wm_mask;
9439
9440         if (radeon_crtc->base.enabled && num_heads && mode) {
9441                 pixel_period = 1000000 / (u32)mode->clock;
9442                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9443
9444                 /* watermark for high clocks */
9445                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9446                     rdev->pm.dpm_enabled) {
9447                         wm_high.yclk =
9448                                 radeon_dpm_get_mclk(rdev, false) * 10;
9449                         wm_high.sclk =
9450                                 radeon_dpm_get_sclk(rdev, false) * 10;
9451                 } else {
9452                         wm_high.yclk = rdev->pm.current_mclk * 10;
9453                         wm_high.sclk = rdev->pm.current_sclk * 10;
9454                 }
9455
9456                 wm_high.disp_clk = mode->clock;
9457                 wm_high.src_width = mode->crtc_hdisplay;
9458                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9459                 wm_high.blank_time = line_time - wm_high.active_time;
9460                 wm_high.interlaced = false;
9461                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9462                         wm_high.interlaced = true;
9463                 wm_high.vsc = radeon_crtc->vsc;
9464                 wm_high.vtaps = 1;
9465                 if (radeon_crtc->rmx_type != RMX_OFF)
9466                         wm_high.vtaps = 2;
9467                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9468                 wm_high.lb_size = lb_size;
9469                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9470                 wm_high.num_heads = num_heads;
9471
9472                 /* set for high clocks */
9473                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9474
9475                 /* possibly force display priority to high */
9476                 /* should really do this at mode validation time... */
9477                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9478                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9479                     !dce8_check_latency_hiding(&wm_high) ||
9480                     (rdev->disp_priority == 2)) {
9481                         DRM_DEBUG_KMS("force priority to high\n");
9482                 }
9483
9484                 /* watermark for low clocks */
9485                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9486                     rdev->pm.dpm_enabled) {
9487                         wm_low.yclk =
9488                                 radeon_dpm_get_mclk(rdev, true) * 10;
9489                         wm_low.sclk =
9490                                 radeon_dpm_get_sclk(rdev, true) * 10;
9491                 } else {
9492                         wm_low.yclk = rdev->pm.current_mclk * 10;
9493                         wm_low.sclk = rdev->pm.current_sclk * 10;
9494                 }
9495
9496                 wm_low.disp_clk = mode->clock;
9497                 wm_low.src_width = mode->crtc_hdisplay;
9498                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9499                 wm_low.blank_time = line_time - wm_low.active_time;
9500                 wm_low.interlaced = false;
9501                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9502                         wm_low.interlaced = true;
9503                 wm_low.vsc = radeon_crtc->vsc;
9504                 wm_low.vtaps = 1;
9505                 if (radeon_crtc->rmx_type != RMX_OFF)
9506                         wm_low.vtaps = 2;
9507                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9508                 wm_low.lb_size = lb_size;
9509                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9510                 wm_low.num_heads = num_heads;
9511
9512                 /* set for low clocks */
9513                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9514
9515                 /* possibly force display priority to high */
9516                 /* should really do this at mode validation time... */
9517                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9518                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9519                     !dce8_check_latency_hiding(&wm_low) ||
9520                     (rdev->disp_priority == 2)) {
9521                         DRM_DEBUG_KMS("force priority to high\n");
9522                 }
9523
9524                 /* Save number of lines the linebuffer leads before the scanout */
9525                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9526         }
9527
9528         /* select wm A */
9529         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9530         tmp = wm_mask;
9531         tmp &= ~LATENCY_WATERMARK_MASK(3);
9532         tmp |= LATENCY_WATERMARK_MASK(1);
9533         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9534         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9535                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9536                 LATENCY_HIGH_WATERMARK(line_time)));
9537         /* select wm B */
9538         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9539         tmp &= ~LATENCY_WATERMARK_MASK(3);
9540         tmp |= LATENCY_WATERMARK_MASK(2);
9541         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9542         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9543                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9544                 LATENCY_HIGH_WATERMARK(line_time)));
9545         /* restore original selection */
9546         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9547
9548         /* save values for DPM */
9549         radeon_crtc->line_time = line_time;
9550         radeon_crtc->wm_high = latency_watermark_a;
9551         radeon_crtc->wm_low = latency_watermark_b;
9552 }
9553
9554 /**
9555  * dce8_bandwidth_update - program display watermarks
9556  *
9557  * @rdev: radeon_device pointer
9558  *
9559  * Calculate and program the display watermarks and line
9560  * buffer allocation (CIK).
9561  */
9562 void dce8_bandwidth_update(struct radeon_device *rdev)
9563 {
9564         struct drm_display_mode *mode = NULL;
9565         u32 num_heads = 0, lb_size;
9566         int i;
9567
9568         if (!rdev->mode_info.mode_config_initialized)
9569                 return;
9570
9571         radeon_update_display_priority(rdev);
9572
9573         for (i = 0; i < rdev->num_crtc; i++) {
9574                 if (rdev->mode_info.crtcs[i]->base.enabled)
9575                         num_heads++;
9576         }
9577         for (i = 0; i < rdev->num_crtc; i++) {
9578                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9579                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9580                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9581         }
9582 }
9583
9584 /**
9585  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9586  *
9587  * @rdev: radeon_device pointer
9588  *
9589  * Fetches a GPU clock counter snapshot (SI).
9590  * Returns the 64 bit clock counter snapshot.
9591  */
9592 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9593 {
9594         uint64_t clock;
9595
9596         mutex_lock(&rdev->gpu_clock_mutex);
9597         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9598         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9599                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9600         mutex_unlock(&rdev->gpu_clock_mutex);
9601         return clock;
9602 }
9603
9604 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9605                               u32 cntl_reg, u32 status_reg)
9606 {
9607         int r, i;
9608         struct atom_clock_dividers dividers;
9609         uint32_t tmp;
9610
9611         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9612                                            clock, false, &dividers);
9613         if (r)
9614                 return r;
9615
9616         tmp = RREG32_SMC(cntl_reg);
9617         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9618         tmp |= dividers.post_divider;
9619         WREG32_SMC(cntl_reg, tmp);
9620
9621         for (i = 0; i < 100; i++) {
9622                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9623                         break;
9624                 mdelay(10);
9625         }
9626         if (i == 100)
9627                 return -ETIMEDOUT;
9628
9629         return 0;
9630 }
9631
9632 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9633 {
9634         int r = 0;
9635
9636         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9637         if (r)
9638                 return r;
9639
9640         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9641         return r;
9642 }
9643
9644 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9645 {
9646         int r, i;
9647         struct atom_clock_dividers dividers;
9648         u32 tmp;
9649
9650         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9651                                            ecclk, false, &dividers);
9652         if (r)
9653                 return r;
9654
9655         for (i = 0; i < 100; i++) {
9656                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9657                         break;
9658                 mdelay(10);
9659         }
9660         if (i == 100)
9661                 return -ETIMEDOUT;
9662
9663         tmp = RREG32_SMC(CG_ECLK_CNTL);
9664         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9665         tmp |= dividers.post_divider;
9666         WREG32_SMC(CG_ECLK_CNTL, tmp);
9667
9668         for (i = 0; i < 100; i++) {
9669                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9670                         break;
9671                 mdelay(10);
9672         }
9673         if (i == 100)
9674                 return -ETIMEDOUT;
9675
9676         return 0;
9677 }
9678
/* Bring the PCIe link up to the fastest speed both the GPU and the
 * upstream bridge support (gen2/gen3), retraining with the equalization
 * retry dance required for gen3.  No-op for IGPs, non-PCIe parts, root-bus
 * devices, or when disabled via radeon.pcie_gen2=0.
 *
 * NOTE(review): the read-modify-write of LNKCTL/LNKCTL2 config space here
 * can race with the ASPM core doing its own LNKCTL updates; upstream later
 * converted this pattern to pcie_capability_clear_and_set_word() RMW
 * accessors -- confirm before backporting changes.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the upstream port support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* offsets of the PCIe capability blocks in config space */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control state on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* enable hardware autonomous width disable during retrain */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate back up to the maximum detected width if possible */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved enter-compliance (bit 4)
				 * and transmit-margin (bits 9-11) fields
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9838
9839 static void cik_program_aspm(struct radeon_device *rdev)
9840 {
9841         u32 data, orig;
9842         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9843         bool disable_clkreq = false;
9844
9845         if (radeon_aspm == 0)
9846                 return;
9847
9848         /* XXX double check IGPs */
9849         if (rdev->flags & RADEON_IS_IGP)
9850                 return;
9851
9852         if (!(rdev->flags & RADEON_IS_PCIE))
9853                 return;
9854
9855         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9856         data &= ~LC_XMIT_N_FTS_MASK;
9857         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9858         if (orig != data)
9859                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9860
9861         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9862         data |= LC_GO_TO_RECOVERY;
9863         if (orig != data)
9864                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9865
9866         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9867         data |= P_IGNORE_EDB_ERR;
9868         if (orig != data)
9869                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9870
9871         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9872         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9873         data |= LC_PMI_TO_L1_DIS;
9874         if (!disable_l0s)
9875                 data |= LC_L0S_INACTIVITY(7);
9876
9877         if (!disable_l1) {
9878                 data |= LC_L1_INACTIVITY(7);
9879                 data &= ~LC_PMI_TO_L1_DIS;
9880                 if (orig != data)
9881                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9882
9883                 if (!disable_plloff_in_l1) {
9884                         bool clk_req_support;
9885
9886                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9887                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9888                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9889                         if (orig != data)
9890                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9891
9892                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9893                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9894                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9895                         if (orig != data)
9896                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9897
9898                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9899                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9900                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9901                         if (orig != data)
9902                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9903
9904                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9905                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9906                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9907                         if (orig != data)
9908                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9909
9910                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9911                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9912                         data |= LC_DYN_LANES_PWR_STATE(3);
9913                         if (orig != data)
9914                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9915
9916                         if (!disable_clkreq &&
9917                             !pci_is_root_bus(rdev->pdev->bus)) {
9918                                 struct pci_dev *root = rdev->pdev->bus->self;
9919                                 u32 lnkcap;
9920
9921                                 clk_req_support = false;
9922                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9923                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9924                                         clk_req_support = true;
9925                         } else {
9926                                 clk_req_support = false;
9927                         }
9928
9929                         if (clk_req_support) {
9930                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9931                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9932                                 if (orig != data)
9933                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9934
9935                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9936                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9937                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9938                                 if (orig != data)
9939                                         WREG32_SMC(THM_CLK_CNTL, data);
9940
9941                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9942                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9943                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9944                                 if (orig != data)
9945                                         WREG32_SMC(MISC_CLK_CTRL, data);
9946
9947                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9948                                 data &= ~BCLK_AS_XCLK;
9949                                 if (orig != data)
9950                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9951
9952                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9953                                 data &= ~FORCE_BIF_REFCLK_EN;
9954                                 if (orig != data)
9955                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9956
9957                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9958                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9959                                 data |= MPLL_CLKOUT_SEL(4);
9960                                 if (orig != data)
9961                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9962                         }
9963                 }
9964         } else {
9965                 if (orig != data)
9966                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9967         }
9968
9969         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9970         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9971         if (orig != data)
9972                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9973
9974         if (!disable_l0s) {
9975                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9976                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9977                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9978                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9979                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9980                                 data &= ~LC_L0S_INACTIVITY_MASK;
9981                                 if (orig != data)
9982                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9983                         }
9984                 }
9985         }
9986 }