GNU Linux-libre 4.19.211-gnu1
[releases.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39
40 /*(DEBLOBBED)*/
41
42 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
43 extern void r600_ih_ring_fini(struct radeon_device *rdev);
44 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
45 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
46 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
47 extern void sumo_rlc_fini(struct radeon_device *rdev);
48 extern int sumo_rlc_init(struct radeon_device *rdev);
49 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
50 extern void si_rlc_reset(struct radeon_device *rdev);
51 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
52 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
53 extern int cik_sdma_resume(struct radeon_device *rdev);
54 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
55 extern void cik_sdma_fini(struct radeon_device *rdev);
56 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
57 static void cik_rlc_stop(struct radeon_device *rdev);
58 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
59 static void cik_program_aspm(struct radeon_device *rdev);
60 static void cik_init_pg(struct radeon_device *rdev);
61 static void cik_init_cg(struct radeon_device *rdev);
62 static void cik_fini_pg(struct radeon_device *rdev);
63 static void cik_fini_cg(struct radeon_device *rdev);
64 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
65                                           bool enable);
66
67 /**
68  * cik_get_allowed_info_register - fetch the register for the info ioctl
69  *
70  * @rdev: radeon_device pointer
71  * @reg: register offset in bytes
72  * @val: register value
73  *
74  * Returns 0 for success or -EINVAL for an invalid register
75  *
76  */
77 int cik_get_allowed_info_register(struct radeon_device *rdev,
78                                   u32 reg, u32 *val)
79 {
80         switch (reg) {
81         case GRBM_STATUS:
82         case GRBM_STATUS2:
83         case GRBM_STATUS_SE0:
84         case GRBM_STATUS_SE1:
85         case GRBM_STATUS_SE2:
86         case GRBM_STATUS_SE3:
87         case SRBM_STATUS:
88         case SRBM_STATUS2:
89         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
90         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
91         case UVD_STATUS:
92         /* TODO VCE */
93                 *val = RREG32(reg);
94                 return 0;
95         default:
96                 return -EINVAL;
97         }
98 }
99
100 /*
101  * Indirect registers accessor
102  */
103 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
104 {
105         unsigned long flags;
106         u32 r;
107
108         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
109         WREG32(CIK_DIDT_IND_INDEX, (reg));
110         r = RREG32(CIK_DIDT_IND_DATA);
111         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
112         return r;
113 }
114
115 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
116 {
117         unsigned long flags;
118
119         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
120         WREG32(CIK_DIDT_IND_INDEX, (reg));
121         WREG32(CIK_DIDT_IND_DATA, (v));
122         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
123 }
124
125 /* get temperature in millidegrees */
126 int ci_get_temp(struct radeon_device *rdev)
127 {
128         u32 temp;
129         int actual_temp = 0;
130
131         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
132                 CTF_TEMP_SHIFT;
133
134         if (temp & 0x200)
135                 actual_temp = 255;
136         else
137                 actual_temp = temp & 0x1ff;
138
139         actual_temp = actual_temp * 1000;
140
141         return actual_temp;
142 }
143
144 /* get temperature in millidegrees */
145 int kv_get_temp(struct radeon_device *rdev)
146 {
147         u32 temp;
148         int actual_temp = 0;
149
150         temp = RREG32_SMC(0xC0300E0C);
151
152         if (temp)
153                 actual_temp = (temp / 8) - 49;
154         else
155                 actual_temp = 0;
156
157         actual_temp = actual_temp * 1000;
158
159         return actual_temp;
160 }
161
162 /*
163  * Indirect registers accessor
164  */
165 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
166 {
167         unsigned long flags;
168         u32 r;
169
170         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
171         WREG32(PCIE_INDEX, reg);
172         (void)RREG32(PCIE_INDEX);
173         r = RREG32(PCIE_DATA);
174         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
175         return r;
176 }
177
178 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
179 {
180         unsigned long flags;
181
182         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
183         WREG32(PCIE_INDEX, reg);
184         (void)RREG32(PCIE_INDEX);
185         WREG32(PCIE_DATA, v);
186         (void)RREG32(PCIE_DATA);
187         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
188 }
189
/*
 * RLC save/restore register list for Spectre parts.
 *
 * NOTE(review): the layout appears to be pairs of
 *   ((GRBM instance select << 16) | (register byte offset >> 2)), 0x00000000
 * with bare small values (0x3, 0x5) separating sections and a trailing
 * run of entries with no zero slots.  The code that consumes this table
 * is not visible in this chunk — confirm the format against the RLC
 * setup code before relying on this interpretation.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
636
/*
 * RLC save/restore register list for Kalindi parts.
 *
 * NOTE(review): same apparent layout as the Spectre list above —
 * pairs of ((GRBM instance select << 16) | (register byte offset >> 2))
 * and a 0x00000000 slot, with bare 0x3/0x5 section markers and a
 * trailing run of entries with no zero slots.  The consuming code is
 * not visible in this chunk; confirm the format there.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
961
/* Bonaire SPM golden settings: { reg offset, and_mask, or_value } triples
 * consumed by radeon_program_register_sequence() (read-modify-write).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
966
/* Bonaire common golden settings: { reg offset, and_mask, or_value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
974
/* Bonaire golden register settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1019
/* Bonaire medium-grain / coarse-grain clock-gating init sequence:
 * { reg offset, and_mask, or_value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1105
/* Spectre (Kaveri) SPM golden settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1110
/* Spectre (Kaveri) common golden settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1118
/* Spectre (Kaveri) golden register settings: { reg offset, and_mask,
 * or_value } triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1147
/* Spectre (Kaveri) medium-grain / coarse-grain clock-gating init sequence:
 * { reg offset, and_mask, or_value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1238
/* Kalindi (Kabini) SPM golden settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1243
/* Kalindi (Kabini) common golden settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1251
/* Kalindi (Kabini) golden register settings: { reg offset, and_mask,
 * or_value } triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1285
/* Kalindi (Kabini/Mullins) medium-grain / coarse-grain clock-gating init
 * sequence: { reg offset, and_mask, or_value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1344
/* Hawaii SPM golden settings: { reg offset, and_mask, or_value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1349
/* Hawaii common golden settings: { reg offset, and_mask, or_value } triples
 * consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1358
/* Hawaii golden register settings: { reg offset, and_mask, or_value }
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1398
/* Hawaii medium-grain / coarse-grain clock-gating init sequence:
 * { reg offset, and_mask, or_value } triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1509
/* Godavari (Mullins) golden register settings: { reg offset, and_mask,
 * or_value } triples consumed by radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 looks like a typo for 0x9834 (the kalindi table
	 * uses 0x9834 with the same mask/value) — confirm against the hardware
	 * team's golden settings before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1545
1546
1547 static void cik_init_golden_registers(struct radeon_device *rdev)
1548 {
1549         switch (rdev->family) {
1550         case CHIP_BONAIRE:
1551                 radeon_program_register_sequence(rdev,
1552                                                  bonaire_mgcg_cgcg_init,
1553                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1554                 radeon_program_register_sequence(rdev,
1555                                                  bonaire_golden_registers,
1556                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1557                 radeon_program_register_sequence(rdev,
1558                                                  bonaire_golden_common_registers,
1559                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1560                 radeon_program_register_sequence(rdev,
1561                                                  bonaire_golden_spm_registers,
1562                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1563                 break;
1564         case CHIP_KABINI:
1565                 radeon_program_register_sequence(rdev,
1566                                                  kalindi_mgcg_cgcg_init,
1567                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1568                 radeon_program_register_sequence(rdev,
1569                                                  kalindi_golden_registers,
1570                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1571                 radeon_program_register_sequence(rdev,
1572                                                  kalindi_golden_common_registers,
1573                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1574                 radeon_program_register_sequence(rdev,
1575                                                  kalindi_golden_spm_registers,
1576                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1577                 break;
1578         case CHIP_MULLINS:
1579                 radeon_program_register_sequence(rdev,
1580                                                  kalindi_mgcg_cgcg_init,
1581                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1582                 radeon_program_register_sequence(rdev,
1583                                                  godavari_golden_registers,
1584                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_golden_common_registers,
1587                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_spm_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1591                 break;
1592         case CHIP_KAVERI:
1593                 radeon_program_register_sequence(rdev,
1594                                                  spectre_mgcg_cgcg_init,
1595                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1596                 radeon_program_register_sequence(rdev,
1597                                                  spectre_golden_registers,
1598                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1599                 radeon_program_register_sequence(rdev,
1600                                                  spectre_golden_common_registers,
1601                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1602                 radeon_program_register_sequence(rdev,
1603                                                  spectre_golden_spm_registers,
1604                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1605                 break;
1606         case CHIP_HAWAII:
1607                 radeon_program_register_sequence(rdev,
1608                                                  hawaii_mgcg_cgcg_init,
1609                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1610                 radeon_program_register_sequence(rdev,
1611                                                  hawaii_golden_registers,
1612                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1613                 radeon_program_register_sequence(rdev,
1614                                                  hawaii_golden_common_registers,
1615                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1616                 radeon_program_register_sequence(rdev,
1617                                                  hawaii_golden_spm_registers,
1618                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1619                 break;
1620         default:
1621                 break;
1622         }
1623 }
1624
1625 /**
1626  * cik_get_xclk - get the xclk
1627  *
1628  * @rdev: radeon_device pointer
1629  *
1630  * Returns the reference clock used by the gfx engine
1631  * (CIK).
1632  */
1633 u32 cik_get_xclk(struct radeon_device *rdev)
1634 {
1635         u32 reference_clock = rdev->clock.spll.reference_freq;
1636
1637         if (rdev->flags & RADEON_IS_IGP) {
1638                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1639                         return reference_clock / 2;
1640         } else {
1641                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1642                         return reference_clock / 4;
1643         }
1644         return reference_clock;
1645 }
1646
1647 /**
1648  * cik_mm_rdoorbell - read a doorbell dword
1649  *
1650  * @rdev: radeon_device pointer
1651  * @index: doorbell index
1652  *
1653  * Returns the value in the doorbell aperture at the
1654  * requested doorbell index (CIK).
1655  */
1656 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1657 {
1658         if (index < rdev->doorbell.num_doorbells) {
1659                 return readl(rdev->doorbell.ptr + index);
1660         } else {
1661                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1662                 return 0;
1663         }
1664 }
1665
1666 /**
1667  * cik_mm_wdoorbell - write a doorbell dword
1668  *
1669  * @rdev: radeon_device pointer
1670  * @index: doorbell index
1671  * @v: value to write
1672  *
1673  * Writes @v to the doorbell aperture at the
1674  * requested doorbell index (CIK).
1675  */
1676 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1677 {
1678         if (index < rdev->doorbell.num_doorbells) {
1679                 writel(v, rdev->doorbell.ptr + index);
1680         } else {
1681                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1682         }
1683 }
1684
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC io debug register (index, value) pairs programmed into the memory
 * controller by ci_mc_load_microcode() on Bonaire when legacy (headerless)
 * MC firmware is in use.  Do not reorder or edit: the values are
 * hardware-specific golden settings.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1726
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC io debug register (index, value) pairs programmed into the memory
 * controller by ci_mc_load_microcode() on Hawaii when legacy (headerless)
 * MC firmware is in use.  Hardware-specific golden settings; do not edit.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1754
1755
1756 /**
1757  * cik_srbm_select - select specific register instances
1758  *
1759  * @rdev: radeon_device pointer
1760  * @me: selected ME (micro engine)
1761  * @pipe: pipe
1762  * @queue: queue
1763  * @vmid: VMID
1764  *
1765  * Switches the currently active registers instances.  Some
1766  * registers are instanced per VMID, others are instanced per
1767  * me/pipe/queue combination.
1768  */
1769 static void cik_srbm_select(struct radeon_device *rdev,
1770                             u32 me, u32 pipe, u32 queue, u32 vmid)
1771 {
1772         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1773                              MEID(me & 0x3) |
1774                              VMID(vmid & 0xf) |
1775                              QUEUEID(queue & 0x7));
1776         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1777 }
1778
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 *
 * If the MC engine is already running, the io regs and ucode
 * are left untouched and 0 is returned.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-format firmware: sizes and offsets come from its header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* the io debug array is (index, data) dword pairs, hence /(4*2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: use the built-in per-family io reg tables */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		/* legacy MC firmware images are big-endian */
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* extra io reg writes for one specific device id / memory
		 * revision combination (quirk) */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels;
		 * NOTE(review): a timeout here is not reported as an error */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1884
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 *
 * For each engine (pfp/me/ce/mec/rlc/sdma, plus mc/smc on dGPUs) the
 * new-style (lowercase, headered) firmware name is tried first and
 * validated; on failure the legacy (uppercase, headerless) name is
 * tried and only its size is checked.  Mixing new and legacy images
 * is rejected at the end.  The "(DEBLOBBED)" format strings are the
 * GNU Linux-libre substitutions for the original firmware names.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;		/* count of successfully validated new-style images */
	int err;
	int num_fw;		/* number of images expected for this asic */
	bool new_smc = false;	/* some revisions need an alternate SMC image */

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APU: no MC/SMC firmware, but has a second MEC */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP (prefetch processor) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME (micro engine)
	 * NOTE(review): unlike the pfp path, the legacy size-mismatch
	 * branches below record err = -EINVAL but keep loading the
	 * remaining images; the error is returned at the end.
	 */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE (constant engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC (compute micro engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* second MEC: Kaveri only, new-style firmware only */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* RLC (run list controller) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SDMA (system DMA) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* MC (memory controller): legacy path tries two image
		 * variants, so both mc_req_size and mc2_req_size are valid */
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
				err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC (system management controller): optional — a load
		 * failure is logged and cleared rather than fatal */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* all images must be the same generation: either all legacy
	 * (new_fw == 0) or all new-style (new_fw == num_fw) */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	if (err) {
		/* on any failure, release everything that was loaded;
		 * release_firmware(NULL) is a no-op */
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2233
2234 /*
2235  * Core functions
2236  */
2237 /**
2238  * cik_tiling_mode_table_init - init the hw tiling table
2239  *
2240  * @rdev: radeon_device pointer
2241  *
2242  * Starting with SI, the tiling setup is done globally in a
2243  * set of 32 tiling modes.  Rather than selecting each set of
2244  * parameters per surface as on older asics, we just select
2245  * which index in the tiling table we want to use, and the
2246  * surface uses those parameters (CIK).
2247  */
2248 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2249 {
2250         u32 *tile = rdev->config.cik.tile_mode_array;
2251         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2252         const u32 num_tile_mode_states =
2253                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2254         const u32 num_secondary_tile_mode_states =
2255                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2256         u32 reg_offset, split_equal_to_row_size;
2257         u32 num_pipe_configs;
2258         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2259                 rdev->config.cik.max_shader_engines;
2260
2261         switch (rdev->config.cik.mem_row_size_in_kb) {
2262         case 1:
2263                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2264                 break;
2265         case 2:
2266         default:
2267                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2268                 break;
2269         case 4:
2270                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2271                 break;
2272         }
2273
2274         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2275         if (num_pipe_configs > 8)
2276                 num_pipe_configs = 16;
2277
2278         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279                 tile[reg_offset] = 0;
2280         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2281                 macrotile[reg_offset] = 0;
2282
2283         switch(num_pipe_configs) {
2284         case 16:
2285                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2286                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2287                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2289                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2293                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2295                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2297                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2299                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2301                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                            TILE_SPLIT(split_equal_to_row_size));
2305                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2306                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2308                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2309                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2312                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2313                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                            TILE_SPLIT(split_equal_to_row_size));
2316                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2317                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2318                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2321                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2328                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2330                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2334                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2336                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2338                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2343                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2345                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2351                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2352                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2353                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2355                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2357                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2358                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2360                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2361                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363
2364                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367                            NUM_BANKS(ADDR_SURF_16_BANK));
2368                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2370                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                            NUM_BANKS(ADDR_SURF_16_BANK));
2372                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2375                            NUM_BANKS(ADDR_SURF_16_BANK));
2376                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379                            NUM_BANKS(ADDR_SURF_16_BANK));
2380                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383                            NUM_BANKS(ADDR_SURF_8_BANK));
2384                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387                            NUM_BANKS(ADDR_SURF_4_BANK));
2388                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391                            NUM_BANKS(ADDR_SURF_2_BANK));
2392                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2394                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2395                            NUM_BANKS(ADDR_SURF_16_BANK));
2396                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2398                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399                            NUM_BANKS(ADDR_SURF_16_BANK));
2400                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2403                             NUM_BANKS(ADDR_SURF_16_BANK));
2404                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407                             NUM_BANKS(ADDR_SURF_8_BANK));
2408                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2411                             NUM_BANKS(ADDR_SURF_4_BANK));
2412                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415                             NUM_BANKS(ADDR_SURF_2_BANK));
2416                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                             NUM_BANKS(ADDR_SURF_2_BANK));
2420
2421                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2422                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2423                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2424                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2425                 break;
2426
2427         case 8:
                /*
                 * 8-pipe configuration: every PIPE_CONFIG below uses an
                 * ADDR_SURF_P8_* layout.  The switch operand is outside this
                 * view -- presumably the number of pipes; TODO confirm.
                 *
                 * tile[] entries 15 and 18..26 are not written in this arm;
                 * whatever value they held before this case is what gets
                 * programmed by the WREG32 loop below -- verify they are
                 * zero-initialized at the top of the function.
                 */
2428                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2430                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2431                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2432                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2434                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2435                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2436                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2438                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2439                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2440                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2442                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2443                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2444                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2447                            TILE_SPLIT(split_equal_to_row_size));
2448                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2450                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2451                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2452                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2453                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2455                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458                            TILE_SPLIT(split_equal_to_row_size));
2459                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2460                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2461                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2462                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2463                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2464                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2466                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2469                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2470                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2471                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2474                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2475                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2479                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2483                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2484                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2488                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2492                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2494                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2501                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2503                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2504                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506
                /*
                 * Secondary (macro) tile modes: bank width/height, macro
                 * aspect ratio and bank count per mode.  Entry 7 is not
                 * written here, and entries 15+ are absent -- same caveat
                 * as for tile[] above regarding prior contents.
                 */
2507                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                 NUM_BANKS(ADDR_SURF_16_BANK));
2511                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2514                                 NUM_BANKS(ADDR_SURF_16_BANK));
2515                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                 NUM_BANKS(ADDR_SURF_16_BANK));
2519                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                 NUM_BANKS(ADDR_SURF_16_BANK));
2523                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2526                                 NUM_BANKS(ADDR_SURF_8_BANK));
2527                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                 NUM_BANKS(ADDR_SURF_4_BANK));
2531                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2534                                 NUM_BANKS(ADDR_SURF_2_BANK));
2535                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                 NUM_BANKS(ADDR_SURF_16_BANK));
2539                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542                                 NUM_BANKS(ADDR_SURF_16_BANK));
2543                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546                                 NUM_BANKS(ADDR_SURF_16_BANK));
2547                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550                                 NUM_BANKS(ADDR_SURF_16_BANK));
2551                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2554                                 NUM_BANKS(ADDR_SURF_8_BANK));
2555                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558                                 NUM_BANKS(ADDR_SURF_4_BANK));
2559                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562                                 NUM_BANKS(ADDR_SURF_2_BANK));
2563
                /* Program the GB_TILE_MODE0..n and GB_MACROTILE_MODE0..n
                 * register banks (consecutive 32-bit registers, hence the
                 * reg_offset * 4 byte stride). */
2564                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2566                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2567                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2568                 break;
2569
2570         case 4:
                /*
                 * 4-pipe configuration.  The tile[] table additionally
                 * depends on the render-backend count: with 4 RBs the
                 * ADDR_SURF_P4_16x16 layout is used; with fewer RBs the
                 * narrower ADDR_SURF_P4_8x16 layout is used throughout.
                 *
                 * NOTE(review): if num_rbs > 4 neither branch runs and
                 * tile[] is left with whatever it held before -- confirm
                 * that value is impossible for a 4-pipe part.
                 */
2571                 if (num_rbs == 4) {
2572                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2575                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2576                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2580                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2584                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2588                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                            TILE_SPLIT(split_equal_to_row_size));
2592                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2595                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2596                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2597                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2599                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                            TILE_SPLIT(split_equal_to_row_size));
2603                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2604                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2605                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2606                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2608                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2613                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2617                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2618                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2623                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2629                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2630                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2632                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2633                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2637                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                /* NOTE(review): tile[28] uses ARRAY_PRT_2D_TILED_THIN1 here,
                 * unlike the 8/16-pipe cases which use ARRAY_2D_TILED_THIN1
                 * -- confirm this asymmetry is intentional upstream. */
2638                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2640                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2643                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2644                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2645                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2647                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650
                /* Harvested parts (fewer than 4 RBs): same table shape but
                 * with the ADDR_SURF_P4_8x16 pipe config everywhere. */
2651                 } else if (num_rbs < 4) {
2652                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2656                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2660                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2664                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2668                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                            TILE_SPLIT(split_equal_to_row_size));
2672                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2673                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2675                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2676                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2679                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                            TILE_SPLIT(split_equal_to_row_size));
2683                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2684                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2685                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2688                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2693                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2695                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2697                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2701                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2703                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2718                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2727                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 }
2731
                /* Macro tile modes are shared by both RB-count branches.
                 * Entry 7 and entries 15+ are not written here. */
2732                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735                                 NUM_BANKS(ADDR_SURF_16_BANK));
2736                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2742                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2743                                 NUM_BANKS(ADDR_SURF_16_BANK));
2744                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                 NUM_BANKS(ADDR_SURF_16_BANK));
2748                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751                                 NUM_BANKS(ADDR_SURF_16_BANK));
2752                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2755                                 NUM_BANKS(ADDR_SURF_8_BANK));
2756                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759                                 NUM_BANKS(ADDR_SURF_4_BANK));
2760                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2761                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2762                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763                                 NUM_BANKS(ADDR_SURF_16_BANK));
2764                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2765                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2766                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2767                                 NUM_BANKS(ADDR_SURF_16_BANK));
2768                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                 NUM_BANKS(ADDR_SURF_16_BANK));
2772                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775                                 NUM_BANKS(ADDR_SURF_16_BANK));
2776                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2779                                 NUM_BANKS(ADDR_SURF_16_BANK));
2780                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                 NUM_BANKS(ADDR_SURF_8_BANK));
2784                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2786                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2787                                 NUM_BANKS(ADDR_SURF_4_BANK));
2788
                /* Program the GB_TILE_MODE0..n and GB_MACROTILE_MODE0..n
                 * register banks (consecutive 32-bit registers, hence the
                 * reg_offset * 4 byte stride). */
2789                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2790                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2791                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2792                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2793                 break;
2794
2795         case 2:
2796                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798                            PIPE_CONFIG(ADDR_SURF_P2) |
2799                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2800                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2802                            PIPE_CONFIG(ADDR_SURF_P2) |
2803                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2804                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806                            PIPE_CONFIG(ADDR_SURF_P2) |
2807                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2808                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                            PIPE_CONFIG(ADDR_SURF_P2) |
2811                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2812                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2814                            PIPE_CONFIG(ADDR_SURF_P2) |
2815                            TILE_SPLIT(split_equal_to_row_size));
2816                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817                            PIPE_CONFIG(ADDR_SURF_P2) |
2818                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2820                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2821                            PIPE_CONFIG(ADDR_SURF_P2) |
2822                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2823                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2824                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825                            PIPE_CONFIG(ADDR_SURF_P2) |
2826                            TILE_SPLIT(split_equal_to_row_size));
2827                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828                            PIPE_CONFIG(ADDR_SURF_P2);
2829                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831                            PIPE_CONFIG(ADDR_SURF_P2));
2832                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834                             PIPE_CONFIG(ADDR_SURF_P2) |
2835                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2837                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838                             PIPE_CONFIG(ADDR_SURF_P2) |
2839                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2841                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842                             PIPE_CONFIG(ADDR_SURF_P2) |
2843                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2845                             PIPE_CONFIG(ADDR_SURF_P2) |
2846                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2847                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                             PIPE_CONFIG(ADDR_SURF_P2) |
2850                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2852                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853                             PIPE_CONFIG(ADDR_SURF_P2) |
2854                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2856                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857                             PIPE_CONFIG(ADDR_SURF_P2) |
2858                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2861                             PIPE_CONFIG(ADDR_SURF_P2));
2862                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2864                             PIPE_CONFIG(ADDR_SURF_P2) |
2865                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2866                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2867                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2868                             PIPE_CONFIG(ADDR_SURF_P2) |
2869                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2871                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                             PIPE_CONFIG(ADDR_SURF_P2) |
2873                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874
2875                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2876                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2877                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2878                                 NUM_BANKS(ADDR_SURF_16_BANK));
2879                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2880                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2881                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2882                                 NUM_BANKS(ADDR_SURF_16_BANK));
2883                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2885                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2886                                 NUM_BANKS(ADDR_SURF_16_BANK));
2887                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2890                                 NUM_BANKS(ADDR_SURF_16_BANK));
2891                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894                                 NUM_BANKS(ADDR_SURF_16_BANK));
2895                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2897                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898                                 NUM_BANKS(ADDR_SURF_16_BANK));
2899                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2902                                 NUM_BANKS(ADDR_SURF_8_BANK));
2903                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2904                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2905                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2906                                 NUM_BANKS(ADDR_SURF_16_BANK));
2907                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2908                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2909                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2910                                 NUM_BANKS(ADDR_SURF_16_BANK));
2911                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914                                 NUM_BANKS(ADDR_SURF_16_BANK));
2915                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                 NUM_BANKS(ADDR_SURF_16_BANK));
2919                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                 NUM_BANKS(ADDR_SURF_16_BANK));
2923                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2930                                 NUM_BANKS(ADDR_SURF_8_BANK));
2931
2932                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2933                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2934                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2935                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2936                 break;
2937
2938         default:
2939                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2940         }
2941 }
2942
2943 /**
2944  * cik_select_se_sh - select which SE, SH to address
2945  *
2946  * @rdev: radeon_device pointer
2947  * @se_num: shader engine to address
2948  * @sh_num: sh block to address
2949  *
2950  * Select which SE, SH combinations to address. Certain
2951  * registers are instanced per SE or SH.  0xffffffff means
2952  * broadcast to all SEs or SHs (CIK).
2953  */
2954 static void cik_select_se_sh(struct radeon_device *rdev,
2955                              u32 se_num, u32 sh_num)
2956 {
2957         u32 data = INSTANCE_BROADCAST_WRITES;
2958
2959         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2960                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2961         else if (se_num == 0xffffffff)
2962                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2963         else if (sh_num == 0xffffffff)
2964                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2965         else
2966                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2967         WREG32(GRBM_GFX_INDEX, data);
2968 }
2969
2970 /**
2971  * cik_create_bitmask - create a bitmask
2972  *
2973  * @bit_width: length of the mask
2974  *
2975  * create a variable length bit mask (CIK).
2976  * Returns the bitmask.
2977  */
2978 static u32 cik_create_bitmask(u32 bit_width)
2979 {
2980         u32 i, mask = 0;
2981
2982         for (i = 0; i < bit_width; i++) {
2983                 mask <<= 1;
2984                 mask |= 1;
2985         }
2986         return mask;
2987 }
2988
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
3000 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3001                               u32 max_rb_num_per_se,
3002                               u32 sh_per_se)
3003 {
3004         u32 data, mask;
3005
3006         data = RREG32(CC_RB_BACKEND_DISABLE);
3007         if (data & 1)
3008                 data &= BACKEND_DISABLE_MASK;
3009         else
3010                 data = 0;
3011         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3012
3013         data >>= BACKEND_DISABLE_SHIFT;
3014
3015         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3016
3017         return data & mask;
3018 }
3019
3020 /**
3021  * cik_setup_rb - setup the RBs on the asic
3022  *
3023  * @rdev: radeon_device pointer
3024  * @se_num: number of SEs (shader engines) for the asic
3025  * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3027  *
3028  * Configures per-SE/SH RB registers (CIK).
3029  */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;	/* packed bitmap of disabled RBs across all SE/SH */
	u32 enabled_rbs = 0;	/* complement of disabled_rbs over the valid range */

	/* Query each SE/SH pair for its disabled RBs and pack the
	 * results into one bitmap; Hawaii uses a wider per-SH field
	 * than the other CIK parts.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast so later writes reach every instance. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the disabled bitmap into an enabled-RB bitmap. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Cache the enabled-RB mask for later queries. */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE, choosing an RB mapping
	 * from each SH's two enabled-RB bits (consumed low to high).
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RB enabled in this SH */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3088
3089 /**
3090  * cik_gpu_init - setup the 3D engine
3091  *
3092  * @rdev: radeon_device pointer
3093  *
3094  * Configures the 3D engine and tiling configuration
3095  * registers so that the 3D engine is usable.
3096  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader/tiling topology limits plus the golden
	 * GB_ADDR_CONFIG value for each CIK family member.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not otherwise used
	 * in this function.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* Derive memory/tiling parameters; row size in KB is computed
	 * from the DRAM column count and clamped to 4.
	 */
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Propagate the address config to every block that needs it
	 * (HDP, display, SDMA engines, UVD).
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count the active CUs across every SE/SH pair. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* Scan-converter FIFO sizes come from the per-ASIC values set
	 * in the switch above.
	 */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write of the unchanged value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}
3337
3338 /*
3339  * GPU scratch registers helpers function.
3340  */
3341 /**
3342  * cik_scratch_init - setup driver info for CP scratch regs
3343  *
3344  * @rdev: radeon_device pointer
3345  *
3346  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3348  * is not used by default on newer asics (r6xx+).  On newer asics,
3349  * memory buffers are used for fences rather than scratch regs.
3350  */
3351 static void cik_scratch_init(struct radeon_device *rdev)
3352 {
3353         int i;
3354
3355         rdev->scratch.num_reg = 7;
3356         rdev->scratch.reg_base = SCRATCH_REG0;
3357         for (i = 0; i < rdev->scratch.num_reg; i++) {
3358                 rdev->scratch.free[i] = true;
3359                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3360         }
3361 }
3362
3363 /**
3364  * cik_ring_test - basic gfx ring test
3365  *
3366  * @rdev: radeon_device pointer
3367  * @ring: radeon_ring structure holding ring information
3368  *
3369  * Allocate a scratch register and write to it using the gfx ring (CIK).
3370  * Provides a basic gfx ring test to verify that the ring is working.
3371  * Used by cik_cp_gfx_resume();
3372  * Returns 0 on success, error on failure.
3373  */
3374 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3375 {
3376         uint32_t scratch;
3377         uint32_t tmp = 0;
3378         unsigned i;
3379         int r;
3380
3381         r = radeon_scratch_get(rdev, &scratch);
3382         if (r) {
3383                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3384                 return r;
3385         }
3386         WREG32(scratch, 0xCAFEDEAD);
3387         r = radeon_ring_lock(rdev, ring, 3);
3388         if (r) {
3389                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3390                 radeon_scratch_free(rdev, scratch);
3391                 return r;
3392         }
3393         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3394         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3395         radeon_ring_write(ring, 0xDEADBEEF);
3396         radeon_ring_unlock_commit(rdev, ring, false);
3397
3398         for (i = 0; i < rdev->usec_timeout; i++) {
3399                 tmp = RREG32(scratch);
3400                 if (tmp == 0xDEADBEEF)
3401                         break;
3402                 DRM_UDELAY(1);
3403         }
3404         if (i < rdev->usec_timeout) {
3405                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3406         } else {
3407                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3408                           ring->idx, scratch, tmp);
3409                 r = -EINVAL;
3410         }
3411         radeon_scratch_free(rdev, scratch);
3412         return r;
3413 }
3414
3415 /**
3416  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3417  *
3418  * @rdev: radeon_device pointer
3419  * @ridx: radeon ring index
3420  *
3421  * Emits an hdp flush on the cp.
3422  */
3423 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3424                                        int ridx)
3425 {
3426         struct radeon_ring *ring = &rdev->ring[ridx];
3427         u32 ref_and_mask;
3428
3429         switch (ring->idx) {
3430         case CAYMAN_RING_TYPE_CP1_INDEX:
3431         case CAYMAN_RING_TYPE_CP2_INDEX:
3432         default:
3433                 switch (ring->me) {
3434                 case 0:
3435                         ref_and_mask = CP2 << ring->pipe;
3436                         break;
3437                 case 1:
3438                         ref_and_mask = CP6 << ring->pipe;
3439                         break;
3440                 default:
3441                         return;
3442                 }
3443                 break;
3444         case RADEON_RING_TYPE_GFX_INDEX:
3445                 ref_and_mask = CP0;
3446                 break;
3447         }
3448
3449         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3450         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3451                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3452                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3453         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3454         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3455         radeon_ring_write(ring, ref_and_mask);
3456         radeon_ring_write(ring, ref_and_mask);
3457         radeon_ring_write(ring, 0x20); /* poll interval */
3458 }
3459
3460 /**
3461  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3462  *
3463  * @rdev: radeon_device pointer
3464  * @fence: radeon fence object
3465  *
3466  * Emits a fence sequence number on the gfx ring and flushes
3467  * GPU caches.
3468  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy event: writes seq - 1, INT_SEL(0) (no interrupt, unlike
	 * the real event below which uses INT_SEL(2))
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real event: writes the actual fence->seq with INT_SEL(2) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3500
3501 /**
3502  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3503  *
3504  * @rdev: radeon_device pointer
3505  * @fence: radeon fence object
3506  *
3507  * Emits a fence sequence number on the compute ring and flushes
3508  * GPU caches.
3509  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* fence sequence number lands at this per-ring GPU address */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* low dword must be dword-aligned; high bits follow separately */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3528
3529 /**
3530  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3531  *
3532  * @rdev: radeon_device pointer
3533  * @ring: radeon ring buffer object
3534  * @semaphore: radeon semaphore object
3535  * @emit_wait: Is this a semaphore wait?
3536  *
3537  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3538  * from running ahead of semaphore waits.
3539  */
3540 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3541                              struct radeon_ring *ring,
3542                              struct radeon_semaphore *semaphore,
3543                              bool emit_wait)
3544 {
3545         uint64_t addr = semaphore->gpu_addr;
3546         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3547
3548         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3549         radeon_ring_write(ring, lower_32_bits(addr));
3550         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3551
3552         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3553                 /* Prevent the PFP from running ahead of the semaphore wait */
3554                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3555                 radeon_ring_write(ring, 0x0);
3556         }
3557
3558         return true;
3559 }
3560
3561 /**
3562  * cik_copy_cpdma - copy pages using the CP DMA engine
3563  *
3564  * @rdev: radeon_device pointer
3565  * @src_offset: src GPU address
3566  * @dst_offset: dst GPU address
3567  * @num_gpu_pages: number of GPU pages to xfer
3568  * @resv: reservation object to sync to
3569  *
3570  * Copy GPU paging using the CP DMA engine (CIK+).
3571  * Used by the radeon ttm implementation to move pages if
3572  * registered as the asic copy callback.
3573  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* each DMA_DATA packet moves at most 0x1fffff bytes, so split the
	 * copy into that many chunks
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus 18 dwords of sync/fence overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait on prior users of the buffers before starting the copy */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* set CP_SYNC only on the final chunk */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll back the packets written above */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	/* ownership of the fence reference passes to the caller */
	return fence;
}
3632
3633 /*
3634  * IB stuff
3635  */
3636 /**
3637  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3638  *
3639  * @rdev: radeon_device pointer
3640  * @ib: radeon indirect buffer object
3641  *
3642  * Emits a DE (drawing engine) or CE (constant engine) IB
3643  * on the gfx ring.  IBs are usually generated by userspace
3644  * acceleration drivers and submitted to the kernel for
3645  * scheduling on the ring.  This function schedules the IB
3646  * on the gfx ring for execution by the GPU.
3647  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vm id 0 means no VM bound to this IB */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_UCONFIG_REG + 4 for the IB
			 * packet below = where the rptr will be afterwards
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA + 4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords in the low bits, vm id in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3687
3688 /**
3689  * cik_ib_test - basic gfx ring IB test
3690  *
3691  * @rdev: radeon_device pointer
3692  * @ring: radeon_ring structure holding ring information
3693  *
3694  * Allocate an IB and execute it on the gfx ring (CIK).
3695  * Provides a basic gfx ring test to verify that IBs are working.
3696  * Returns 0 on success, error on failure.
3697  */
3698 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3699 {
3700         struct radeon_ib ib;
3701         uint32_t scratch;
3702         uint32_t tmp = 0;
3703         unsigned i;
3704         int r;
3705
3706         r = radeon_scratch_get(rdev, &scratch);
3707         if (r) {
3708                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3709                 return r;
3710         }
3711         WREG32(scratch, 0xCAFEDEAD);
3712         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3713         if (r) {
3714                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3715                 radeon_scratch_free(rdev, scratch);
3716                 return r;
3717         }
3718         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3719         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3720         ib.ptr[2] = 0xDEADBEEF;
3721         ib.length_dw = 3;
3722         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3723         if (r) {
3724                 radeon_scratch_free(rdev, scratch);
3725                 radeon_ib_free(rdev, &ib);
3726                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3727                 return r;
3728         }
3729         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3730                 RADEON_USEC_IB_TEST_TIMEOUT));
3731         if (r < 0) {
3732                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3733                 radeon_scratch_free(rdev, scratch);
3734                 radeon_ib_free(rdev, &ib);
3735                 return r;
3736         } else if (r == 0) {
3737                 DRM_ERROR("radeon: fence wait timed out.\n");
3738                 radeon_scratch_free(rdev, scratch);
3739                 radeon_ib_free(rdev, &ib);
3740                 return -ETIMEDOUT;
3741         }
3742         r = 0;
3743         for (i = 0; i < rdev->usec_timeout; i++) {
3744                 tmp = RREG32(scratch);
3745                 if (tmp == 0xDEADBEEF)
3746                         break;
3747                 DRM_UDELAY(1);
3748         }
3749         if (i < rdev->usec_timeout) {
3750                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3751         } else {
3752                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3753                           scratch, tmp);
3754                 r = -EINVAL;
3755         }
3756         radeon_scratch_free(rdev, scratch);
3757         radeon_ib_free(rdev, &ib);
3758         return r;
3759 }
3760
3761 /*
3762  * CP.
3763  * On CIK, gfx and compute now have independent command processors.
3764  *
3765  * GFX
3766  * Gfx consists of a single ring and can process both gfx jobs and
3767  * compute jobs.  The gfx CP consists of three microengines (ME):
3768  * PFP - Pre-Fetch Parser
3769  * ME - Micro Engine
3770  * CE - Constant Engine
3771  * The PFP and ME make up what is considered the Drawing Engine (DE).
3772  * The CE is an asynchronous engine used for updating buffer descriptors
3773  * used by the DE so that they can be loaded into cache in parallel
3774  * while the DE is processing state update packets.
3775  *
3776  * Compute
3777  * The compute CP consists of two microengines (ME):
3778  * MEC1 - Compute MicroEngine 1
3779  * MEC2 - Compute MicroEngine 2
3780  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3781  * The queues are exposed to userspace and are programmed directly
3782  * by the compute runtime.
3783  */
3784 /**
3785  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3786  *
3787  * @rdev: radeon_device pointer
3788  * @enable: enable or disable the MEs
3789  *
3790  * Halts or unhalts the gfx MEs.
3791  */
3792 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3793 {
3794         if (enable)
3795                 WREG32(CP_ME_CNTL, 0);
3796         else {
3797                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3798                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3799                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3800                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3801         }
3802         udelay(50);
3803 }
3804
3805 /**
3806  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3807  *
3808  * @rdev: radeon_device pointer
3809  *
3810  * Loads the gfx PFP, ME, and CE ucode.
3811  * Returns 0 for success, -EINVAL if the ucode is not available.
3812  */
3813 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3814 {
3815         int i;
3816
3817         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3818                 return -EINVAL;
3819
3820         cik_cp_gfx_enable(rdev, false);
3821
3822         if (rdev->new_fw) {
3823                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3824                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3825                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3826                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3827                 const struct gfx_firmware_header_v1_0 *me_hdr =
3828                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3829                 const __le32 *fw_data;
3830                 u32 fw_size;
3831
3832                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3833                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3834                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3835
3836                 /* PFP */
3837                 fw_data = (const __le32 *)
3838                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3839                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3840                 WREG32(CP_PFP_UCODE_ADDR, 0);
3841                 for (i = 0; i < fw_size; i++)
3842                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3843                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3844
3845                 /* CE */
3846                 fw_data = (const __le32 *)
3847                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3848                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3849                 WREG32(CP_CE_UCODE_ADDR, 0);
3850                 for (i = 0; i < fw_size; i++)
3851                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3852                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3853
3854                 /* ME */
3855                 fw_data = (const __be32 *)
3856                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3857                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3858                 WREG32(CP_ME_RAM_WADDR, 0);
3859                 for (i = 0; i < fw_size; i++)
3860                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3861                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3862                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3863         } else {
3864                 const __be32 *fw_data;
3865
3866                 /* PFP */
3867                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3868                 WREG32(CP_PFP_UCODE_ADDR, 0);
3869                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3870                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3871                 WREG32(CP_PFP_UCODE_ADDR, 0);
3872
3873                 /* CE */
3874                 fw_data = (const __be32 *)rdev->ce_fw->data;
3875                 WREG32(CP_CE_UCODE_ADDR, 0);
3876                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3877                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3878                 WREG32(CP_CE_UCODE_ADDR, 0);
3879
3880                 /* ME */
3881                 fw_data = (const __be32 *)rdev->me_fw->data;
3882                 WREG32(CP_ME_RAM_WADDR, 0);
3883                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3884                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3885                 WREG32(CP_ME_RAM_WADDR, 0);
3886         }
3887
3888         return 0;
3889 }
3890
3891 /**
3892  * cik_cp_gfx_start - start the gfx ring
3893  *
3894  * @rdev: radeon_device pointer
3895  *
3896  * Enables the ring and loads the clear state context and other
3897  * packets required to init the ring.
3898  * Returns 0 for success, error for failure.
3899  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear-state image plus 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state between the preamble markers */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
3951
3952 /**
3953  * cik_cp_gfx_fini - stop the gfx ring
3954  *
3955  * @rdev: radeon_device pointer
3956  *
3957  * Stop the gfx ring and tear down the driver ring
3958  * info.
3959  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the PFP/ME/CE first, then release the ring's resources */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3965
3966 /**
3967  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3968  *
3969  * @rdev: radeon_device pointer
3970  *
3971  * Program the location and size of the gfx ring buffer
3972  * and test it to make sure it's working.
3973  * Returns 0 for success, error for failure.
3974  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in quad-dwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set only temporarily so the rptr can be reset;
	 * it is cleared again by the plain CNTL write below.
	 */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx copies work again, so expose all of VRAM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4041
4042 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4043                      struct radeon_ring *ring)
4044 {
4045         u32 rptr;
4046
4047         if (rdev->wb.enabled)
4048                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4049         else
4050                 rptr = RREG32(CP_RB0_RPTR);
4051
4052         return rptr;
4053 }
4054
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	/* the gfx write pointer always comes straight from the register */
	return RREG32(CP_RB0_WPTR);
}
4060
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read the register back to flush the posted write */
	(void)RREG32(CP_RB0_WPTR);
}
4067
4068 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4069                          struct radeon_ring *ring)
4070 {
4071         u32 rptr;
4072
4073         if (rdev->wb.enabled) {
4074                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4075         } else {
4076                 mutex_lock(&rdev->srbm_mutex);
4077                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4078                 rptr = RREG32(CP_HQD_PQ_RPTR);
4079                 cik_srbm_select(rdev, 0, 0, 0, 0);
4080                 mutex_unlock(&rdev->srbm_mutex);
4081         }
4082
4083         return rptr;
4084 }
4085
4086 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4087                          struct radeon_ring *ring)
4088 {
4089         u32 wptr;
4090
4091         if (rdev->wb.enabled) {
4092                 /* XXX check if swapping is necessary on BE */
4093                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4094         } else {
4095                 mutex_lock(&rdev->srbm_mutex);
4096                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4097                 wptr = RREG32(CP_HQD_PQ_WPTR);
4098                 cik_srbm_select(rdev, 0, 0, 0, 0);
4099                 mutex_unlock(&rdev->srbm_mutex);
4100         }
4101
4102         return wptr;
4103 }
4104
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* publish the new wptr in the writeback area, then ring the
	 * queue's doorbell to notify the MEC
	 */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4112
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* NOTE: caller must hold rdev->srbm_mutex (see cik_cp_compute_enable) */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and busy-wait (up to usec_timeout us)
		 * for the queue to go inactive
		 */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		/* reset the queue pointers */
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4137
4138 /**
4139  * cik_cp_compute_enable - enable/disable the compute CP MEs
4140  *
4141  * @rdev: radeon_device pointer
4142  * @enable: enable or disable the MEs
4143  *
4144  * Halts or unhalts the compute MEs.
4145  */
4146 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4147 {
4148         if (enable)
4149                 WREG32(CP_MEC_CNTL, 0);
4150         else {
4151                 /*
4152                  * To make hibernation reliable we need to clear compute ring
4153                  * configuration before halting the compute ring.
4154                  */
4155                 mutex_lock(&rdev->srbm_mutex);
4156                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4157                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4158                 mutex_unlock(&rdev->srbm_mutex);
4159
4160                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4161                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4162                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4163         }
4164         udelay(50);
4165 }
4166
4167 /**
4168  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4169  *
4170  * @rdev: radeon_device pointer
4171  *
4172  * Loads the compute MEC1&2 ucode.
4173  * Returns 0 for success, -EINVAL if the ucode is not available.
4174  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before touching their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-layout firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): assumes mec2_fw was loaded whenever
			 * family == CHIP_KAVERI and new_fw is set -- confirm
			 * against the firmware init path before relying on it
			 */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware layout: raw big-endian words, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - the old layout reuses the single mec image
			 * for MEC2 as well (no separate mec2 file here)
			 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4237
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Currently this only un-halts the MEC micro engines; the per-queue
 * register setup is done afterwards by cik_cp_compute_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
        cik_cp_compute_enable(rdev, true);

        return 0;
}
4252
4253 /**
4254  * cik_cp_compute_fini - stop the compute queues
4255  *
4256  * @rdev: radeon_device pointer
4257  *
4258  * Stop the compute queues and tear down the driver queue
4259  * info.
4260  */
4261 static void cik_cp_compute_fini(struct radeon_device *rdev)
4262 {
4263         int i, idx, r;
4264
4265         cik_cp_compute_enable(rdev, false);
4266
4267         for (i = 0; i < 2; i++) {
4268                 if (i == 0)
4269                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4270                 else
4271                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4272
4273                 if (rdev->ring[idx].mqd_obj) {
4274                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4275                         if (unlikely(r != 0))
4276                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4277
4278                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4279                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4280
4281                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4282                         rdev->ring[idx].mqd_obj = NULL;
4283                 }
4284         }
4285 }
4286
4287 static void cik_mec_fini(struct radeon_device *rdev)
4288 {
4289         int r;
4290
4291         if (rdev->mec.hpd_eop_obj) {
4292                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4293                 if (unlikely(r != 0))
4294                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4295                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4296                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4297
4298                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4299                 rdev->mec.hpd_eop_obj = NULL;
4300         }
4301 }
4302
4303 #define MEC_HPD_SIZE 2048
4304
4305 static int cik_mec_init(struct radeon_device *rdev)
4306 {
4307         int r;
4308         u32 *hpd;
4309
4310         /*
4311          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4312          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4313          */
4314         if (rdev->family == CHIP_KAVERI)
4315                 rdev->mec.num_mec = 2;
4316         else
4317                 rdev->mec.num_mec = 1;
4318         rdev->mec.num_pipe = 4;
4319         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4320
4321         if (rdev->mec.hpd_eop_obj == NULL) {
4322                 r = radeon_bo_create(rdev,
4323                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4324                                      PAGE_SIZE, true,
4325                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4326                                      &rdev->mec.hpd_eop_obj);
4327                 if (r) {
4328                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4329                         return r;
4330                 }
4331         }
4332
4333         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4334         if (unlikely(r != 0)) {
4335                 cik_mec_fini(rdev);
4336                 return r;
4337         }
4338         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4339                           &rdev->mec.hpd_eop_gpu_addr);
4340         if (r) {
4341                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4342                 cik_mec_fini(rdev);
4343                 return r;
4344         }
4345         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4346         if (r) {
4347                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4348                 cik_mec_fini(rdev);
4349                 return r;
4350         }
4351
4352         /* clear memory.  Not sure if this is required or not */
4353         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4354
4355         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4356         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4357
4358         return 0;
4359 }
4360
/* Snapshot of the CP HQD (hardware queue descriptor) registers, embedded
 * in struct bonaire_mqd below and programmed into the CP_HQD_*/CP_MQD_*
 * registers by cik_cp_compute_resume().
 * NOTE(review): this layout presumably mirrors the MQD format consumed by
 * the CP microcode - do not reorder or resize fields.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4399
/* Memory queue descriptor (MQD) for a CIK compute queue.  One instance
 * lives in a per-ring GTT buffer object and is initialized by
 * cik_cp_compute_resume(); queue_state holds the HQD register image.
 * NOTE(review): this layout is presumably consumed by the CP microcode -
 * do not reorder or resize fields.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4427
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Starts the MECs, programs the per-pipe HPD EOP registers, then builds
 * an MQD in a GTT buffer object for each of the two compute rings and
 * mirrors it into the HQD registers.  Ends with a ring test per queue.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        /* pipes 0-3 live on ME 1, pipes 4-7 (KV only) on ME 2 */
        for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
                int me = (i < 4) ? 1 : 2;
                int pipe = (i < 4) ? i : (i - 4);

                cik_srbm_select(rdev, me, pipe, 0, 0);

                /* each pipe gets its own MEC_HPD_SIZE*2 slice of the EOP bo */
                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);

        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* allocate the MQD bo lazily; it survives suspend/resume */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                /* enable all CUs for this queue */
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                /* the HQD registers below are instanced per me/pipe/queue,
                 * so select this ring's queue before touching them */
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        /* wait (bounded by usec_timeout) for the dequeue to land */
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                /* queue size is 2^(QUEUE_SIZE+1) dwords */
                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4673
/* Enable or disable both the gfx and the compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4679
/* Load the CP microcode for the gfx engine and then for the compute
 * MECs.  Returns 0 on success, the first failing loader's error code
 * otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
        int r;

        r = cik_cp_gfx_load_microcode(rdev);
        if (r)
                return r;

        return cik_cp_compute_load_microcode(rdev);
}
4693
/* Tear down both the gfx and the compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4699
4700 static int cik_cp_resume(struct radeon_device *rdev)
4701 {
4702         int r;
4703
4704         cik_enable_gui_idle_interrupt(rdev, false);
4705
4706         r = cik_cp_load_microcode(rdev);
4707         if (r)
4708                 return r;
4709
4710         r = cik_cp_gfx_resume(rdev);
4711         if (r)
4712                 return r;
4713         r = cik_cp_compute_resume(rdev);
4714         if (r)
4715                 return r;
4716
4717         cik_enable_gui_idle_interrupt(rdev, true);
4718
4719         return 0;
4720 }
4721
/* Dump the GRBM, SRBM, SDMA and CP status registers to the kernel log.
 * Called by cik_gpu_soft_reset() before and after the reset to help
 * diagnose which blocks were hung.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4761
4762 /**
4763  * cik_gpu_check_soft_reset - check which blocks are busy
4764  *
4765  * @rdev: radeon_device pointer
4766  *
4767  * Check which blocks are busy and return the relevant reset
4768  * mask to be used by cik_gpu_soft_reset().
4769  * Returns a mask of the blocks to be reset.
4770  */
4771 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4772 {
4773         u32 reset_mask = 0;
4774         u32 tmp;
4775
4776         /* GRBM_STATUS */
4777         tmp = RREG32(GRBM_STATUS);
4778         if (tmp & (PA_BUSY | SC_BUSY |
4779                    BCI_BUSY | SX_BUSY |
4780                    TA_BUSY | VGT_BUSY |
4781                    DB_BUSY | CB_BUSY |
4782                    GDS_BUSY | SPI_BUSY |
4783                    IA_BUSY | IA_BUSY_NO_DMA))
4784                 reset_mask |= RADEON_RESET_GFX;
4785
4786         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4787                 reset_mask |= RADEON_RESET_CP;
4788
4789         /* GRBM_STATUS2 */
4790         tmp = RREG32(GRBM_STATUS2);
4791         if (tmp & RLC_BUSY)
4792                 reset_mask |= RADEON_RESET_RLC;
4793
4794         /* SDMA0_STATUS_REG */
4795         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4796         if (!(tmp & SDMA_IDLE))
4797                 reset_mask |= RADEON_RESET_DMA;
4798
4799         /* SDMA1_STATUS_REG */
4800         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4801         if (!(tmp & SDMA_IDLE))
4802                 reset_mask |= RADEON_RESET_DMA1;
4803
4804         /* SRBM_STATUS2 */
4805         tmp = RREG32(SRBM_STATUS2);
4806         if (tmp & SDMA_BUSY)
4807                 reset_mask |= RADEON_RESET_DMA;
4808
4809         if (tmp & SDMA1_BUSY)
4810                 reset_mask |= RADEON_RESET_DMA1;
4811
4812         /* SRBM_STATUS */
4813         tmp = RREG32(SRBM_STATUS);
4814
4815         if (tmp & IH_BUSY)
4816                 reset_mask |= RADEON_RESET_IH;
4817
4818         if (tmp & SEM_BUSY)
4819                 reset_mask |= RADEON_RESET_SEM;
4820
4821         if (tmp & GRBM_RQ_PENDING)
4822                 reset_mask |= RADEON_RESET_GRBM;
4823
4824         if (tmp & VMC_BUSY)
4825                 reset_mask |= RADEON_RESET_VMC;
4826
4827         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4828                    MCC_BUSY | MCD_BUSY))
4829                 reset_mask |= RADEON_RESET_MC;
4830
4831         if (evergreen_is_display_hung(rdev))
4832                 reset_mask |= RADEON_RESET_DISPLAY;
4833
4834         /* Skip MC reset as it's mostly likely not hung, just busy */
4835         if (reset_mask & RADEON_RESET_MC) {
4836                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4837                 reset_mask &= ~RADEON_RESET_MC;
4838         }
4839
4840         return reset_mask;
4841 }
4842
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Halts the CP/MEC/SDMA engines, stops the MC, then pulses the
 * GRBM/SRBM soft-reset bits for the requested blocks and restores
 * the MC.  Status registers are dumped before and after.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
        struct evergreen_mc_save save;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if (reset_mask == 0)
                return;

        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

        cik_print_gpu_status_regs(rdev);
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

        /* disable CG/PG */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* stop the rlc */
        cik_rlc_stop(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        if (reset_mask & RADEON_RESET_DMA) {
                /* sdma0 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        }
        if (reset_mask & RADEON_RESET_DMA1) {
                /* sdma1 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        }

        /* stop the MC before touching the reset bits */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }

        /* translate the reset mask into GRBM/SRBM soft reset bits */
        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

        if (reset_mask & RADEON_RESET_CP) {
                grbm_soft_reset |= SOFT_RESET_CP;

                srbm_soft_reset |= SOFT_RESET_GRBM;
        }

        if (reset_mask & RADEON_RESET_DMA)
                srbm_soft_reset |= SOFT_RESET_SDMA;

        if (reset_mask & RADEON_RESET_DMA1)
                srbm_soft_reset |= SOFT_RESET_SDMA1;

        if (reset_mask & RADEON_RESET_DISPLAY)
                srbm_soft_reset |= SOFT_RESET_DC;

        if (reset_mask & RADEON_RESET_RLC)
                grbm_soft_reset |= SOFT_RESET_RLC;

        if (reset_mask & RADEON_RESET_SEM)
                srbm_soft_reset |= SOFT_RESET_SEM;

        if (reset_mask & RADEON_RESET_IH)
                srbm_soft_reset |= SOFT_RESET_IH;

        if (reset_mask & RADEON_RESET_GRBM)
                srbm_soft_reset |= SOFT_RESET_GRBM;

        if (reset_mask & RADEON_RESET_VMC)
                srbm_soft_reset |= SOFT_RESET_VMC;

        /* no MC soft reset on IGPs (no dedicated MC to reset) */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                if (reset_mask & RADEON_RESET_MC)
                        srbm_soft_reset |= SOFT_RESET_MC;
        }

        /* assert the reset bits, hold for 50us, then deassert;
         * the readbacks post the writes */
        if (grbm_soft_reset) {
                tmp = RREG32(GRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                tmp = RREG32(SRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);

                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        evergreen_mc_resume(rdev, &save);
        udelay(50);

        cik_print_gpu_status_regs(rdev);
}
4973
/* GMCON register state saved across a GPU soft reset by
 * kv_save_regs_for_reset() and restored by kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
        u32 gmcon_reng_execute;
        u32 gmcon_misc;
        u32 gmcon_misc3;
};
4979
/* Save the GMCON registers into @save before a soft reset, then mask
 * the RENG execute-on-power-up / execute-on-reg-update triggers and
 * stutter mode so the engine stays quiescent during the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
                                   struct kv_reset_save_regs *save)
{
        /* snapshot current state for kv_restore_regs_for_reset() */
        save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
        save->gmcon_misc = RREG32(GMCON_MISC);
        save->gmcon_misc3 = RREG32(GMCON_MISC3);

        /* write back with the auto-execute/stutter bits cleared */
        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
        WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
                                                STCTRL_STUTTER_EN));
}
4991
/* Restore GMCON state on an APU after a pci config reset.
 *
 * The body is a fixed PGFSM (power-gating finite state machine)
 * reprogramming sequence: each WRITE/CONFIG register pair is followed
 * by five dummy writes.  The values are magic numbers with no public
 * documentation — do not reorder or "simplify" this sequence.
 * NOTE(review): presumably the five dummy writes act as a settle/flush
 * step for the PGFSM — confirm against the hw programming guide. */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* finally put back the values saved in kv_save_regs_for_reset() */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5064
/**
 * cik_gpu_pci_config_reset - GPU reset via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the chip (disable cg/pg, halt CP/MEC/SDMA/RLC, stop memory
 * traffic), then reset the asic through the pci config registers and
 * wait for it to come back.  On APUs (IGP) some GMCON state does not
 * survive the reset, so it is saved before and restored after.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APU only: stash GMCON state that the reset clobbers */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * as all-ones while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5127
5128 /**
5129  * cik_asic_reset - soft reset GPU
5130  *
5131  * @rdev: radeon_device pointer
5132  * @hard: force hard reset
5133  *
5134  * Look up which blocks are hung and attempt
5135  * to reset them.
5136  * Returns 0 for success.
5137  */
5138 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5139 {
5140         u32 reset_mask;
5141
5142         if (hard) {
5143                 cik_gpu_pci_config_reset(rdev);
5144                 return 0;
5145         }
5146
5147         reset_mask = cik_gpu_check_soft_reset(rdev);
5148
5149         if (reset_mask)
5150                 r600_set_bios_scratch_engine_hung(rdev, true);
5151
5152         /* try soft reset */
5153         cik_gpu_soft_reset(rdev, reset_mask);
5154
5155         reset_mask = cik_gpu_check_soft_reset(rdev);
5156
5157         /* try pci config reset */
5158         if (reset_mask && radeon_hard_reset)
5159                 cik_gpu_pci_config_reset(rdev);
5160
5161         reset_mask = cik_gpu_check_soft_reset(rdev);
5162
5163         if (!reset_mask)
5164                 r600_set_bios_scratch_engine_hung(rdev, false);
5165
5166         return 0;
5167 }
5168
5169 /**
5170  * cik_gfx_is_lockup - check if the 3D engine is locked up
5171  *
5172  * @rdev: radeon_device pointer
5173  * @ring: radeon_ring structure holding ring information
5174  *
5175  * Check if the 3D engine is locked up (CIK).
5176  * Returns true if the engine is locked, false if not.
5177  */
5178 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5179 {
5180         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5181
5182         if (!(reset_mask & (RADEON_RESET_GFX |
5183                             RADEON_RESET_COMPUTE |
5184                             RADEON_RESET_CP))) {
5185                 radeon_ring_lockup_update(rdev, ring);
5186                 return false;
5187         }
5188         return radeon_ring_test_lockup(rdev, ring);
5189 }
5190
5191 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  The MC is stopped (and display
 * state saved) around the reprogramming, then resumed; the VGA
 * aperture and renderer are disabled so the driver owns VRAM.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic before touching the aperture registers */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (vram_end) in the high 16 bits and
	 * base (vram_start) in the low 16 bits, in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK: program an empty aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5247
5248 /**
5249  * cik_mc_init - initialize the memory controller driver params
5250  *
5251  * @rdev: radeon_device pointer
5252  *
5253  * Look up the amount of vram, vram width, and decide how to place
5254  * vram and gart within the GPU's physical address space (CIK).
5255  * Returns 0 for success.
5256  */
5257 static int cik_mc_init(struct radeon_device *rdev)
5258 {
5259         u32 tmp;
5260         int chansize, numchan;
5261
5262         /* Get VRAM informations */
5263         rdev->mc.vram_is_ddr = true;
5264         tmp = RREG32(MC_ARB_RAMCFG);
5265         if (tmp & CHANSIZE_MASK) {
5266                 chansize = 64;
5267         } else {
5268                 chansize = 32;
5269         }
5270         tmp = RREG32(MC_SHARED_CHMAP);
5271         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5272         case 0:
5273         default:
5274                 numchan = 1;
5275                 break;
5276         case 1:
5277                 numchan = 2;
5278                 break;
5279         case 2:
5280                 numchan = 4;
5281                 break;
5282         case 3:
5283                 numchan = 8;
5284                 break;
5285         case 4:
5286                 numchan = 3;
5287                 break;
5288         case 5:
5289                 numchan = 6;
5290                 break;
5291         case 6:
5292                 numchan = 10;
5293                 break;
5294         case 7:
5295                 numchan = 12;
5296                 break;
5297         case 8:
5298                 numchan = 16;
5299                 break;
5300         }
5301         rdev->mc.vram_width = numchan * chansize;
5302         /* Could aper size report 0 ? */
5303         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5304         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5305         /* size in MB on si */
5306         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5307         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5308         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5309         si_vram_gtt_location(rdev, &rdev->mc);
5310         radeon_update_bandwidth_info(rdev);
5311
5312         return 0;
5313 }
5314
5315 /*
5316  * GART
5317  * VMID 0 is the physical GPU addresses as used by the kernel.
5318  * VMIDs 1-15 are used for userspace clients and are handled
5319  * by the radeon vm/hsa code.
5320  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 * The HDP cache is flushed first so any CPU writes to the GART
 * table have landed before the TLB is invalidated.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5336
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's GART range, single-level page
	 * table, faults redirected to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers cleared here — purpose
	 * unknown, kept as-is */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15: two-level page tables with full fault
	 * reporting (interrupt + redirect to the dummy page) */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri: make sure the VM is not bypassed */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space;
	 * programmed per-VMID via the SRBM (hence the mutex) */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5457
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).  The per-context page
 * table base addresses are saved first so cik_pcie_gart_enable()
 * can restore them across a suspend/resume or reset.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save contexts 1-15; the two banks mirror the layout used
	 * in cik_pcie_gart_enable() */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5496
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the VM
 * contexts, frees the GART table VRAM, and releases the GART
 * bookkeeping — in that order.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5510
5511 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5524
5525 /*
5526  * vm
5527  * VMID 0 is the physical GPU addresses as used by the kernel.
5528  * VMIDs 1-15 are used for userspace clients and are handled
5529  * by the radeon vm/hsa code.
5530  */
5531 /**
5532  * cik_vm_init - cik vm init callback
5533  *
5534  * @rdev: radeon_device pointer
5535  *
5536  * Inits cik specific vm parameters (number of VMs, base of vram for
5537  * VMIDs 1-15) (CIK).
5538  * Returns 0 for success.
5539  */
5540 int cik_vm_init(struct radeon_device *rdev)
5541 {
5542         /*
5543          * number of VMs
5544          * VMID 0 is reserved for System
5545          * radeon graphics/compute will use VMIDs 1-15
5546          */
5547         rdev->vm_manager.nvm = 16;
5548         /* base offset of vram pages */
5549         if (rdev->flags & RADEON_IS_IGP) {
5550                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5551                 tmp <<= 22;
5552                 rdev->vm_manager.vram_base_offset = tmp;
5553         } else
5554                 rdev->vm_manager.vram_base_offset = 0;
5555
5556         return 0;
5557 }
5558
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: nothing asic-specific to undo here.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5569
5570 /**
5571  * cik_vm_decode_fault - print human readable fault info
5572  *
5573  * @rdev: radeon_device pointer
5574  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5575  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5576  *
5577  * Print human readable fault information (CIK).
5578  */
5579 static void cik_vm_decode_fault(struct radeon_device *rdev,
5580                                 u32 status, u32 addr, u32 mc_client)
5581 {
5582         u32 mc_id;
5583         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5584         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5585         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5586                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5587
5588         if (rdev->family == CHIP_HAWAII)
5589                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5590         else
5591                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5592
5593         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5594                protections, vmid, addr,
5595                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5596                block, mc_client, mc_id);
5597 }
5598
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is being updated
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  The packet sequence is order-sensitive:
 * write the new PD base, reprogram the per-VMID SH_MEM_* regs
 * via SRBM, flush the HDP cache, request the TLB invalidate,
 * and busy-wait until the invalidate bit clears.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute uses ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID;
	 * contexts 0-7 and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID in the SRBM first */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* four consecutive SH_MEM registers written in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5680
5681 /*
5682  * RLC
5683  * The RLC is a multi-purpose microengine that handles a
5684  * variety of functions, the most important of which is
5685  * the interrupt controller.
5686  */
5687 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5688                                           bool enable)
5689 {
5690         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5691
5692         if (enable)
5693                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5694         else
5695                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5696         WREG32(CP_INT_CNTL_RING0, tmp);
5697 }
5698
5699 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5700 {
5701         u32 tmp;
5702
5703         tmp = RREG32(RLC_LB_CNTL);
5704         if (enable)
5705                 tmp |= LOAD_BALANCE_ENABLE;
5706         else
5707                 tmp &= ~LOAD_BALANCE_ENABLE;
5708         WREG32(RLC_LB_CNTL, tmp);
5709 }
5710
5711 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5712 {
5713         u32 i, j, k;
5714         u32 mask;
5715
5716         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5717                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5718                         cik_select_se_sh(rdev, i, j);
5719                         for (k = 0; k < rdev->usec_timeout; k++) {
5720                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5721                                         break;
5722                                 udelay(1);
5723                         }
5724                 }
5725         }
5726         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5727
5728         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5729         for (k = 0; k < rdev->usec_timeout; k++) {
5730                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5731                         break;
5732                 udelay(1);
5733         }
5734 }
5735
5736 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5737 {
5738         u32 tmp;
5739
5740         tmp = RREG32(RLC_CNTL);
5741         if (tmp != rlc)
5742                 WREG32(RLC_CNTL, rlc);
5743 }
5744
5745 static u32 cik_halt_rlc(struct radeon_device *rdev)
5746 {
5747         u32 data, orig;
5748
5749         orig = data = RREG32(RLC_CNTL);
5750
5751         if (data & RLC_ENABLE) {
5752                 u32 i;
5753
5754                 data &= ~RLC_ENABLE;
5755                 WREG32(RLC_CNTL, data);
5756
5757                 for (i = 0; i < rdev->usec_timeout; i++) {
5758                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5759                                 break;
5760                         udelay(1);
5761                 }
5762
5763                 cik_wait_for_rlc_serdes(rdev);
5764         }
5765
5766         return orig;
5767 }
5768
5769 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5770 {
5771         u32 tmp, i, mask;
5772
5773         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5774         WREG32(RLC_GPR_REG2, tmp);
5775
5776         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5777         for (i = 0; i < rdev->usec_timeout; i++) {
5778                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5779                         break;
5780                 udelay(1);
5781         }
5782
5783         for (i = 0; i < rdev->usec_timeout; i++) {
5784                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5785                         break;
5786                 udelay(1);
5787         }
5788 }
5789
5790 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5791 {
5792         u32 tmp;
5793
5794         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5795         WREG32(RLC_GPR_REG2, tmp);
5796 }
5797
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): clear RLC_CNTL, mask the
 * gui idle interrupts, and wait for the serdes units to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
5813
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK), re-enable the gui idle
 * interrupts, and give the engine a short settling delay.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5829
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	/* clear the two low enable bits of RLC_CGCG_CGLS_CTRL while
	 * the RLC is reprogrammed; cik_init_cg() re-enables CG below
	 */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program RLC load balancing for all SEs/SHs (broadcast) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload with a header
		 * describing the ucode array offset and size
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian blob with a fixed,
		 * per-family dword count
		 */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
5919
/**
 * cik_enable_cgcg - enable/disable coarse grain clock gating (CGCG/CGLS)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable coarse grain clock gating
 *
 * Programs the CGCG override through the RLC serdes (the RLC is halted
 * around the serdes writes) and then updates RLC_CGCG_CGLS_CTRL.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all CUs on all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of a gfx register; presumably acts as a
		 * flush/settling delay before disabling CGCG - TODO confirm
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant MMIO write if nothing changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5955
/**
 * cik_enable_mgcg - enable/disable medium grain clock gating (MGCG)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG (plus the related MGLS/CGTS features)
 *
 * Programs the MGCG override through the RLC serdes (RLC halted around
 * the writes) and updates CP/RLC memory light sleep and the CGTS
 * shader-memory clock gating controls.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all CUs on all SEs/SHs */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS shader-memory clock gating */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* force CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6035
/* MC/ATC/VM clock gating control registers; the MC MGCG and MC LS
 * helpers below toggle the same enable bits in each of these.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6048
6049 static void cik_enable_mc_ls(struct radeon_device *rdev,
6050                              bool enable)
6051 {
6052         int i;
6053         u32 orig, data;
6054
6055         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6056                 orig = data = RREG32(mc_cg_registers[i]);
6057                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6058                         data |= MC_LS_ENABLE;
6059                 else
6060                         data &= ~MC_LS_ENABLE;
6061                 if (data != orig)
6062                         WREG32(mc_cg_registers[i], data);
6063         }
6064 }
6065
6066 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6067                                bool enable)
6068 {
6069         int i;
6070         u32 orig, data;
6071
6072         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6073                 orig = data = RREG32(mc_cg_registers[i]);
6074                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6075                         data |= MC_CG_ENABLE;
6076                 else
6077                         data &= ~MC_CG_ENABLE;
6078                 if (data != orig)
6079                         WREG32(mc_cg_registers[i], data);
6080         }
6081 }
6082
6083 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6084                                  bool enable)
6085 {
6086         u32 orig, data;
6087
6088         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6089                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6090                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6091         } else {
6092                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6093                 data |= 0xff000000;
6094                 if (data != orig)
6095                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6096
6097                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6098                 data |= 0xff000000;
6099                 if (data != orig)
6100                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6101         }
6102 }
6103
6104 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6105                                  bool enable)
6106 {
6107         u32 orig, data;
6108
6109         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6110                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6111                 data |= 0x100;
6112                 if (orig != data)
6113                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6114
6115                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6116                 data |= 0x100;
6117                 if (orig != data)
6118                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6119         } else {
6120                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6121                 data &= ~0x100;
6122                 if (orig != data)
6123                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6124
6125                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6126                 data &= ~0x100;
6127                 if (orig != data)
6128                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6129         }
6130 }
6131
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten; the register ends up set to 0xfff
		 * unconditionally. Possibly the read itself is needed, or
		 * "|=" was intended - matches upstream, so left as-is.
		 */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory gating bits and the dynamic clock mode */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6157
6158 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6159                                bool enable)
6160 {
6161         u32 orig, data;
6162
6163         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6164
6165         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6166                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6167                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6168         else
6169                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6170                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6171
6172         if (orig != data)
6173                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6174 }
6175
6176 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6177                                 bool enable)
6178 {
6179         u32 orig, data;
6180
6181         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6182
6183         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6184                 data &= ~CLOCK_GATING_DIS;
6185         else
6186                 data |= CLOCK_GATING_DIS;
6187
6188         if (orig != data)
6189                 WREG32(HDP_HOST_PATH_CNTL, data);
6190 }
6191
6192 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6193                               bool enable)
6194 {
6195         u32 orig, data;
6196
6197         orig = data = RREG32(HDP_MEM_POWER_LS);
6198
6199         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6200                 data |= HDP_LS_ENABLE;
6201         else
6202                 data &= ~HDP_LS_ENABLE;
6203
6204         if (orig != data)
6205                 WREG32(HDP_MEM_POWER_LS, data);
6206 }
6207
/**
 * cik_update_cg - update clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Dispatches to the per-block enable helpers. For GFX, MGCG must be
 * enabled before CGCG and disabled after it.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is skipped on IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6255
/* Enable clock gating at init: GFX first, then UVD internal CG,
 * then the remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6270
/* Disable clock gating at teardown, in the reverse order of
 * cik_init_cg(): the non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6281
6282 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6283                                           bool enable)
6284 {
6285         u32 data, orig;
6286
6287         orig = data = RREG32(RLC_PG_CNTL);
6288         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6289                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6290         else
6291                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6292         if (orig != data)
6293                 WREG32(RLC_PG_CNTL, data);
6294 }
6295
6296 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6297                                           bool enable)
6298 {
6299         u32 data, orig;
6300
6301         orig = data = RREG32(RLC_PG_CNTL);
6302         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6303                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6304         else
6305                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6306         if (orig != data)
6307                 WREG32(RLC_PG_CNTL, data);
6308 }
6309
6310 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6311 {
6312         u32 data, orig;
6313
6314         orig = data = RREG32(RLC_PG_CNTL);
6315         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6316                 data &= ~DISABLE_CP_PG;
6317         else
6318                 data |= DISABLE_CP_PG;
6319         if (orig != data)
6320                 WREG32(RLC_PG_CNTL, data);
6321 }
6322
6323 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6324 {
6325         u32 data, orig;
6326
6327         orig = data = RREG32(RLC_PG_CNTL);
6328         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6329                 data &= ~DISABLE_GDS_PG;
6330         else
6331                 data |= DISABLE_GDS_PG;
6332         if (orig != data)
6333                 WREG32(RLC_PG_CNTL, data);
6334 }
6335
/* legacy (pre-header) firmware: fixed jump-table size and offsets */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/**
 * cik_init_cp_pg_table - populate the RLC CP power gating table
 *
 * @rdev: radeon_device pointer
 *
 * Copies the jump tables of each CP micro engine (CE, PFP, ME, MEC,
 * and MEC2 on KAVERI) into the RLC cp_table buffer, back to back.
 * New-style firmware carries per-image table offset/size in its
 * header; legacy firmware uses the fixed offsets above.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* KAVERI has an extra MEC2 engine */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			/* me index -> firmware image: 0=CE 1=PFP 2=ME 3=MEC 4=MEC2 */
			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: big-endian blob, fixed table layout */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6423
/* Enable/disable GFX power gating and the RLC auto power-gate control. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result discarded - the read itself is
		 * presumably what matters here (wakes/flushes the gfx
		 * block after disabling PG) - TODO confirm
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6453
6454 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6455 {
6456         u32 mask = 0, tmp, tmp1;
6457         int i;
6458
6459         cik_select_se_sh(rdev, se, sh);
6460         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6461         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6462         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6463
6464         tmp &= 0xffff0000;
6465
6466         tmp |= tmp1;
6467         tmp >>= 16;
6468
6469         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6470                 mask <<= 1;
6471                 mask |= 1;
6472         }
6473
6474         return (~tmp) & mask;
6475 }
6476
6477 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6478 {
6479         u32 i, j, k, active_cu_number = 0;
6480         u32 mask, counter, cu_bitmap;
6481         u32 tmp = 0;
6482
6483         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6484                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6485                         mask = 1;
6486                         cu_bitmap = 0;
6487                         counter = 0;
6488                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6489                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6490                                         if (counter < 2)
6491                                                 cu_bitmap |= mask;
6492                                         counter ++;
6493                                 }
6494                                 mask <<= 1;
6495                         }
6496
6497                         active_cu_number += counter;
6498                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6499                 }
6500         }
6501
6502         WREG32(RLC_PG_AO_CU_MASK, tmp);
6503
6504         tmp = RREG32(RLC_MAX_PG_CU);
6505         tmp &= ~MAX_PU_CU_MASK;
6506         tmp |= MAX_PU_CU(active_cu_number);
6507         WREG32(RLC_MAX_PG_CU, tmp);
6508 }
6509
6510 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6511                                        bool enable)
6512 {
6513         u32 data, orig;
6514
6515         orig = data = RREG32(RLC_PG_CNTL);
6516         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6517                 data |= STATIC_PER_CU_PG_ENABLE;
6518         else
6519                 data &= ~STATIC_PER_CU_PG_ENABLE;
6520         if (orig != data)
6521                 WREG32(RLC_PG_CNTL, data);
6522 }
6523
6524 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6525                                         bool enable)
6526 {
6527         u32 data, orig;
6528
6529         orig = data = RREG32(RLC_PG_CNTL);
6530         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6531                 data |= DYN_PER_CU_PG_ENABLE;
6532         else
6533                 data &= ~DYN_PER_CU_PG_ENABLE;
6534         if (orig != data)
6535                 WREG32(RLC_PG_CNTL, data);
6536 }
6537
/* offsets into RLC scratch space for the save/restore list and the
 * clear-state descriptor
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - initialize RLC state for gfx power gating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor and save/restore register list
 * into RLC scratch space, points the RLC at the save/restore and CP
 * table buffers, and programs the PG timing controls.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear-state buffer address (hi, lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6589
/* Enable/disable all gfx power gating features (coarse-grain plus
 * static and dynamic per-CU medium-grain power gating).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6596
6597 u32 cik_get_csb_size(struct radeon_device *rdev)
6598 {
6599         u32 count = 0;
6600         const struct cs_section_def *sect = NULL;
6601         const struct cs_extent_def *ext = NULL;
6602
6603         if (rdev->rlc.cs_data == NULL)
6604                 return 0;
6605
6606         /* begin clear state */
6607         count += 2;
6608         /* context control state */
6609         count += 3;
6610
6611         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6612                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6613                         if (sect->id == SECT_CONTEXT)
6614                                 count += 2 + ext->reg_count;
6615                         else
6616                                 return 0;
6617                 }
6618         }
6619         /* pa_sc_raster_config/pa_sc_raster_config1 */
6620         count += 4;
6621         /* end clear state */
6622         count += 2;
6623         /* clear state */
6624         count += 2;
6625
6626         return count;
6627 }
6628
/**
 * cik_get_csb_buffer - emit the clear-state buffer packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Fills @buffer with the PM4 packet stream described by
 * rdev->rlc.cs_data; the dword count must match cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6693
6694 static void cik_init_pg(struct radeon_device *rdev)
6695 {
6696         if (rdev->pg_flags) {
6697                 cik_enable_sck_slowdown_on_pu(rdev, true);
6698                 cik_enable_sck_slowdown_on_pd(rdev, true);
6699                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6700                         cik_init_gfx_cgpg(rdev);
6701                         cik_enable_cp_pg(rdev, true);
6702                         cik_enable_gds_pg(rdev, true);
6703                 }
6704                 cik_init_ao_cu_mask(rdev);
6705                 cik_update_gfx_pg(rdev, true);
6706         }
6707 }
6708
6709 static void cik_fini_pg(struct radeon_device *rdev)
6710 {
6711         if (rdev->pg_flags) {
6712                 cik_update_gfx_pg(rdev, false);
6713                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6714                         cik_enable_cp_pg(rdev, false);
6715                         cik_enable_gds_pg(rdev, false);
6716                 }
6717         }
6718 }
6719
6720 /*
6721  * Interrupts
6722  * Starting with r6xx, interrupts are handled via a ring buffer.
6723  * Ring buffers are areas of GPU accessible memory that the GPU
6724  * writes interrupt vectors into and the host reads vectors out of.
6725  * There is a rptr (read pointer) that determines where the
6726  * host is currently reading, and a wptr (write pointer)
6727  * which determines where the GPU has written.  When the
6728  * pointers are equal, the ring is idle.  When the GPU
6729  * writes vectors to the ring buffer, it increments the
6730  * wptr.  When there is an interrupt, the host then starts
6731  * fetching commands and processing them until the pointers are
6732  * equal again at which point it updates the rptr.
6733  */
6734
6735 /**
6736  * cik_enable_interrupts - Enable the interrupt ring buffer
6737  *
6738  * @rdev: radeon_device pointer
6739  *
6740  * Enable the interrupt ring buffer (CIK).
6741  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	/* read-modify-write both control registers so other fields are kept */
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* record sw state so cik_irq_set() knows the ring is live */
	rdev->ih.enabled = true;
}
6753
6754 /**
6755  * cik_disable_interrupts - Disable the interrupt ring buffer
6756  *
6757  * @rdev: radeon_device pointer
6758  *
6759  * Disable the interrupt ring buffer (CIK).
6760  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* stop the ring before the controller, then reset both pointers */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	/* keep sw bookkeeping in sync with the hw state */
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
6776
6777 /**
6778  * cik_disable_interrupt_state - Disable all interrupt sources
6779  *
6780  * @rdev: radeon_device pointer
6781  *
6782  * Clear all interrupt enable bits used by the driver (CIK).
6783  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: preserve only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: MEC1/MEC2, pipes 0-3 each */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch crtcs this asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables but keep the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6853
6854 /**
6855  * cik_irq_init - init and enable the interrupt ring
6856  *
6857  * @rdev: radeon_device pointer
6858  *
6859  * Allocate a ring buffer for the interrupt controller,
6860  * enable the RLC, disable interrupts, enable the IH
6861  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6863  * Returns 0 for success, errors for failure.
6864  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc; on failure free the IH ring we just allocated */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6935
6936 /**
6937  * cik_irq_set - enable/disable interrupt sources
6938  *
6939  * @rdev: radeon_device pointer
6940  *
6941  * Enable interrupt sources on the GPU (vblanks, hpd,
6942  * etc.) (CIK).
6943  * Returns 0 for success, errors for failure.
6944  */
6945 int cik_irq_set(struct radeon_device *rdev)
6946 {
6947         u32 cp_int_cntl;
6948         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6949         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6950         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6951         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6952         u32 grbm_int_cntl = 0;
6953         u32 dma_cntl, dma_cntl1;
6954
6955         if (!rdev->irq.installed) {
6956                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6957                 return -EINVAL;
6958         }
6959         /* don't enable anything if the ih is disabled */
6960         if (!rdev->ih.enabled) {
6961                 cik_disable_interrupts(rdev);
6962                 /* force the active interrupt state to all disabled */
6963                 cik_disable_interrupt_state(rdev);
6964                 return 0;
6965         }
6966
6967         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6968                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6969         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6970
6971         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6972         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6973         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6974         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6975         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6976         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6977
6978         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6979         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6980
6981         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6982         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6983         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6984         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6985         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6986         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6987         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6988         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6989
6990         /* enable CP interrupts on all rings */
6991         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6992                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6993                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6994         }
6995         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6996                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6997                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6998                 if (ring->me == 1) {
6999                         switch (ring->pipe) {
7000                         case 0:
7001                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7002                                 break;
7003                         case 1:
7004                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7005                                 break;
7006                         case 2:
7007                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7008                                 break;
7009                         case 3:
7010                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7011                                 break;
7012                         default:
7013                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7014                                 break;
7015                         }
7016                 } else if (ring->me == 2) {
7017                         switch (ring->pipe) {
7018                         case 0:
7019                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7020                                 break;
7021                         case 1:
7022                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7023                                 break;
7024                         case 2:
7025                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7026                                 break;
7027                         case 3:
7028                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7029                                 break;
7030                         default:
7031                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7032                                 break;
7033                         }
7034                 } else {
7035                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7036                 }
7037         }
7038         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7039                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7040                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7041                 if (ring->me == 1) {
7042                         switch (ring->pipe) {
7043                         case 0:
7044                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7045                                 break;
7046                         case 1:
7047                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7048                                 break;
7049                         case 2:
7050                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7051                                 break;
7052                         case 3:
7053                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7054                                 break;
7055                         default:
7056                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7057                                 break;
7058                         }
7059                 } else if (ring->me == 2) {
7060                         switch (ring->pipe) {
7061                         case 0:
7062                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7063                                 break;
7064                         case 1:
7065                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7066                                 break;
7067                         case 2:
7068                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7069                                 break;
7070                         case 3:
7071                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7072                                 break;
7073                         default:
7074                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7075                                 break;
7076                         }
7077                 } else {
7078                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7079                 }
7080         }
7081
7082         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7083                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7084                 dma_cntl |= TRAP_ENABLE;
7085         }
7086
7087         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7088                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7089                 dma_cntl1 |= TRAP_ENABLE;
7090         }
7091
7092         if (rdev->irq.crtc_vblank_int[0] ||
7093             atomic_read(&rdev->irq.pflip[0])) {
7094                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7095                 crtc1 |= VBLANK_INTERRUPT_MASK;
7096         }
7097         if (rdev->irq.crtc_vblank_int[1] ||
7098             atomic_read(&rdev->irq.pflip[1])) {
7099                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7100                 crtc2 |= VBLANK_INTERRUPT_MASK;
7101         }
7102         if (rdev->irq.crtc_vblank_int[2] ||
7103             atomic_read(&rdev->irq.pflip[2])) {
7104                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7105                 crtc3 |= VBLANK_INTERRUPT_MASK;
7106         }
7107         if (rdev->irq.crtc_vblank_int[3] ||
7108             atomic_read(&rdev->irq.pflip[3])) {
7109                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7110                 crtc4 |= VBLANK_INTERRUPT_MASK;
7111         }
7112         if (rdev->irq.crtc_vblank_int[4] ||
7113             atomic_read(&rdev->irq.pflip[4])) {
7114                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7115                 crtc5 |= VBLANK_INTERRUPT_MASK;
7116         }
7117         if (rdev->irq.crtc_vblank_int[5] ||
7118             atomic_read(&rdev->irq.pflip[5])) {
7119                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7120                 crtc6 |= VBLANK_INTERRUPT_MASK;
7121         }
7122         if (rdev->irq.hpd[0]) {
7123                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7124                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7125         }
7126         if (rdev->irq.hpd[1]) {
7127                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7128                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7129         }
7130         if (rdev->irq.hpd[2]) {
7131                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7132                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7133         }
7134         if (rdev->irq.hpd[3]) {
7135                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7136                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7137         }
7138         if (rdev->irq.hpd[4]) {
7139                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7140                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7141         }
7142         if (rdev->irq.hpd[5]) {
7143                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7144                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7145         }
7146
7147         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7148
7149         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7150         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7151
7152         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7153         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7154         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7155         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7156         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7157         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7158         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7159         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7160
7161         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7162
7163         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7164         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7165         if (rdev->num_crtc >= 4) {
7166                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7167                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7168         }
7169         if (rdev->num_crtc >= 6) {
7170                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7171                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7172         }
7173
7174         if (rdev->num_crtc >= 2) {
7175                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7176                        GRPH_PFLIP_INT_MASK);
7177                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7178                        GRPH_PFLIP_INT_MASK);
7179         }
7180         if (rdev->num_crtc >= 4) {
7181                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7182                        GRPH_PFLIP_INT_MASK);
7183                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7184                        GRPH_PFLIP_INT_MASK);
7185         }
7186         if (rdev->num_crtc >= 6) {
7187                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7188                        GRPH_PFLIP_INT_MASK);
7189                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7190                        GRPH_PFLIP_INT_MASK);
7191         }
7192
7193         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7194         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7195         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7196         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7197         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7198         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7199
7200         /* posting read */
7201         RREG32(SRBM_STATUS);
7202
7203         return 0;
7204 }
7205
7206 /**
7207  * cik_irq_ack - ack interrupt sources
7208  *
7209  * @rdev: radeon_device pointer
7210  *
7211  * Ack interrupt sources on the GPU (vblanks, hpd,
7212  * etc.) (CIK).  Certain interrupts sources are sw
7213  * generated and do not require an explicit ack.
7214  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for the handler */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* per-crtc pageflip status, only for crtcs present on this asic */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip/vblank/vline for crtc0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack pflip/vblank/vline for crtc2/3 */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack pflip/vblank/vline for crtc4/5 */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug connect/disconnect interrupts (HPD1-6) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hotplug RX (e.g. DP short pulse) interrupts (HPD1-6) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7354
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: the short delay gives any interrupt
	 * already in flight time to assert before cik_irq_ack() clears
	 * the latched source bits; only then is the per-source interrupt
	 * enable state torn down.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7370
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* Full interrupt teardown first, then halt the RLC so no new
	 * interrupt sources fire while the chip is being suspended.
	 */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7384
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* Interrupts must be fully quiesced (and the RLC stopped) before
	 * the IH ring memory is released below.
	 */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7399
7400 /**
7401  * cik_get_ih_wptr - get the IH ring buffer wptr
7402  *
7403  * @rdev: radeon_device pointer
7404  *
7405  * Get the IH ring buffer wptr from either the register
7406  * or the writeback memory buffer (CIK).  Also check for
7407  * ring buffer overflow and deal with it.
7408  * Used by cik_irq_process().
7409  * Returns the value of the wptr.
7410  */
7411 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7412 {
7413         u32 wptr, tmp;
7414
7415         if (rdev->wb.enabled)
7416                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7417         else
7418                 wptr = RREG32(IH_RB_WPTR);
7419
7420         if (wptr & RB_OVERFLOW) {
7421                 wptr &= ~RB_OVERFLOW;
7422                 /* When a ring buffer overflow happen start parsing interrupt
7423                  * from the last not overwritten vector (wptr + 16). Hopefully
7424                  * this should allow us to catchup.
7425                  */
7426                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7427                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7428                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7429                 tmp = RREG32(IH_RB_CNTL);
7430                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7431                 WREG32(IH_RB_CNTL, tmp);
7432         }
7433         return (wptr & rdev->ih.ptr_mask);
7434 }
7435
7436 /*        CIK IV Ring
7437  * Each IV ring entry is 128 bits:
7438  * [7:0]    - interrupt source id
7439  * [31:8]   - reserved
7440  * [59:32]  - interrupt source data
7441  * [63:60]  - reserved
7442  * [71:64]  - RINGID
7443  *            CP:
7444  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7445  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7446  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7447  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7448  *            PIPE_ID - ME0 0=3D
7449  *                    - ME1&2 compute dispatcher (4 pipes each)
7450  *            SDMA:
7451  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7452  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7453  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7454  * [79:72]  - VMID
7455  * [95:80]  - PASID
7456  * [127:96] - reserved
7457  */
7458 /**
7459  * cik_irq_process - interrupt handler
7460  *
7461  * @rdev: radeon_device pointer
7462  *
 * Interrupt handler (CIK).  Walk the IH ring,
7464  * ack interrupts and schedule work to handle
7465  * interrupt events.
7466  * Returns irq process return code.
7467  */
7468 int cik_irq_process(struct radeon_device *rdev)
7469 {
7470         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7471         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7472         u32 wptr;
7473         u32 rptr;
7474         u32 src_id, src_data, ring_id;
7475         u8 me_id, pipe_id, queue_id;
7476         u32 ring_index;
7477         bool queue_hotplug = false;
7478         bool queue_dp = false;
7479         bool queue_reset = false;
7480         u32 addr, status, mc_client;
7481         bool queue_thermal = false;
7482
7483         if (!rdev->ih.enabled || rdev->shutdown)
7484                 return IRQ_NONE;
7485
7486         wptr = cik_get_ih_wptr(rdev);
7487
7488 restart_ih:
7489         /* is somebody else already processing irqs? */
7490         if (atomic_xchg(&rdev->ih.lock, 1))
7491                 return IRQ_NONE;
7492
7493         rptr = rdev->ih.rptr;
7494         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7495
7496         /* Order reading of wptr vs. reading of IH ring data */
7497         rmb();
7498
7499         /* display interrupts */
7500         cik_irq_ack(rdev);
7501
7502         while (rptr != wptr) {
7503                 /* wptr/rptr are in bytes! */
7504                 ring_index = rptr / 4;
7505
7506                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7507                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7508                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7509
7510                 switch (src_id) {
7511                 case 1: /* D1 vblank/vline */
7512                         switch (src_data) {
7513                         case 0: /* D1 vblank */
7514                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7515                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7516
7517                                 if (rdev->irq.crtc_vblank_int[0]) {
7518                                         drm_handle_vblank(rdev->ddev, 0);
7519                                         rdev->pm.vblank_sync = true;
7520                                         wake_up(&rdev->irq.vblank_queue);
7521                                 }
7522                                 if (atomic_read(&rdev->irq.pflip[0]))
7523                                         radeon_crtc_handle_vblank(rdev, 0);
7524                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7525                                 DRM_DEBUG("IH: D1 vblank\n");
7526
7527                                 break;
7528                         case 1: /* D1 vline */
7529                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7530                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7531
7532                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7533                                 DRM_DEBUG("IH: D1 vline\n");
7534
7535                                 break;
7536                         default:
7537                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7538                                 break;
7539                         }
7540                         break;
7541                 case 2: /* D2 vblank/vline */
7542                         switch (src_data) {
7543                         case 0: /* D2 vblank */
7544                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7545                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7546
7547                                 if (rdev->irq.crtc_vblank_int[1]) {
7548                                         drm_handle_vblank(rdev->ddev, 1);
7549                                         rdev->pm.vblank_sync = true;
7550                                         wake_up(&rdev->irq.vblank_queue);
7551                                 }
7552                                 if (atomic_read(&rdev->irq.pflip[1]))
7553                                         radeon_crtc_handle_vblank(rdev, 1);
7554                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7555                                 DRM_DEBUG("IH: D2 vblank\n");
7556
7557                                 break;
7558                         case 1: /* D2 vline */
7559                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7560                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7561
7562                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7563                                 DRM_DEBUG("IH: D2 vline\n");
7564
7565                                 break;
7566                         default:
7567                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7568                                 break;
7569                         }
7570                         break;
7571                 case 3: /* D3 vblank/vline */
7572                         switch (src_data) {
7573                         case 0: /* D3 vblank */
7574                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7575                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7576
7577                                 if (rdev->irq.crtc_vblank_int[2]) {
7578                                         drm_handle_vblank(rdev->ddev, 2);
7579                                         rdev->pm.vblank_sync = true;
7580                                         wake_up(&rdev->irq.vblank_queue);
7581                                 }
7582                                 if (atomic_read(&rdev->irq.pflip[2]))
7583                                         radeon_crtc_handle_vblank(rdev, 2);
7584                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7585                                 DRM_DEBUG("IH: D3 vblank\n");
7586
7587                                 break;
7588                         case 1: /* D3 vline */
7589                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7590                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7591
7592                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7593                                 DRM_DEBUG("IH: D3 vline\n");
7594
7595                                 break;
7596                         default:
7597                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7598                                 break;
7599                         }
7600                         break;
7601                 case 4: /* D4 vblank/vline */
7602                         switch (src_data) {
7603                         case 0: /* D4 vblank */
7604                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7605                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7606
7607                                 if (rdev->irq.crtc_vblank_int[3]) {
7608                                         drm_handle_vblank(rdev->ddev, 3);
7609                                         rdev->pm.vblank_sync = true;
7610                                         wake_up(&rdev->irq.vblank_queue);
7611                                 }
7612                                 if (atomic_read(&rdev->irq.pflip[3]))
7613                                         radeon_crtc_handle_vblank(rdev, 3);
7614                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7615                                 DRM_DEBUG("IH: D4 vblank\n");
7616
7617                                 break;
7618                         case 1: /* D4 vline */
7619                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7620                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7621
7622                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7623                                 DRM_DEBUG("IH: D4 vline\n");
7624
7625                                 break;
7626                         default:
7627                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7628                                 break;
7629                         }
7630                         break;
7631                 case 5: /* D5 vblank/vline */
7632                         switch (src_data) {
7633                         case 0: /* D5 vblank */
7634                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7635                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7636
7637                                 if (rdev->irq.crtc_vblank_int[4]) {
7638                                         drm_handle_vblank(rdev->ddev, 4);
7639                                         rdev->pm.vblank_sync = true;
7640                                         wake_up(&rdev->irq.vblank_queue);
7641                                 }
7642                                 if (atomic_read(&rdev->irq.pflip[4]))
7643                                         radeon_crtc_handle_vblank(rdev, 4);
7644                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7645                                 DRM_DEBUG("IH: D5 vblank\n");
7646
7647                                 break;
7648                         case 1: /* D5 vline */
7649                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7650                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7651
7652                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7653                                 DRM_DEBUG("IH: D5 vline\n");
7654
7655                                 break;
7656                         default:
7657                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7658                                 break;
7659                         }
7660                         break;
7661                 case 6: /* D6 vblank/vline */
7662                         switch (src_data) {
7663                         case 0: /* D6 vblank */
7664                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7665                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7666
7667                                 if (rdev->irq.crtc_vblank_int[5]) {
7668                                         drm_handle_vblank(rdev->ddev, 5);
7669                                         rdev->pm.vblank_sync = true;
7670                                         wake_up(&rdev->irq.vblank_queue);
7671                                 }
7672                                 if (atomic_read(&rdev->irq.pflip[5]))
7673                                         radeon_crtc_handle_vblank(rdev, 5);
7674                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7675                                 DRM_DEBUG("IH: D6 vblank\n");
7676
7677                                 break;
7678                         case 1: /* D6 vline */
7679                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7680                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7681
7682                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7683                                 DRM_DEBUG("IH: D6 vline\n");
7684
7685                                 break;
7686                         default:
7687                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7688                                 break;
7689                         }
7690                         break;
7691                 case 8: /* D1 page flip */
7692                 case 10: /* D2 page flip */
7693                 case 12: /* D3 page flip */
7694                 case 14: /* D4 page flip */
7695                 case 16: /* D5 page flip */
7696                 case 18: /* D6 page flip */
7697                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7698                         if (radeon_use_pflipirq > 0)
7699                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7700                         break;
7701                 case 42: /* HPD hotplug */
7702                         switch (src_data) {
7703                         case 0:
7704                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7705                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7708                                 queue_hotplug = true;
7709                                 DRM_DEBUG("IH: HPD1\n");
7710
7711                                 break;
7712                         case 1:
7713                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7714                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7715
7716                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7717                                 queue_hotplug = true;
7718                                 DRM_DEBUG("IH: HPD2\n");
7719
7720                                 break;
7721                         case 2:
7722                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7723                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724
7725                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7726                                 queue_hotplug = true;
7727                                 DRM_DEBUG("IH: HPD3\n");
7728
7729                                 break;
7730                         case 3:
7731                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7732                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7733
7734                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7735                                 queue_hotplug = true;
7736                                 DRM_DEBUG("IH: HPD4\n");
7737
7738                                 break;
7739                         case 4:
7740                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7741                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7742
7743                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7744                                 queue_hotplug = true;
7745                                 DRM_DEBUG("IH: HPD5\n");
7746
7747                                 break;
7748                         case 5:
7749                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7750                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7753                                 queue_hotplug = true;
7754                                 DRM_DEBUG("IH: HPD6\n");
7755
7756                                 break;
7757                         case 6:
7758                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7759                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7760
7761                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7762                                 queue_dp = true;
7763                                 DRM_DEBUG("IH: HPD_RX 1\n");
7764
7765                                 break;
7766                         case 7:
7767                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7768                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769
7770                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7771                                 queue_dp = true;
7772                                 DRM_DEBUG("IH: HPD_RX 2\n");
7773
7774                                 break;
7775                         case 8:
7776                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7777                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7778
7779                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7780                                 queue_dp = true;
7781                                 DRM_DEBUG("IH: HPD_RX 3\n");
7782
7783                                 break;
7784                         case 9:
7785                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7786                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7787
7788                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7789                                 queue_dp = true;
7790                                 DRM_DEBUG("IH: HPD_RX 4\n");
7791
7792                                 break;
7793                         case 10:
7794                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7795                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796
7797                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7798                                 queue_dp = true;
7799                                 DRM_DEBUG("IH: HPD_RX 5\n");
7800
7801                                 break;
7802                         case 11:
7803                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7804                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7805
7806                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7807                                 queue_dp = true;
7808                                 DRM_DEBUG("IH: HPD_RX 6\n");
7809
7810                                 break;
7811                         default:
7812                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7813                                 break;
7814                         }
7815                         break;
7816                 case 96:
7817                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7818                         WREG32(SRBM_INT_ACK, 0x1);
7819                         break;
7820                 case 124: /* UVD */
7821                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7822                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7823                         break;
7824                 case 146:
7825                 case 147:
7826                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7827                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7828                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7829                         /* reset addr and status */
7830                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7831                         if (addr == 0x0 && status == 0x0)
7832                                 break;
7833                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7834                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7835                                 addr);
7836                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7837                                 status);
7838                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7839                         break;
7840                 case 167: /* VCE */
7841                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7842                         switch (src_data) {
7843                         case 0:
7844                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7845                                 break;
7846                         case 1:
7847                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7848                                 break;
7849                         default:
7850                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7851                                 break;
7852                         }
7853                         break;
7854                 case 176: /* GFX RB CP_INT */
7855                 case 177: /* GFX IB CP_INT */
7856                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7857                         break;
7858                 case 181: /* CP EOP event */
7859                         DRM_DEBUG("IH: CP EOP\n");
7860                         /* XXX check the bitfield order! */
7861                         me_id = (ring_id & 0x60) >> 5;
7862                         pipe_id = (ring_id & 0x18) >> 3;
7863                         queue_id = (ring_id & 0x7) >> 0;
7864                         switch (me_id) {
7865                         case 0:
7866                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7867                                 break;
7868                         case 1:
7869                         case 2:
7870                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7871                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7872                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7873                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7874                                 break;
7875                         }
7876                         break;
7877                 case 184: /* CP Privileged reg access */
7878                         DRM_ERROR("Illegal register access in command stream\n");
7879                         /* XXX check the bitfield order! */
7880                         me_id = (ring_id & 0x60) >> 5;
7881                         pipe_id = (ring_id & 0x18) >> 3;
7882                         queue_id = (ring_id & 0x7) >> 0;
7883                         switch (me_id) {
7884                         case 0:
7885                                 /* This results in a full GPU reset, but all we need to do is soft
7886                                  * reset the CP for gfx
7887                                  */
7888                                 queue_reset = true;
7889                                 break;
7890                         case 1:
7891                                 /* XXX compute */
7892                                 queue_reset = true;
7893                                 break;
7894                         case 2:
7895                                 /* XXX compute */
7896                                 queue_reset = true;
7897                                 break;
7898                         }
7899                         break;
7900                 case 185: /* CP Privileged inst */
7901                         DRM_ERROR("Illegal instruction in command stream\n");
7902                         /* XXX check the bitfield order! */
7903                         me_id = (ring_id & 0x60) >> 5;
7904                         pipe_id = (ring_id & 0x18) >> 3;
7905                         queue_id = (ring_id & 0x7) >> 0;
7906                         switch (me_id) {
7907                         case 0:
7908                                 /* This results in a full GPU reset, but all we need to do is soft
7909                                  * reset the CP for gfx
7910                                  */
7911                                 queue_reset = true;
7912                                 break;
7913                         case 1:
7914                                 /* XXX compute */
7915                                 queue_reset = true;
7916                                 break;
7917                         case 2:
7918                                 /* XXX compute */
7919                                 queue_reset = true;
7920                                 break;
7921                         }
7922                         break;
7923                 case 224: /* SDMA trap event */
7924                         /* XXX check the bitfield order! */
7925                         me_id = (ring_id & 0x3) >> 0;
7926                         queue_id = (ring_id & 0xc) >> 2;
7927                         DRM_DEBUG("IH: SDMA trap\n");
7928                         switch (me_id) {
7929                         case 0:
7930                                 switch (queue_id) {
7931                                 case 0:
7932                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7933                                         break;
7934                                 case 1:
7935                                         /* XXX compute */
7936                                         break;
7937                                 case 2:
7938                                         /* XXX compute */
7939                                         break;
7940                                 }
7941                                 break;
7942                         case 1:
7943                                 switch (queue_id) {
7944                                 case 0:
7945                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7946                                         break;
7947                                 case 1:
7948                                         /* XXX compute */
7949                                         break;
7950                                 case 2:
7951                                         /* XXX compute */
7952                                         break;
7953                                 }
7954                                 break;
7955                         }
7956                         break;
7957                 case 230: /* thermal low to high */
7958                         DRM_DEBUG("IH: thermal low to high\n");
7959                         rdev->pm.dpm.thermal.high_to_low = false;
7960                         queue_thermal = true;
7961                         break;
7962                 case 231: /* thermal high to low */
7963                         DRM_DEBUG("IH: thermal high to low\n");
7964                         rdev->pm.dpm.thermal.high_to_low = true;
7965                         queue_thermal = true;
7966                         break;
7967                 case 233: /* GUI IDLE */
7968                         DRM_DEBUG("IH: GUI idle\n");
7969                         break;
7970                 case 241: /* SDMA Privileged inst */
7971                 case 247: /* SDMA Privileged inst */
7972                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7973                         /* XXX check the bitfield order! */
7974                         me_id = (ring_id & 0x3) >> 0;
7975                         queue_id = (ring_id & 0xc) >> 2;
7976                         switch (me_id) {
7977                         case 0:
7978                                 switch (queue_id) {
7979                                 case 0:
7980                                         queue_reset = true;
7981                                         break;
7982                                 case 1:
7983                                         /* XXX compute */
7984                                         queue_reset = true;
7985                                         break;
7986                                 case 2:
7987                                         /* XXX compute */
7988                                         queue_reset = true;
7989                                         break;
7990                                 }
7991                                 break;
7992                         case 1:
7993                                 switch (queue_id) {
7994                                 case 0:
7995                                         queue_reset = true;
7996                                         break;
7997                                 case 1:
7998                                         /* XXX compute */
7999                                         queue_reset = true;
8000                                         break;
8001                                 case 2:
8002                                         /* XXX compute */
8003                                         queue_reset = true;
8004                                         break;
8005                                 }
8006                                 break;
8007                         }
8008                         break;
8009                 default:
8010                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8011                         break;
8012                 }
8013
8014                 /* wptr/rptr are in bytes! */
8015                 rptr += 16;
8016                 rptr &= rdev->ih.ptr_mask;
8017                 WREG32(IH_RB_RPTR, rptr);
8018         }
8019         if (queue_dp)
8020                 schedule_work(&rdev->dp_work);
8021         if (queue_hotplug)
8022                 schedule_delayed_work(&rdev->hotplug_work, 0);
8023         if (queue_reset) {
8024                 rdev->needs_reset = true;
8025                 wake_up_all(&rdev->fence_queue);
8026         }
8027         if (queue_thermal)
8028                 schedule_work(&rdev->pm.dpm.thermal.work);
8029         rdev->ih.rptr = rptr;
8030         atomic_set(&rdev->ih.lock, 0);
8031
8032         /* make sure wptr hasn't changed while processing */
8033         wptr = cik_get_ih_wptr(rdev);
8034         if (wptr != rptr)
8035                 goto restart_ih;
8036
8037         return IRQ_HANDLED;
8038 }
8039
8040 /*
8041  * startup/shutdown callbacks
8042  */
8043 static void cik_uvd_init(struct radeon_device *rdev)
8044 {
8045         int r;
8046
8047         if (!rdev->has_uvd)
8048                 return;
8049
8050         r = radeon_uvd_init(rdev);
8051         if (r) {
8052                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8053                 /*
8054                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8055                  * to early fails cik_uvd_start() and thus nothing happens
8056                  * there. So it is pointless to try to go through that code
8057                  * hence why we disable uvd here.
8058                  */
8059                 rdev->has_uvd = 0;
8060                 return;
8061         }
8062         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8063         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8064 }
8065
8066 static void cik_uvd_start(struct radeon_device *rdev)
8067 {
8068         int r;
8069
8070         if (!rdev->has_uvd)
8071                 return;
8072
8073         r = radeon_uvd_resume(rdev);
8074         if (r) {
8075                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8076                 goto error;
8077         }
8078         r = uvd_v4_2_resume(rdev);
8079         if (r) {
8080                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8081                 goto error;
8082         }
8083         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8084         if (r) {
8085                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8086                 goto error;
8087         }
8088         return;
8089
8090 error:
8091         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8092 }
8093
8094 static void cik_uvd_resume(struct radeon_device *rdev)
8095 {
8096         struct radeon_ring *ring;
8097         int r;
8098
8099         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8100                 return;
8101
8102         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8103         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8104         if (r) {
8105                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8106                 return;
8107         }
8108         r = uvd_v1_0_init(rdev);
8109         if (r) {
8110                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8111                 return;
8112         }
8113 }
8114
8115 static void cik_vce_init(struct radeon_device *rdev)
8116 {
8117         int r;
8118
8119         if (!rdev->has_vce)
8120                 return;
8121
8122         r = radeon_vce_init(rdev);
8123         if (r) {
8124                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8125                 /*
8126                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8127                  * to early fails cik_vce_start() and thus nothing happens
8128                  * there. So it is pointless to try to go through that code
8129                  * hence why we disable vce here.
8130                  */
8131                 rdev->has_vce = 0;
8132                 return;
8133         }
8134         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8135         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8136         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8137         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8138 }
8139
8140 static void cik_vce_start(struct radeon_device *rdev)
8141 {
8142         int r;
8143
8144         if (!rdev->has_vce)
8145                 return;
8146
8147         r = radeon_vce_resume(rdev);
8148         if (r) {
8149                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8150                 goto error;
8151         }
8152         r = vce_v2_0_resume(rdev);
8153         if (r) {
8154                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8155                 goto error;
8156         }
8157         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8158         if (r) {
8159                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8160                 goto error;
8161         }
8162         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8163         if (r) {
8164                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8165                 goto error;
8166         }
8167         return;
8168
8169 error:
8170         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8171         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8172 }
8173
8174 static void cik_vce_resume(struct radeon_device *rdev)
8175 {
8176         struct radeon_ring *ring;
8177         int r;
8178
8179         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8180                 return;
8181
8182         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8183         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8184         if (r) {
8185                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8186                 return;
8187         }
8188         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8189         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8190         if (r) {
8191                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8192                 return;
8193         }
8194         r = vce_v1_0_init(rdev);
8195         if (r) {
8196                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8197                 return;
8198         }
8199 }
8200
8201 /**
8202  * cik_startup - program the asic to a functional state
8203  *
8204  * @rdev: radeon_device pointer
8205  *
8206  * Programs the asic to a functional state (CIK).
8207  * Called by cik_init() and cik_resume().
8208  * Returns 0 for success, error for failure.
8209  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware unless DPM is already enabled */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the per-family RLC save/restore register list */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring before the rings are used */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE start failures are not fatal; they disable themselves */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware uses type-2 NOP packets; everything
	 * else pads with type-3 NOPs */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* bring up UVD/VCE rings (no-ops if the start step failed above) */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8402
8403 /**
8404  * cik_resume - resume the asic to a functional state
8405  *
8406  * @rdev: radeon_device pointer
8407  *
8408  * Programs the asic to a functional state (CIK).
8409  * Called at resume.
8410  * Returns 0 for success, error for failure.
8411  */
8412 int cik_resume(struct radeon_device *rdev)
8413 {
8414         int r;
8415
8416         /* post card */
8417         atom_asic_init(rdev->mode_info.atom_context);
8418
8419         /* init golden registers */
8420         cik_init_golden_registers(rdev);
8421
8422         if (rdev->pm.pm_method == PM_METHOD_DPM)
8423                 radeon_pm_resume(rdev);
8424
8425         rdev->accel_working = true;
8426         r = cik_startup(rdev);
8427         if (r) {
8428                 DRM_ERROR("cik startup failed on resume\n");
8429                 rdev->accel_working = false;
8430                 return r;
8431         }
8432
8433         return r;
8434
8435 }
8436
8437 /**
8438  * cik_suspend - suspend the asic
8439  *
8440  * @rdev: radeon_device pointer
8441  *
8442  * Bring the chip into a state suitable for suspend (CIK).
8443  * Called at suspend.
8444  * Returns 0 for success.
8445  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines before touching the rest of the hw */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating/clockgating before IRQs and GART go away */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART is torn down last; nothing above may DMA after this */
	cik_pcie_gart_disable(rdev);
	return 0;
}
8466
/* The plan is to move initialization into that function and to use
 * helper functions so that radeon_device_init does little more
 * than call the asic specific functions. This should also allow
 * removing a bunch of callback functions like vram_info.
 */
8473 /**
8474  * cik_init - asic specific driver and hw init
8475  *
8476  * @rdev: radeon_device pointer
8477  *
8478  * Setup asic specific driver variables and program the hw
8479  * to a functional state (CIK).
8480  * Called at driver startup.
8481  * Returns 0 for success, errors for failure.
8482  */
8483 int cik_init(struct radeon_device *rdev)
8484 {
8485         struct radeon_ring *ring;
8486         int r;
8487
8488         /* Read BIOS */
8489         if (!radeon_get_bios(rdev)) {
8490                 if (ASIC_IS_AVIVO(rdev))
8491                         return -EINVAL;
8492         }
8493         /* Must be an ATOMBIOS */
8494         if (!rdev->is_atom_bios) {
8495                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8496                 return -EINVAL;
8497         }
8498         r = radeon_atombios_init(rdev);
8499         if (r)
8500                 return r;
8501
8502         /* Post card if necessary */
8503         if (!radeon_card_posted(rdev)) {
8504                 if (!rdev->bios) {
8505                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8506                         return -EINVAL;
8507                 }
8508                 DRM_INFO("GPU not posted. posting now...\n");
8509                 atom_asic_init(rdev->mode_info.atom_context);
8510         }
8511         /* init golden registers */
8512         cik_init_golden_registers(rdev);
8513         /* Initialize scratch registers */
8514         cik_scratch_init(rdev);
8515         /* Initialize surface registers */
8516         radeon_surface_init(rdev);
8517         /* Initialize clocks */
8518         radeon_get_clock_info(rdev->ddev);
8519
8520         /* Fence driver */
8521         r = radeon_fence_driver_init(rdev);
8522         if (r)
8523                 return r;
8524
8525         /* initialize memory controller */
8526         r = cik_mc_init(rdev);
8527         if (r)
8528                 return r;
8529         /* Memory manager */
8530         r = radeon_bo_init(rdev);
8531         if (r)
8532                 return r;
8533
8534         if (rdev->flags & RADEON_IS_IGP) {
8535                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8536                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8537                         r = cik_init_microcode(rdev);
8538                         if (r) {
8539                                 DRM_ERROR("Failed to load firmware!\n");
8540                                 return r;
8541                         }
8542                 }
8543         } else {
8544                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8545                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8546                     !rdev->mc_fw) {
8547                         r = cik_init_microcode(rdev);
8548                         if (r) {
8549                                 DRM_ERROR("Failed to load firmware!\n");
8550                                 return r;
8551                         }
8552                 }
8553         }
8554
8555         /* Initialize power management */
8556         radeon_pm_init(rdev);
8557
8558         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8559         ring->ring_obj = NULL;
8560         r600_ring_init(rdev, ring, 1024 * 1024);
8561
8562         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8563         ring->ring_obj = NULL;
8564         r600_ring_init(rdev, ring, 1024 * 1024);
8565         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8566         if (r)
8567                 return r;
8568
8569         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8570         ring->ring_obj = NULL;
8571         r600_ring_init(rdev, ring, 1024 * 1024);
8572         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8573         if (r)
8574                 return r;
8575
8576         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8577         ring->ring_obj = NULL;
8578         r600_ring_init(rdev, ring, 256 * 1024);
8579
8580         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8581         ring->ring_obj = NULL;
8582         r600_ring_init(rdev, ring, 256 * 1024);
8583
8584         cik_uvd_init(rdev);
8585         cik_vce_init(rdev);
8586
8587         rdev->ih.ring_obj = NULL;
8588         r600_ih_ring_init(rdev, 64 * 1024);
8589
8590         r = r600_pcie_gart_init(rdev);
8591         if (r)
8592                 return r;
8593
8594         rdev->accel_working = true;
8595         r = cik_startup(rdev);
8596         if (r) {
8597                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8598                 cik_cp_fini(rdev);
8599                 cik_sdma_fini(rdev);
8600                 cik_irq_fini(rdev);
8601                 sumo_rlc_fini(rdev);
8602                 cik_mec_fini(rdev);
8603                 radeon_wb_fini(rdev);
8604                 radeon_ib_pool_fini(rdev);
8605                 radeon_vm_manager_fini(rdev);
8606                 radeon_irq_kms_fini(rdev);
8607                 cik_pcie_gart_fini(rdev);
8608                 rdev->accel_working = false;
8609         }
8610
8611         /* Don't start up if the MC ucode is missing.
8612          * The default clocks and voltages before the MC ucode
8613          * is loaded are not suffient for advanced operations.
8614          */
8615         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8616                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8617                 return -EINVAL;
8618         }
8619
8620         return 0;
8621 }
8622
8623 /**
8624  * cik_fini - asic specific driver and hw fini
8625  *
8626  * @rdev: radeon_device pointer
8627  *
8628  * Tear down the asic specific driver variables and program the hw
8629  * to an idle state (CIK).
8630  * Called at driver unload.
8631  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop the engines first, then free their supporting buffers */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* GART goes down after everything that might still DMA */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8658
8659 void dce8_program_fmt(struct drm_encoder *encoder)
8660 {
8661         struct drm_device *dev = encoder->dev;
8662         struct radeon_device *rdev = dev->dev_private;
8663         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8664         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8665         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8666         int bpc = 0;
8667         u32 tmp = 0;
8668         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8669
8670         if (connector) {
8671                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8672                 bpc = radeon_get_monitor_bpc(connector);
8673                 dither = radeon_connector->dither;
8674         }
8675
8676         /* LVDS/eDP FMT is set up by atom */
8677         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8678                 return;
8679
8680         /* not needed for analog */
8681         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8682             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8683                 return;
8684
8685         if (bpc == 0)
8686                 return;
8687
8688         switch (bpc) {
8689         case 6:
8690                 if (dither == RADEON_FMT_DITHER_ENABLE)
8691                         /* XXX sort out optimal dither settings */
8692                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8693                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8694                 else
8695                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8696                 break;
8697         case 8:
8698                 if (dither == RADEON_FMT_DITHER_ENABLE)
8699                         /* XXX sort out optimal dither settings */
8700                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8701                                 FMT_RGB_RANDOM_ENABLE |
8702                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8703                 else
8704                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8705                 break;
8706         case 10:
8707                 if (dither == RADEON_FMT_DITHER_ENABLE)
8708                         /* XXX sort out optimal dither settings */
8709                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8710                                 FMT_RGB_RANDOM_ENABLE |
8711                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8712                 else
8713                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8714                 break;
8715         default:
8716                 /* not needed */
8717                 break;
8718         }
8719
8720         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8721 }
8722
8723 /* display watermark setup */
8724 /**
8725  * dce8_line_buffer_adjust - Set up the line buffer
8726  *
8727  * @rdev: radeon_device pointer
8728  * @radeon_crtc: the selected display controller
8729  * @mode: the current display mode on the selected display
8730  * controller
8731  *
8732  * Setup up the line buffer allocation for
8733  * the selected display controller (CIK).
8734  * Returns the line buffer size in pixels.
8735  */
8736 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8737                                    struct radeon_crtc *radeon_crtc,
8738                                    struct drm_display_mode *mode)
8739 {
8740         u32 tmp, buffer_alloc, i;
8741         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8742         /*
8743          * Line Buffer Setup
8744          * There are 6 line buffers, one for each display controllers.
8745          * There are 3 partitions per LB. Select the number of partitions
8746          * to enable based on the display width.  For display widths larger
8747          * than 4096, you need use to use 2 display controllers and combine
8748          * them using the stereo blender.
8749          */
8750         if (radeon_crtc->base.enabled && mode) {
8751                 if (mode->crtc_hdisplay < 1920) {
8752                         tmp = 1;
8753                         buffer_alloc = 2;
8754                 } else if (mode->crtc_hdisplay < 2560) {
8755                         tmp = 2;
8756                         buffer_alloc = 2;
8757                 } else if (mode->crtc_hdisplay < 4096) {
8758                         tmp = 0;
8759                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8760                 } else {
8761                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8762                         tmp = 0;
8763                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8764                 }
8765         } else {
8766                 tmp = 1;
8767                 buffer_alloc = 0;
8768         }
8769
8770         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8771                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8772
8773         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8774                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8775         for (i = 0; i < rdev->usec_timeout; i++) {
8776                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8777                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8778                         break;
8779                 udelay(1);
8780         }
8781
8782         if (radeon_crtc->base.enabled && mode) {
8783                 switch (tmp) {
8784                 case 0:
8785                 default:
8786                         return 4096 * 2;
8787                 case 1:
8788                         return 1920 * 2;
8789                 case 2:
8790                         return 2560 * 2;
8791                 }
8792         }
8793
8794         /* controller not enabled, so no lb used */
8795         return 0;
8796 }
8797
8798 /**
8799  * cik_get_number_of_dram_channels - get the number of dram channels
8800  *
8801  * @rdev: radeon_device pointer
8802  *
8803  * Look up the number of video ram channels (CIK).
8804  * Used for display watermark bandwidth calculations
8805  * Returns the number of dram channels
8806  */
8807 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8808 {
8809         u32 tmp = RREG32(MC_SHARED_CHMAP);
8810
8811         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8812         case 0:
8813         default:
8814                 return 1;
8815         case 1:
8816                 return 2;
8817         case 2:
8818                 return 4;
8819         case 3:
8820                 return 8;
8821         case 4:
8822                 return 3;
8823         case 5:
8824                 return 6;
8825         case 6:
8826                 return 10;
8827         case 7:
8828                 return 12;
8829         case 8:
8830                 return 16;
8831         }
8832 }
8833
/* Per-head input data for the DCE8 display watermark calculations.
 * Filled in by dce8_program_watermarks() and consumed by the
 * dce8_*_bandwidth() / dce8_latency_watermark() helpers below.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8849
8850 /**
8851  * dce8_dram_bandwidth - get the dram bandwidth
8852  *
8853  * @wm: watermark calculation data
8854  *
8855  * Calculate the raw dram bandwidth (CIK).
8856  * Used for display watermark bandwidth calculations
8857  * Returns the dram bandwidth in MBytes/s
8858  */
8859 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8860 {
8861         /* Calculate raw DRAM Bandwidth */
8862         fixed20_12 dram_efficiency; /* 0.7 */
8863         fixed20_12 yclk, dram_channels, bandwidth;
8864         fixed20_12 a;
8865
8866         a.full = dfixed_const(1000);
8867         yclk.full = dfixed_const(wm->yclk);
8868         yclk.full = dfixed_div(yclk, a);
8869         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8870         a.full = dfixed_const(10);
8871         dram_efficiency.full = dfixed_const(7);
8872         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8873         bandwidth.full = dfixed_mul(dram_channels, yclk);
8874         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8875
8876         return dfixed_trunc(bandwidth);
8877 }
8878
8879 /**
8880  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8881  *
8882  * @wm: watermark calculation data
8883  *
8884  * Calculate the dram bandwidth used for display (CIK).
8885  * Used for display watermark bandwidth calculations
8886  * Returns the dram bandwidth for display in MBytes/s
8887  */
8888 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8889 {
8890         /* Calculate DRAM Bandwidth and the part allocated to display. */
8891         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8892         fixed20_12 yclk, dram_channels, bandwidth;
8893         fixed20_12 a;
8894
8895         a.full = dfixed_const(1000);
8896         yclk.full = dfixed_const(wm->yclk);
8897         yclk.full = dfixed_div(yclk, a);
8898         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8899         a.full = dfixed_const(10);
8900         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8901         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8902         bandwidth.full = dfixed_mul(dram_channels, yclk);
8903         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8904
8905         return dfixed_trunc(bandwidth);
8906 }
8907
8908 /**
8909  * dce8_data_return_bandwidth - get the data return bandwidth
8910  *
8911  * @wm: watermark calculation data
8912  *
8913  * Calculate the data return bandwidth used for display (CIK).
8914  * Used for display watermark bandwidth calculations
8915  * Returns the data return bandwidth in MBytes/s
8916  */
8917 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8918 {
8919         /* Calculate the display Data return Bandwidth */
8920         fixed20_12 return_efficiency; /* 0.8 */
8921         fixed20_12 sclk, bandwidth;
8922         fixed20_12 a;
8923
8924         a.full = dfixed_const(1000);
8925         sclk.full = dfixed_const(wm->sclk);
8926         sclk.full = dfixed_div(sclk, a);
8927         a.full = dfixed_const(10);
8928         return_efficiency.full = dfixed_const(8);
8929         return_efficiency.full = dfixed_div(return_efficiency, a);
8930         a.full = dfixed_const(32);
8931         bandwidth.full = dfixed_mul(a, sclk);
8932         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8933
8934         return dfixed_trunc(bandwidth);
8935 }
8936
8937 /**
8938  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8939  *
8940  * @wm: watermark calculation data
8941  *
8942  * Calculate the dmif bandwidth used for display (CIK).
8943  * Used for display watermark bandwidth calculations
8944  * Returns the dmif bandwidth in MBytes/s
8945  */
8946 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8947 {
8948         /* Calculate the DMIF Request Bandwidth */
8949         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8950         fixed20_12 disp_clk, bandwidth;
8951         fixed20_12 a, b;
8952
8953         a.full = dfixed_const(1000);
8954         disp_clk.full = dfixed_const(wm->disp_clk);
8955         disp_clk.full = dfixed_div(disp_clk, a);
8956         a.full = dfixed_const(32);
8957         b.full = dfixed_mul(a, disp_clk);
8958
8959         a.full = dfixed_const(10);
8960         disp_clk_request_efficiency.full = dfixed_const(8);
8961         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8962
8963         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8964
8965         return dfixed_trunc(bandwidth);
8966 }
8967
8968 /**
8969  * dce8_available_bandwidth - get the min available bandwidth
8970  *
8971  * @wm: watermark calculation data
8972  *
8973  * Calculate the min available bandwidth used for display (CIK).
8974  * Used for display watermark bandwidth calculations
8975  * Returns the min available bandwidth in MBytes/s
8976  */
8977 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8978 {
8979         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8980         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8981         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8982         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8983
8984         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8985 }
8986
8987 /**
8988  * dce8_average_bandwidth - get the average available bandwidth
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the average available bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the average available bandwidth in MBytes/s
8995  */
8996 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8997 {
8998         /* Calculate the display mode Average Bandwidth
8999          * DisplayMode should contain the source and destination dimensions,
9000          * timing, etc.
9001          */
9002         fixed20_12 bpp;
9003         fixed20_12 line_time;
9004         fixed20_12 src_width;
9005         fixed20_12 bandwidth;
9006         fixed20_12 a;
9007
9008         a.full = dfixed_const(1000);
9009         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9010         line_time.full = dfixed_div(line_time, a);
9011         bpp.full = dfixed_const(wm->bytes_per_pixel);
9012         src_width.full = dfixed_const(wm->src_width);
9013         bandwidth.full = dfixed_mul(src_width, bpp);
9014         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9015         bandwidth.full = dfixed_div(bandwidth, line_time);
9016
9017         return dfixed_trunc(bandwidth);
9018 }
9019
9020 /**
9021  * dce8_latency_watermark - get the latency watermark
9022  *
9023  * @wm: watermark calculation data
9024  *
9025  * Calculate the latency watermark (CIK).
9026  * Used for display watermark bandwidth calculations
9027  * Returns the latency watermark in ns
9028  */
9029 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9030 {
9031         /* First calculate the latency in ns */
9032         u32 mc_latency = 2000; /* 2000 ns. */
9033         u32 available_bandwidth = dce8_available_bandwidth(wm);
9034         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9035         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9036         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9037         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9038                 (wm->num_heads * cursor_line_pair_return_time);
9039         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9040         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9041         u32 tmp, dmif_size = 12288;
9042         fixed20_12 a, b, c;
9043
9044         if (wm->num_heads == 0)
9045                 return 0;
9046
9047         a.full = dfixed_const(2);
9048         b.full = dfixed_const(1);
9049         if ((wm->vsc.full > a.full) ||
9050             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9051             (wm->vtaps >= 5) ||
9052             ((wm->vsc.full >= a.full) && wm->interlaced))
9053                 max_src_lines_per_dst_line = 4;
9054         else
9055                 max_src_lines_per_dst_line = 2;
9056
9057         a.full = dfixed_const(available_bandwidth);
9058         b.full = dfixed_const(wm->num_heads);
9059         a.full = dfixed_div(a, b);
9060         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9061         tmp = min(dfixed_trunc(a), tmp);
9062
9063         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9064
9065         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9066         b.full = dfixed_const(1000);
9067         c.full = dfixed_const(lb_fill_bw);
9068         b.full = dfixed_div(c, b);
9069         a.full = dfixed_div(a, b);
9070         line_fill_time = dfixed_trunc(a);
9071
9072         if (line_fill_time < wm->active_time)
9073                 return latency;
9074         else
9075                 return latency + (line_fill_time - wm->active_time);
9076
9077 }
9078
9079 /**
9080  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9081  * average and available dram bandwidth
9082  *
9083  * @wm: watermark calculation data
9084  *
9085  * Check if the display average bandwidth fits in the display
9086  * dram bandwidth (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns true if the display fits, false if not.
9089  */
9090 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9091 {
9092         if (dce8_average_bandwidth(wm) <=
9093             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9094                 return true;
9095         else
9096                 return false;
9097 }
9098
9099 /**
9100  * dce8_average_bandwidth_vs_available_bandwidth - check
9101  * average and available bandwidth
9102  *
9103  * @wm: watermark calculation data
9104  *
9105  * Check if the display average bandwidth fits in the display
9106  * available bandwidth (CIK).
9107  * Used for display watermark bandwidth calculations
9108  * Returns true if the display fits, false if not.
9109  */
9110 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9111 {
9112         if (dce8_average_bandwidth(wm) <=
9113             (dce8_available_bandwidth(wm) / wm->num_heads))
9114                 return true;
9115         else
9116                 return false;
9117 }
9118
9119 /**
9120  * dce8_check_latency_hiding - check latency hiding
9121  *
9122  * @wm: watermark calculation data
9123  *
9124  * Check latency hiding (CIK).
9125  * Used for display watermark bandwidth calculations
9126  * Returns true if the display fits, false if not.
9127  */
9128 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9129 {
9130         u32 lb_partitions = wm->lb_size / wm->src_width;
9131         u32 line_time = wm->active_time + wm->blank_time;
9132         u32 latency_tolerant_lines;
9133         u32 latency_hiding;
9134         fixed20_12 a;
9135
9136         a.full = dfixed_const(1);
9137         if (wm->vsc.full > a.full)
9138                 latency_tolerant_lines = 1;
9139         else {
9140                 if (lb_partitions <= (wm->vtaps + 1))
9141                         latency_tolerant_lines = 1;
9142                 else
9143                         latency_tolerant_lines = 2;
9144         }
9145
9146         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9147
9148         if (dce8_latency_watermark(wm) <= latency_hiding)
9149                 return true;
9150         else
9151                 return false;
9152 }
9153
9154 /**
9155  * dce8_program_watermarks - program display watermarks
9156  *
9157  * @rdev: radeon_device pointer
9158  * @radeon_crtc: the selected display controller
9159  * @lb_size: line buffer size
9160  * @num_heads: number of display controllers in use
9161  *
9162  * Calculate and program the display watermarks for the
9163  * selected display controller (CIK).
9164  */
9165 static void dce8_program_watermarks(struct radeon_device *rdev,
9166                                     struct radeon_crtc *radeon_crtc,
9167                                     u32 lb_size, u32 num_heads)
9168 {
9169         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9170         struct dce8_wm_params wm_low, wm_high;
9171         u32 active_time;
9172         u32 line_time = 0;
9173         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9174         u32 tmp, wm_mask;
9175
9176         if (radeon_crtc->base.enabled && num_heads && mode) {
9177                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9178                                             (u32)mode->clock);
9179                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9180                                           (u32)mode->clock);
9181                 line_time = min(line_time, (u32)65535);
9182
9183                 /* watermark for high clocks */
9184                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9185                     rdev->pm.dpm_enabled) {
9186                         wm_high.yclk =
9187                                 radeon_dpm_get_mclk(rdev, false) * 10;
9188                         wm_high.sclk =
9189                                 radeon_dpm_get_sclk(rdev, false) * 10;
9190                 } else {
9191                         wm_high.yclk = rdev->pm.current_mclk * 10;
9192                         wm_high.sclk = rdev->pm.current_sclk * 10;
9193                 }
9194
9195                 wm_high.disp_clk = mode->clock;
9196                 wm_high.src_width = mode->crtc_hdisplay;
9197                 wm_high.active_time = active_time;
9198                 wm_high.blank_time = line_time - wm_high.active_time;
9199                 wm_high.interlaced = false;
9200                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9201                         wm_high.interlaced = true;
9202                 wm_high.vsc = radeon_crtc->vsc;
9203                 wm_high.vtaps = 1;
9204                 if (radeon_crtc->rmx_type != RMX_OFF)
9205                         wm_high.vtaps = 2;
9206                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9207                 wm_high.lb_size = lb_size;
9208                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9209                 wm_high.num_heads = num_heads;
9210
9211                 /* set for high clocks */
9212                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9213
9214                 /* possibly force display priority to high */
9215                 /* should really do this at mode validation time... */
9216                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9217                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9218                     !dce8_check_latency_hiding(&wm_high) ||
9219                     (rdev->disp_priority == 2)) {
9220                         DRM_DEBUG_KMS("force priority to high\n");
9221                 }
9222
9223                 /* watermark for low clocks */
9224                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9225                     rdev->pm.dpm_enabled) {
9226                         wm_low.yclk =
9227                                 radeon_dpm_get_mclk(rdev, true) * 10;
9228                         wm_low.sclk =
9229                                 radeon_dpm_get_sclk(rdev, true) * 10;
9230                 } else {
9231                         wm_low.yclk = rdev->pm.current_mclk * 10;
9232                         wm_low.sclk = rdev->pm.current_sclk * 10;
9233                 }
9234
9235                 wm_low.disp_clk = mode->clock;
9236                 wm_low.src_width = mode->crtc_hdisplay;
9237                 wm_low.active_time = active_time;
9238                 wm_low.blank_time = line_time - wm_low.active_time;
9239                 wm_low.interlaced = false;
9240                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9241                         wm_low.interlaced = true;
9242                 wm_low.vsc = radeon_crtc->vsc;
9243                 wm_low.vtaps = 1;
9244                 if (radeon_crtc->rmx_type != RMX_OFF)
9245                         wm_low.vtaps = 2;
9246                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9247                 wm_low.lb_size = lb_size;
9248                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9249                 wm_low.num_heads = num_heads;
9250
9251                 /* set for low clocks */
9252                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9253
9254                 /* possibly force display priority to high */
9255                 /* should really do this at mode validation time... */
9256                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9257                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9258                     !dce8_check_latency_hiding(&wm_low) ||
9259                     (rdev->disp_priority == 2)) {
9260                         DRM_DEBUG_KMS("force priority to high\n");
9261                 }
9262
9263                 /* Save number of lines the linebuffer leads before the scanout */
9264                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9265         }
9266
9267         /* select wm A */
9268         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9269         tmp = wm_mask;
9270         tmp &= ~LATENCY_WATERMARK_MASK(3);
9271         tmp |= LATENCY_WATERMARK_MASK(1);
9272         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9273         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9274                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9275                 LATENCY_HIGH_WATERMARK(line_time)));
9276         /* select wm B */
9277         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9278         tmp &= ~LATENCY_WATERMARK_MASK(3);
9279         tmp |= LATENCY_WATERMARK_MASK(2);
9280         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9281         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9282                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9283                 LATENCY_HIGH_WATERMARK(line_time)));
9284         /* restore original selection */
9285         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9286
9287         /* save values for DPM */
9288         radeon_crtc->line_time = line_time;
9289         radeon_crtc->wm_high = latency_watermark_a;
9290         radeon_crtc->wm_low = latency_watermark_b;
9291 }
9292
9293 /**
9294  * dce8_bandwidth_update - program display watermarks
9295  *
9296  * @rdev: radeon_device pointer
9297  *
9298  * Calculate and program the display watermarks and line
9299  * buffer allocation (CIK).
9300  */
9301 void dce8_bandwidth_update(struct radeon_device *rdev)
9302 {
9303         struct drm_display_mode *mode = NULL;
9304         u32 num_heads = 0, lb_size;
9305         int i;
9306
9307         if (!rdev->mode_info.mode_config_initialized)
9308                 return;
9309
9310         radeon_update_display_priority(rdev);
9311
9312         for (i = 0; i < rdev->num_crtc; i++) {
9313                 if (rdev->mode_info.crtcs[i]->base.enabled)
9314                         num_heads++;
9315         }
9316         for (i = 0; i < rdev->num_crtc; i++) {
9317                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9318                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9319                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9320         }
9321 }
9322
9323 /**
9324  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9325  *
9326  * @rdev: radeon_device pointer
9327  *
9328  * Fetches a GPU clock counter snapshot (SI).
9329  * Returns the 64 bit clock counter snapshot.
9330  */
9331 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9332 {
9333         uint64_t clock;
9334
9335         mutex_lock(&rdev->gpu_clock_mutex);
9336         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9337         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9338                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9339         mutex_unlock(&rdev->gpu_clock_mutex);
9340         return clock;
9341 }
9342
9343 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9344                              u32 cntl_reg, u32 status_reg)
9345 {
9346         int r, i;
9347         struct atom_clock_dividers dividers;
9348         uint32_t tmp;
9349
9350         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9351                                            clock, false, &dividers);
9352         if (r)
9353                 return r;
9354
9355         tmp = RREG32_SMC(cntl_reg);
9356         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9357         tmp |= dividers.post_divider;
9358         WREG32_SMC(cntl_reg, tmp);
9359
9360         for (i = 0; i < 100; i++) {
9361                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9362                         break;
9363                 mdelay(10);
9364         }
9365         if (i == 100)
9366                 return -ETIMEDOUT;
9367
9368         return 0;
9369 }
9370
9371 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9372 {
9373         int r = 0;
9374
9375         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9376         if (r)
9377                 return r;
9378
9379         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9380         return r;
9381 }
9382
9383 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9384 {
9385         int r, i;
9386         struct atom_clock_dividers dividers;
9387         u32 tmp;
9388
9389         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9390                                            ecclk, false, &dividers);
9391         if (r)
9392                 return r;
9393
9394         for (i = 0; i < 100; i++) {
9395                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9396                         break;
9397                 mdelay(10);
9398         }
9399         if (i == 100)
9400                 return -ETIMEDOUT;
9401
9402         tmp = RREG32_SMC(CG_ECLK_CNTL);
9403         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9404         tmp |= dividers.post_divider;
9405         WREG32_SMC(CG_ECLK_CNTL, tmp);
9406
9407         for (i = 0; i < 100; i++) {
9408                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9409                         break;
9410                 mdelay(10);
9411         }
9412         if (i == 100)
9413                 return -ETIMEDOUT;
9414
9415         return 0;
9416 }
9417
/* Bring the PCIe link up to the highest speed supported by both the
 * root port and the GPU (gen2 or gen3).  For a gen3 target that is not
 * already trained, runs the equalization retry dance on the bridge and
 * GPU link control registers first.  No-op for IGPs, non-PCIe parts,
 * root-bus devices, or when disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what the upstream port can do */
	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	/* current data rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets for config-space access on both ends */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD setting on both ends and force it on */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to full link width if it shrank */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop; bail out early once no
			 * transactions are pending on the GPU
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 — restore the saved compliance and
				 * transmit margin bits (1<<4, 7<<9) on both ends
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed in LNKCTL2: 1 = 2.5GT/s, 2 = 5GT/s, 3 = 8GT/s */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= 3; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9579
/**
 * cik_program_aspm - configure PCIe ASPM (Active State Power Management)
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe port registers to enable L0s/L1 link power states and,
 * when the upstream port advertises clock power management, allows PLL
 * power-down in L1.  Skipped entirely when ASPM is disabled via the
 * radeon_aspm module parameter, on IGPs, and on non-PCIE parts.
 *
 * NOTE(review): the local disable_* flags are all hard-coded false here, so
 * every conditional feature below is currently taken; they appear to exist
 * as chip-quirk hooks — confirm against newer asics before changing.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the number of fast training sequences (N_FTS) the
	 * transmitter advertises for L0s exit to 0x24.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* Don't treat EDB (end bad) TLP errors as fatal. */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Program the L0s/L1 inactivity timers.  PMI-to-L1 is disabled by
	 * default and re-enabled below only when L1 itself is enabled.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PHY PLLs to power down in the L1/off and
			 * TXS2 states (both PIF blocks, both pad groups).
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ#-based clock PM is only usable if the
			 * upstream bridge advertises clock power management
			 * (PCI_EXP_LNKCAP_CLKPM).  Root-bus devices have no
			 * upstream bridge to query, so it stays disabled.
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Re-clock the SMC thermal/misc blocks off the
				 * reference clock so the refclk can be gated
				 * while in L1 with CLKREQ# asserted.
				 * NOTE(review): exact clock-mux meanings of the
				 * SEL(1)/SEL(4) values come from the register
				 * spec — not derivable from this file.
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only PCIE_LC_CNTL settings. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the BIF memories. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If the received N_FTS field is saturated and the link is
		 * reversed in both directions, back out of L0s by clearing
		 * its inactivity timer (leaving L0s entry disabled).
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}