GNU Linux-libre 4.14.257-gnu1
[releases.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 #define SH_MEM_CONFIG_GFX_DEFAULT \
39         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
40
41 /*(DEBLOBBED)*/
42
43 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
44 extern void r600_ih_ring_fini(struct radeon_device *rdev);
45 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
46 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
47 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
48 extern void sumo_rlc_fini(struct radeon_device *rdev);
49 extern int sumo_rlc_init(struct radeon_device *rdev);
50 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
51 extern void si_rlc_reset(struct radeon_device *rdev);
52 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
53 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
54 extern int cik_sdma_resume(struct radeon_device *rdev);
55 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
56 extern void cik_sdma_fini(struct radeon_device *rdev);
57 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
58 static void cik_rlc_stop(struct radeon_device *rdev);
59 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
60 static void cik_program_aspm(struct radeon_device *rdev);
61 static void cik_init_pg(struct radeon_device *rdev);
62 static void cik_init_cg(struct radeon_device *rdev);
63 static void cik_fini_pg(struct radeon_device *rdev);
64 static void cik_fini_cg(struct radeon_device *rdev);
65 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
66                                           bool enable);
67
68 /**
69  * cik_get_allowed_info_register - fetch the register for the info ioctl
70  *
71  * @rdev: radeon_device pointer
72  * @reg: register offset in bytes
73  * @val: register value
74  *
75  * Returns 0 for success or -EINVAL for an invalid register
76  *
77  */
78 int cik_get_allowed_info_register(struct radeon_device *rdev,
79                                   u32 reg, u32 *val)
80 {
81         switch (reg) {
82         case GRBM_STATUS:
83         case GRBM_STATUS2:
84         case GRBM_STATUS_SE0:
85         case GRBM_STATUS_SE1:
86         case GRBM_STATUS_SE2:
87         case GRBM_STATUS_SE3:
88         case SRBM_STATUS:
89         case SRBM_STATUS2:
90         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
91         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
92         case UVD_STATUS:
93         /* TODO VCE */
94                 *val = RREG32(reg);
95                 return 0;
96         default:
97                 return -EINVAL;
98         }
99 }
100
101 /*
102  * Indirect registers accessor
103  */
104 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
105 {
106         unsigned long flags;
107         u32 r;
108
109         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
110         WREG32(CIK_DIDT_IND_INDEX, (reg));
111         r = RREG32(CIK_DIDT_IND_DATA);
112         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
113         return r;
114 }
115
116 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
117 {
118         unsigned long flags;
119
120         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
121         WREG32(CIK_DIDT_IND_INDEX, (reg));
122         WREG32(CIK_DIDT_IND_DATA, (v));
123         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
124 }
125
126 /* get temperature in millidegrees */
127 int ci_get_temp(struct radeon_device *rdev)
128 {
129         u32 temp;
130         int actual_temp = 0;
131
132         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
133                 CTF_TEMP_SHIFT;
134
135         if (temp & 0x200)
136                 actual_temp = 255;
137         else
138                 actual_temp = temp & 0x1ff;
139
140         actual_temp = actual_temp * 1000;
141
142         return actual_temp;
143 }
144
145 /* get temperature in millidegrees */
146 int kv_get_temp(struct radeon_device *rdev)
147 {
148         u32 temp;
149         int actual_temp = 0;
150
151         temp = RREG32_SMC(0xC0300E0C);
152
153         if (temp)
154                 actual_temp = (temp / 8) - 49;
155         else
156                 actual_temp = 0;
157
158         actual_temp = actual_temp * 1000;
159
160         return actual_temp;
161 }
162
163 /*
164  * Indirect registers accessor
165  */
166 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
167 {
168         unsigned long flags;
169         u32 r;
170
171         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
172         WREG32(PCIE_INDEX, reg);
173         (void)RREG32(PCIE_INDEX);
174         r = RREG32(PCIE_DATA);
175         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
176         return r;
177 }
178
179 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
180 {
181         unsigned long flags;
182
183         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
184         WREG32(PCIE_INDEX, reg);
185         (void)RREG32(PCIE_INDEX);
186         WREG32(PCIE_DATA, v);
187         (void)RREG32(PCIE_DATA);
188         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
189 }
190
/*
 * RLC save/restore register list for Spectre (Kaveri) parts.
 *
 * NOTE(review): the format appears to be pairs of
 *   ((GRBM_GFX_INDEX-style SE/SH/instance selector) << 16) | (reg byte
 *   offset >> 2)
 * followed by a placeholder value of 0, with the bare 0x3 / 0x5 words
 * introducing indexed sub-lists -- inferred from the encoding; confirm
 * against the RLC resume code that consumes this table.  Do not edit
 * values without the hardware documentation.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): presumably a sub-list count/marker -- verify against consumer */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): presumably a sub-list count/marker -- verify against consumer */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
637
/*
 * RLC save/restore register list for Kalindi parts.
 *
 * NOTE(review): same apparent encoding as the Spectre list above:
 * ((SE/SH/instance selector) << 16) | (reg byte offset >> 2), each
 * followed by a placeholder 0, with bare 0x3 / 0x5 words introducing
 * indexed sub-lists -- inferred from the encoding; confirm against the
 * RLC resume code that consumes this table.  Do not edit values without
 * the hardware documentation.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): presumably a sub-list count/marker -- verify against consumer */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): presumably a sub-list count/marker -- verify against consumer */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
962
/*
 * Bonaire "golden" SPM register settings.
 * Entries appear to be {reg offset, AND mask, OR value} triples consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers() —
 * layout inferred from that usage; confirm against the helper.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
967
/*
 * Bonaire common golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
975
/*
 * Bonaire golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1020
/*
 * Bonaire clock-gating (MGCG/CGCG, per the array name) init sequence.
 * {offset, mask, value} triples applied by cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1106
/*
 * Spectre (Kaveri variant) golden SPM register settings
 * ({offset, mask, value} triples, applied by cik_init_golden_registers()).
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1111
/*
 * Spectre common golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1119
/*
 * Spectre golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers() for CHIP_KAVERI).
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1148
/*
 * Spectre clock-gating (MGCG/CGCG, per the array name) init sequence.
 * {offset, mask, value} triples applied by cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1239
/*
 * Kalindi (Kabini/Mullins variants) golden SPM register settings
 * ({offset, mask, value} triples, applied by cik_init_golden_registers()).
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1244
/*
 * Kalindi common golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1252
/*
 * Kalindi golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers() for CHIP_KABINI).
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1286
/*
 * Kalindi clock-gating (MGCG/CGCG, per the array name) init sequence.
 * {offset, mask, value} triples applied by cik_init_golden_registers()
 * for both CHIP_KABINI and CHIP_MULLINS.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1345
/*
 * Hawaii golden SPM register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1350
/*
 * Hawaii common golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers()).
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1359
/*
 * Hawaii golden register settings ({offset, mask, value} triples,
 * applied by cik_init_golden_registers() for CHIP_HAWAII).
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1399
/*
 * Hawaii clock-gating (MGCG/CGCG, per the array name) init sequence.
 * {offset, mask, value} triples applied by cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1510
/*
 * Godavari (Mullins) golden register settings ({offset, mask, value}
 * triples, applied by cik_init_golden_registers() for CHIP_MULLINS).
 *
 * NOTE(review): offset 0x98302 below is not dword-aligned and the sibling
 * tables use 0x9834 with the same mask/value — possible typo; verify
 * against the AMD register spec before changing.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1546
1547
1548 static void cik_init_golden_registers(struct radeon_device *rdev)
1549 {
1550         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1551         mutex_lock(&rdev->grbm_idx_mutex);
1552         switch (rdev->family) {
1553         case CHIP_BONAIRE:
1554                 radeon_program_register_sequence(rdev,
1555                                                  bonaire_mgcg_cgcg_init,
1556                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1557                 radeon_program_register_sequence(rdev,
1558                                                  bonaire_golden_registers,
1559                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1560                 radeon_program_register_sequence(rdev,
1561                                                  bonaire_golden_common_registers,
1562                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1563                 radeon_program_register_sequence(rdev,
1564                                                  bonaire_golden_spm_registers,
1565                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1566                 break;
1567         case CHIP_KABINI:
1568                 radeon_program_register_sequence(rdev,
1569                                                  kalindi_mgcg_cgcg_init,
1570                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1571                 radeon_program_register_sequence(rdev,
1572                                                  kalindi_golden_registers,
1573                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1574                 radeon_program_register_sequence(rdev,
1575                                                  kalindi_golden_common_registers,
1576                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  kalindi_golden_spm_registers,
1579                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1580                 break;
1581         case CHIP_MULLINS:
1582                 radeon_program_register_sequence(rdev,
1583                                                  kalindi_mgcg_cgcg_init,
1584                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1585                 radeon_program_register_sequence(rdev,
1586                                                  godavari_golden_registers,
1587                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_common_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_spm_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1594                 break;
1595         case CHIP_KAVERI:
1596                 radeon_program_register_sequence(rdev,
1597                                                  spectre_mgcg_cgcg_init,
1598                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1599                 radeon_program_register_sequence(rdev,
1600                                                  spectre_golden_registers,
1601                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1602                 radeon_program_register_sequence(rdev,
1603                                                  spectre_golden_common_registers,
1604                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  spectre_golden_spm_registers,
1607                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1608                 break;
1609         case CHIP_HAWAII:
1610                 radeon_program_register_sequence(rdev,
1611                                                  hawaii_mgcg_cgcg_init,
1612                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1613                 radeon_program_register_sequence(rdev,
1614                                                  hawaii_golden_registers,
1615                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1616                 radeon_program_register_sequence(rdev,
1617                                                  hawaii_golden_common_registers,
1618                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  hawaii_golden_spm_registers,
1621                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1622                 break;
1623         default:
1624                 break;
1625         }
1626         mutex_unlock(&rdev->grbm_idx_mutex);
1627 }
1628
1629 /**
1630  * cik_get_xclk - get the xclk
1631  *
1632  * @rdev: radeon_device pointer
1633  *
1634  * Returns the reference clock used by the gfx engine
1635  * (CIK).
1636  */
1637 u32 cik_get_xclk(struct radeon_device *rdev)
1638 {
1639         u32 reference_clock = rdev->clock.spll.reference_freq;
1640
1641         if (rdev->flags & RADEON_IS_IGP) {
1642                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1643                         return reference_clock / 2;
1644         } else {
1645                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1646                         return reference_clock / 4;
1647         }
1648         return reference_clock;
1649 }
1650
1651 /**
1652  * cik_mm_rdoorbell - read a doorbell dword
1653  *
1654  * @rdev: radeon_device pointer
1655  * @index: doorbell index
1656  *
1657  * Returns the value in the doorbell aperture at the
1658  * requested doorbell index (CIK).
1659  */
1660 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1661 {
1662         if (index < rdev->doorbell.num_doorbells) {
1663                 return readl(rdev->doorbell.ptr + index);
1664         } else {
1665                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1666                 return 0;
1667         }
1668 }
1669
1670 /**
1671  * cik_mm_wdoorbell - write a doorbell dword
1672  *
1673  * @rdev: radeon_device pointer
1674  * @index: doorbell index
1675  * @v: value to write
1676  *
1677  * Writes @v to the doorbell aperture at the
1678  * requested doorbell index (CIK).
1679  */
1680 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1681 {
1682         if (index < rdev->doorbell.num_doorbells) {
1683                 writel(v, rdev->doorbell.ptr + index);
1684         } else {
1685                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1686         }
1687 }
1688
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC IO debug init table for Bonaire.  Each entry is an
 * {index, data} pair written to MC_SEQ_IO_DEBUG_INDEX /
 * MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode() before the
 * legacy (non-header) MC ucode image is uploaded. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1730
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC IO debug init table for Hawaii.  Same format and use as
 * bonaire_io_mc_regs: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs programmed by ci_mc_load_microcode() for legacy MC ucode. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1758
1759
1760 /**
1761  * cik_srbm_select - select specific register instances
1762  *
1763  * @rdev: radeon_device pointer
1764  * @me: selected ME (micro engine)
1765  * @pipe: pipe
1766  * @queue: queue
1767  * @vmid: VMID
1768  *
1769  * Switches the currently active registers instances.  Some
1770  * registers are instanced per VMID, others are instanced per
1771  * me/pipe/queue combination.
1772  */
1773 static void cik_srbm_select(struct radeon_device *rdev,
1774                             u32 me, u32 pipe, u32 queue, u32 vmid)
1775 {
1776         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1777                              MEID(me & 0x3) |
1778                              VMID(vmid & 0xf) |
1779                              QUEUEID(queue & 0x7));
1780         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1781 }
1782
1783 /* ucode loading */
1784 /**
1785  * ci_mc_load_microcode - load MC ucode into the hw
1786  *
1787  * @rdev: radeon_device pointer
1788  *
1789  * Load the GDDR MC ucode into the hw (CIK).
1790  * Returns 0 on success, error on failure.
1791  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* New-style firmware: header describes where the IO debug
		 * {index, data} pairs and the ucode payload live inside the
		 * firmware blob; all values are little-endian. */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug section is pairs of dwords, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* Legacy firmware: big-endian raw image; the IO debug pairs
		 * come from the built-in per-family tables instead. */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* Only load ucode if the MC sequencer isn't already running. */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* Extra IO debug overrides for one specific Bonaire variant
		 * (PCI device 0x6649 with a matching MC_SEQ_MISC0 revision). */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		/* NOTE(review): training timeouts are not reported as errors;
		 * we fall through and return 0 regardless. */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1888
1889 /**
1890  * cik_init_microcode - load ucode images from disk
1891  *
1892  * @rdev: radeon_device pointer
1893  *
1894  * Use the firmware interface to load the ucode images into
1895  * the driver (not loaded into hw).
1896  * Returns 0 on success, error on failure.
1897  */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	/* Expected byte sizes of the legacy (raw) firmware images, used to
	 * sanity-check old-style blobs that carry no header. */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	/* Count of images that validated as new-style (header-carrying)
	 * firmware; mixing new and old images is rejected at the end. */
	int new_fw = 0;
	int err;
	int num_fw;
	bool new_smc = false;

	DRM_DEBUG("\n");

	/* Pick per-family firmware names, expected legacy sizes, and the
	 * number of images this family needs (num_fw). */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		/* Certain Bonaire revisions/devices require a newer SMC image. */
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APU: no MC/SMC firmware (see RADEON_IS_IGP check below). */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* Each image below follows the same pattern: try the new-style
	 * (lowercase-name) firmware first and validate its header; on
	 * failure fall back to the legacy (uppercase-name) image and
	 * check only its raw size. */

	/* PFP (pre-fetch parser) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME (micro engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		/* NOTE(review): unlike the pfp path, a size mismatch here
		 * sets err but does not goto out; later loads still run. */
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE (constant engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MEC (micro engine compute) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* Second MEC image is only used on Kaveri, and only exists as
	 * new-style firmware (no legacy fallback). */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* RLC (run list controller) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SDMA (system DMA) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* MC (memory controller): legacy path tries two image
		 * variants, so the size check accepts either mc_req_size
		 * or mc2_req_size. */
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
				err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* SMC (smart management controller): a missing SMC image is
		 * non-fatal — the driver continues without it (err reset to 0). */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* All images must agree on the firmware style: either none or all
	 * of the num_fw images are new-style. */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* On any error, drop every firmware reference we may hold. */
	if (err) {
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2237
2238 /*
2239  * Core functions
2240  */
2241 /**
2242  * cik_tiling_mode_table_init - init the hw tiling table
2243  *
2244  * @rdev: radeon_device pointer
2245  *
2246  * Starting with SI, the tiling setup is done globally in a
2247  * set of 32 tiling modes.  Rather than selecting each set of
2248  * parameters per surface as on older asics, we just select
2249  * which index in the tiling table we want to use, and the
2250  * surface uses those parameters (CIK).
2251  */
2252 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2253 {
2254         u32 *tile = rdev->config.cik.tile_mode_array;
2255         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2256         const u32 num_tile_mode_states =
2257                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2258         const u32 num_secondary_tile_mode_states =
2259                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2260         u32 reg_offset, split_equal_to_row_size;
2261         u32 num_pipe_configs;
2262         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2263                 rdev->config.cik.max_shader_engines;
2264
2265         switch (rdev->config.cik.mem_row_size_in_kb) {
2266         case 1:
2267                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2268                 break;
2269         case 2:
2270         default:
2271                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2272                 break;
2273         case 4:
2274                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2275                 break;
2276         }
2277
2278         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2279         if (num_pipe_configs > 8)
2280                 num_pipe_configs = 16;
2281
2282         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2283                 tile[reg_offset] = 0;
2284         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2285                 macrotile[reg_offset] = 0;
2286
2287         switch(num_pipe_configs) {
2288         case 16:
2289                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2291                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2293                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2295                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2297                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2299                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2301                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2303                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2305                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                            TILE_SPLIT(split_equal_to_row_size));
2309                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2312                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2313                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
/* NOTE(review): tail of the 16-pipe (ADDR_SURF_P16_32x32_*) case arm; the
 * case label itself is above this hunk.  Values are per-index GB_TILE_MODE
 * encodings (array mode, micro tile mode, pipe config, split). */
2316                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2317                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                            TILE_SPLIT(split_equal_to_row_size));
2320                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2321                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2322                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2325                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2332                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2334                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2338                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2340                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2342                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2346                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2347                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2349                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2350                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
                 /* Indices 15 and 18-26 are skipped here -- presumably left at
                  * their zeroed defaults by earlier initialization of tile[];
                  * TODO confirm against the code above this hunk. */
2352                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2355                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2357                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2360                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2361                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2362                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2364                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2365                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367
                 /* GB_MACROTILE_MODE values (bank width/height, macro aspect,
                  * bank count) for the 16-pipe configuration.  Indices 7 and 15
                  * are not assigned here. */
2368                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2370                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371                            NUM_BANKS(ADDR_SURF_16_BANK));
2372                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2374                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                            NUM_BANKS(ADDR_SURF_16_BANK));
2376                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2377                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2378                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2379                            NUM_BANKS(ADDR_SURF_16_BANK));
2380                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2381                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2382                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2383                            NUM_BANKS(ADDR_SURF_16_BANK));
2384                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2386                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2387                            NUM_BANKS(ADDR_SURF_8_BANK));
2388                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2389                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2390                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2391                            NUM_BANKS(ADDR_SURF_4_BANK));
2392                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2394                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2395                            NUM_BANKS(ADDR_SURF_2_BANK));
2396                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2398                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2399                            NUM_BANKS(ADDR_SURF_16_BANK));
2400                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2402                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2403                            NUM_BANKS(ADDR_SURF_16_BANK));
2404                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2406                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2407                             NUM_BANKS(ADDR_SURF_16_BANK));
2408                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2409                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2410                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2411                             NUM_BANKS(ADDR_SURF_8_BANK));
2412                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2413                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2414                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2415                             NUM_BANKS(ADDR_SURF_4_BANK));
2416                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2418                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2419                             NUM_BANKS(ADDR_SURF_2_BANK));
2420                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2421                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2422                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2423                             NUM_BANKS(ADDR_SURF_2_BANK));
2424
                 /* Program every computed entry into the hardware register
                  * banks GB_TILE_MODE0.. and GB_MACROTILE_MODE0.. (stride of
                  * 4 bytes per 32-bit register). */
2425                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2426                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2427                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2428                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2429                 break;
2430
2431         case 8:
                 /* 8-pipe configuration (ADDR_SURF_P8_32x32_*): GB_TILE_MODE
                  * encodings.  Indices 0-4 are depth modes with increasing
                  * tile splits; 8 is linear; 9-14 display/thin 1D/2D modes;
                  * 16/17 PRT thin modes; 27-30 rotated modes.  Indices 15,
                  * 18-26 and 31 are not assigned here -- presumably left
                  * zeroed by earlier initialization (TODO confirm). */
2432                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2434                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2435                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2436                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2438                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2439                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2440                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2442                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2443                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2444                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2447                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2448                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2450                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2451                            TILE_SPLIT(split_equal_to_row_size));
2452                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2453                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2455                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2457                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2459                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2461                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462                            TILE_SPLIT(split_equal_to_row_size));
2463                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2464                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2465                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2466                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2468                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2470                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2471                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2473                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2474                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2475                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2476                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2477                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2478                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2479                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2483                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2490                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2492                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2493                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2498                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2504                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2505                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2507                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2508                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510
                 /* GB_MACROTILE_MODE values for 8 pipes; indices 7 and 15
                  * are not assigned here. */
2511                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514                                 NUM_BANKS(ADDR_SURF_16_BANK));
2515                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2517                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                 NUM_BANKS(ADDR_SURF_16_BANK));
2519                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                 NUM_BANKS(ADDR_SURF_16_BANK));
2523                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526                                 NUM_BANKS(ADDR_SURF_16_BANK));
2527                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530                                 NUM_BANKS(ADDR_SURF_8_BANK));
2531                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2534                                 NUM_BANKS(ADDR_SURF_4_BANK));
2535                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2537                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2538                                 NUM_BANKS(ADDR_SURF_2_BANK));
2539                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542                                 NUM_BANKS(ADDR_SURF_16_BANK));
2543                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546                                 NUM_BANKS(ADDR_SURF_16_BANK));
2547                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550                                 NUM_BANKS(ADDR_SURF_16_BANK));
2551                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554                                 NUM_BANKS(ADDR_SURF_16_BANK));
2555                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558                                 NUM_BANKS(ADDR_SURF_8_BANK));
2559                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562                                 NUM_BANKS(ADDR_SURF_4_BANK));
2563                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566                                 NUM_BANKS(ADDR_SURF_2_BANK));
2567
                 /* Write the tables to GB_TILE_MODE0.. / GB_MACROTILE_MODE0..
                  * (consecutive 32-bit registers, 4 bytes apart). */
2568                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2569                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2570                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2571                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2572                 break;
2573
2574         case 4:
                 /* 4-pipe configuration.  The pipe config depends on the
                  * render backend count: 4 RBs use ADDR_SURF_P4_16x16,
                  * fewer than 4 use ADDR_SURF_P4_8x16.  The tile[]
                  * assignments deliberately keep switch-level indentation
                  * inside the if/else (matches the style of this table). */
2575                 if (num_rbs == 4) {
2576                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2580                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2582                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2584                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2586                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2588                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2592                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2594                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595                            TILE_SPLIT(split_equal_to_row_size));
2596                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2599                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2600                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2601                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2603                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2604                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606                            TILE_SPLIT(split_equal_to_row_size));
2607                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2608                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2609                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2612                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2618                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2619                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2621                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2622                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2625                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2627                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2629                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2633                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2636                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2637                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2639                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
                 /* NOTE(review): tile[28] uses ARRAY_PRT_2D_TILED_THIN1 here,
                  * whereas the 16- and 8-pipe cases use plain
                  * ARRAY_2D_TILED_THIN1 for index 28 -- confirm against AMD
                  * addrlib / the GB_TILE_MODE spec whether this asymmetry is
                  * intentional for 4-pipe parts. */
2642                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2643                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2644                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2651                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2652                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654
                 /* Fewer than 4 render backends: identical table shape, but
                  * every pipe config drops to ADDR_SURF_P4_8x16. */
2655                 } else if (num_rbs < 4) {
2656                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2659                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2660                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2668                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2672                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675                            TILE_SPLIT(split_equal_to_row_size));
2676                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2680                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2681                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2683                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2684                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686                            TILE_SPLIT(split_equal_to_row_size));
2687                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2688                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2689                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2692                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2695                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2699                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2701                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2703                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2707                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2709                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2713                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2716                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2717                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2722                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2724                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2728                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2731                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2732                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734                 }
2735
                 /* GB_MACROTILE_MODE values for the 4-pipe configuration;
                  * shared by both num_rbs branches above.  Indices 7 and 15
                  * are not assigned here. */
2736                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2742                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743                                 NUM_BANKS(ADDR_SURF_16_BANK));
2744                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747                                 NUM_BANKS(ADDR_SURF_16_BANK));
2748                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2751                                 NUM_BANKS(ADDR_SURF_16_BANK));
2752                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2755                                 NUM_BANKS(ADDR_SURF_16_BANK));
2756                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759                                 NUM_BANKS(ADDR_SURF_8_BANK));
2760                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2762                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2763                                 NUM_BANKS(ADDR_SURF_4_BANK));
2764                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2765                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2766                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2767                                 NUM_BANKS(ADDR_SURF_16_BANK));
2768                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2769                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771                                 NUM_BANKS(ADDR_SURF_16_BANK));
2772                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2774                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775                                 NUM_BANKS(ADDR_SURF_16_BANK));
2776                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2778                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779                                 NUM_BANKS(ADDR_SURF_16_BANK));
2780                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783                                 NUM_BANKS(ADDR_SURF_16_BANK));
2784                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2786                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2787                                 NUM_BANKS(ADDR_SURF_8_BANK));
2788                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2790                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2791                                 NUM_BANKS(ADDR_SURF_4_BANK));
2792
                 /* Write the tables to GB_TILE_MODE0.. / GB_MACROTILE_MODE0..
                  * (consecutive 32-bit registers, 4 bytes apart). */
2793                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2794                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2795                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2796                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2797                 break;
2798
2799         case 2:
2800                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2801                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2802                            PIPE_CONFIG(ADDR_SURF_P2) |
2803                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2804                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2805                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2806                            PIPE_CONFIG(ADDR_SURF_P2) |
2807                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2808                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810                            PIPE_CONFIG(ADDR_SURF_P2) |
2811                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2812                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2814                            PIPE_CONFIG(ADDR_SURF_P2) |
2815                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2816                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2817                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2818                            PIPE_CONFIG(ADDR_SURF_P2) |
2819                            TILE_SPLIT(split_equal_to_row_size));
2820                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2821                            PIPE_CONFIG(ADDR_SURF_P2) |
2822                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2824                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2825                            PIPE_CONFIG(ADDR_SURF_P2) |
2826                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2827                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829                            PIPE_CONFIG(ADDR_SURF_P2) |
2830                            TILE_SPLIT(split_equal_to_row_size));
2831                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2832                            PIPE_CONFIG(ADDR_SURF_P2);
2833                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2835                            PIPE_CONFIG(ADDR_SURF_P2));
2836                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2838                             PIPE_CONFIG(ADDR_SURF_P2) |
2839                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2841                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2842                             PIPE_CONFIG(ADDR_SURF_P2) |
2843                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2845                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846                             PIPE_CONFIG(ADDR_SURF_P2) |
2847                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2848                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849                             PIPE_CONFIG(ADDR_SURF_P2) |
2850                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2851                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2853                             PIPE_CONFIG(ADDR_SURF_P2) |
2854                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2856                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857                             PIPE_CONFIG(ADDR_SURF_P2) |
2858                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2859                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2860                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861                             PIPE_CONFIG(ADDR_SURF_P2) |
2862                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2864                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865                             PIPE_CONFIG(ADDR_SURF_P2));
2866                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2867                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2868                             PIPE_CONFIG(ADDR_SURF_P2) |
2869                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                             PIPE_CONFIG(ADDR_SURF_P2) |
2873                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2875                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2876                             PIPE_CONFIG(ADDR_SURF_P2) |
2877                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878
2879                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2880                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2881                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2882                                 NUM_BANKS(ADDR_SURF_16_BANK));
2883                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2884                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2885                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2886                                 NUM_BANKS(ADDR_SURF_16_BANK));
2887                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2889                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2890                                 NUM_BANKS(ADDR_SURF_16_BANK));
2891                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894                                 NUM_BANKS(ADDR_SURF_16_BANK));
2895                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2896                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2897                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2898                                 NUM_BANKS(ADDR_SURF_16_BANK));
2899                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2900                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2901                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2902                                 NUM_BANKS(ADDR_SURF_16_BANK));
2903                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2905                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2906                                 NUM_BANKS(ADDR_SURF_8_BANK));
2907                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2908                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2909                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2910                                 NUM_BANKS(ADDR_SURF_16_BANK));
2911                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2912                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2913                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2914                                 NUM_BANKS(ADDR_SURF_16_BANK));
2915                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                 NUM_BANKS(ADDR_SURF_16_BANK));
2919                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                 NUM_BANKS(ADDR_SURF_16_BANK));
2923                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2930                                 NUM_BANKS(ADDR_SURF_16_BANK));
2931                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2932                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2933                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2934                                 NUM_BANKS(ADDR_SURF_8_BANK));
2935
2936                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2937                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2938                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2939                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2940                 break;
2941
2942         default:
2943                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2944         }
2945 }
2946
2947 /**
2948  * cik_select_se_sh - select which SE, SH to address
2949  *
2950  * @rdev: radeon_device pointer
2951  * @se_num: shader engine to address
2952  * @sh_num: sh block to address
2953  *
2954  * Select which SE, SH combinations to address. Certain
2955  * registers are instanced per SE or SH.  0xffffffff means
2956  * broadcast to all SEs or SHs (CIK).
2957  */
2958 static void cik_select_se_sh(struct radeon_device *rdev,
2959                              u32 se_num, u32 sh_num)
2960 {
2961         u32 data = INSTANCE_BROADCAST_WRITES;
2962
2963         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2964                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2965         else if (se_num == 0xffffffff)
2966                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2967         else if (sh_num == 0xffffffff)
2968                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2969         else
2970                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2971         WREG32(GRBM_GFX_INDEX, data);
2972 }
2973
2974 /**
2975  * cik_create_bitmask - create a bitmask
2976  *
2977  * @bit_width: length of the mask
2978  *
2979  * create a variable length bit mask (CIK).
2980  * Returns the bitmask.
2981  */
2982 static u32 cik_create_bitmask(u32 bit_width)
2983 {
2984         u32 i, mask = 0;
2985
2986         for (i = 0; i < bit_width; i++) {
2987                 mask <<= 1;
2988                 mask |= 1;
2989         }
2990         return mask;
2991 }
2992
2993 /**
2994  * cik_get_rb_disabled - computes the mask of disabled RBs
2995  *
2996  * @rdev: radeon_device pointer
2997  * @max_rb_num: max RBs (render backends) for the asic
2998  * @se_num: number of SEs (shader engines) for the asic
2999  * @sh_per_se: number of SH blocks per SE for the asic
3000  *
3001  * Calculates the bitmask of disabled RBs (CIK).
3002  * Returns the disabled RB bitmask.
3003  */
3004 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3005                               u32 max_rb_num_per_se,
3006                               u32 sh_per_se)
3007 {
3008         u32 data, mask;
3009
3010         data = RREG32(CC_RB_BACKEND_DISABLE);
3011         if (data & 1)
3012                 data &= BACKEND_DISABLE_MASK;
3013         else
3014                 data = 0;
3015         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3016
3017         data >>= BACKEND_DISABLE_SHIFT;
3018
3019         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3020
3021         return data & mask;
3022 }
3023
3024 /**
3025  * cik_setup_rb - setup the RBs on the asic
3026  *
3027  * @rdev: radeon_device pointer
3028  * @se_num: number of SEs (shader engines) for the asic
3029  * @sh_per_se: number of SH blocks per SE for the asic
3030  * @max_rb_num: max RBs (render backends) for the asic
3031  *
3032  * Configures per-SE/SH RB registers (CIK).
3033  */
3034 static void cik_setup_rb(struct radeon_device *rdev,
3035                          u32 se_num, u32 sh_per_se,
3036                          u32 max_rb_num_per_se)
3037 {
3038         int i, j;
3039         u32 data, mask;
3040         u32 disabled_rbs = 0;
3041         u32 enabled_rbs = 0;
3042
3043         mutex_lock(&rdev->grbm_idx_mutex);
3044         for (i = 0; i < se_num; i++) {
3045                 for (j = 0; j < sh_per_se; j++) {
3046                         cik_select_se_sh(rdev, i, j);
3047                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3048                         if (rdev->family == CHIP_HAWAII)
3049                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3050                         else
3051                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3052                 }
3053         }
3054         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3055         mutex_unlock(&rdev->grbm_idx_mutex);
3056
3057         mask = 1;
3058         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3059                 if (!(disabled_rbs & mask))
3060                         enabled_rbs |= mask;
3061                 mask <<= 1;
3062         }
3063
3064         rdev->config.cik.backend_enable_mask = enabled_rbs;
3065
3066         mutex_lock(&rdev->grbm_idx_mutex);
3067         for (i = 0; i < se_num; i++) {
3068                 cik_select_se_sh(rdev, i, 0xffffffff);
3069                 data = 0;
3070                 for (j = 0; j < sh_per_se; j++) {
3071                         switch (enabled_rbs & 3) {
3072                         case 0:
3073                                 if (j == 0)
3074                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3075                                 else
3076                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3077                                 break;
3078                         case 1:
3079                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3080                                 break;
3081                         case 2:
3082                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3083                                 break;
3084                         case 3:
3085                         default:
3086                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3087                                 break;
3088                         }
3089                         enabled_rbs >>= 2;
3090                 }
3091                 WREG32(PA_SC_RASTER_CONFIG, data);
3092         }
3093         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3094         mutex_unlock(&rdev->grbm_idx_mutex);
3095 }
3096
3097 /**
3098  * cik_gpu_init - setup the 3D engine
3099  *
3100  * @rdev: radeon_device pointer
3101  *
3102  * Configures the 3D engine and tiling configuration
3103  * registers so that the 3D engine is usable.
3104  */
3105 static void cik_gpu_init(struct radeon_device *rdev)
3106 {
3107         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3108         u32 mc_shared_chmap, mc_arb_ramcfg;
3109         u32 hdp_host_path_cntl;
3110         u32 tmp;
3111         int i, j;
3112
3113         switch (rdev->family) {
3114         case CHIP_BONAIRE:
3115                 rdev->config.cik.max_shader_engines = 2;
3116                 rdev->config.cik.max_tile_pipes = 4;
3117                 rdev->config.cik.max_cu_per_sh = 7;
3118                 rdev->config.cik.max_sh_per_se = 1;
3119                 rdev->config.cik.max_backends_per_se = 2;
3120                 rdev->config.cik.max_texture_channel_caches = 4;
3121                 rdev->config.cik.max_gprs = 256;
3122                 rdev->config.cik.max_gs_threads = 32;
3123                 rdev->config.cik.max_hw_contexts = 8;
3124
3125                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3126                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3127                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3128                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3129                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3130                 break;
3131         case CHIP_HAWAII:
3132                 rdev->config.cik.max_shader_engines = 4;
3133                 rdev->config.cik.max_tile_pipes = 16;
3134                 rdev->config.cik.max_cu_per_sh = 11;
3135                 rdev->config.cik.max_sh_per_se = 1;
3136                 rdev->config.cik.max_backends_per_se = 4;
3137                 rdev->config.cik.max_texture_channel_caches = 16;
3138                 rdev->config.cik.max_gprs = 256;
3139                 rdev->config.cik.max_gs_threads = 32;
3140                 rdev->config.cik.max_hw_contexts = 8;
3141
3142                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3143                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3144                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3145                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3146                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3147                 break;
3148         case CHIP_KAVERI:
3149                 rdev->config.cik.max_shader_engines = 1;
3150                 rdev->config.cik.max_tile_pipes = 4;
3151                 rdev->config.cik.max_cu_per_sh = 8;
3152                 rdev->config.cik.max_backends_per_se = 2;
3153                 rdev->config.cik.max_sh_per_se = 1;
3154                 rdev->config.cik.max_texture_channel_caches = 4;
3155                 rdev->config.cik.max_gprs = 256;
3156                 rdev->config.cik.max_gs_threads = 16;
3157                 rdev->config.cik.max_hw_contexts = 8;
3158
3159                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3160                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3161                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3162                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3163                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3164                 break;
3165         case CHIP_KABINI:
3166         case CHIP_MULLINS:
3167         default:
3168                 rdev->config.cik.max_shader_engines = 1;
3169                 rdev->config.cik.max_tile_pipes = 2;
3170                 rdev->config.cik.max_cu_per_sh = 2;
3171                 rdev->config.cik.max_sh_per_se = 1;
3172                 rdev->config.cik.max_backends_per_se = 1;
3173                 rdev->config.cik.max_texture_channel_caches = 2;
3174                 rdev->config.cik.max_gprs = 256;
3175                 rdev->config.cik.max_gs_threads = 16;
3176                 rdev->config.cik.max_hw_contexts = 8;
3177
3178                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3179                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3180                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3181                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3182                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3183                 break;
3184         }
3185
3186         /* Initialize HDP */
3187         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3188                 WREG32((0x2c14 + j), 0x00000000);
3189                 WREG32((0x2c18 + j), 0x00000000);
3190                 WREG32((0x2c1c + j), 0x00000000);
3191                 WREG32((0x2c20 + j), 0x00000000);
3192                 WREG32((0x2c24 + j), 0x00000000);
3193         }
3194
3195         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3196         WREG32(SRBM_INT_CNTL, 0x1);
3197         WREG32(SRBM_INT_ACK, 0x1);
3198
3199         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3200
3201         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3202         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3203
3204         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3205         rdev->config.cik.mem_max_burst_length_bytes = 256;
3206         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3207         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3208         if (rdev->config.cik.mem_row_size_in_kb > 4)
3209                 rdev->config.cik.mem_row_size_in_kb = 4;
3210         /* XXX use MC settings? */
3211         rdev->config.cik.shader_engine_tile_size = 32;
3212         rdev->config.cik.num_gpus = 1;
3213         rdev->config.cik.multi_gpu_tile_size = 64;
3214
3215         /* fix up row size */
3216         gb_addr_config &= ~ROW_SIZE_MASK;
3217         switch (rdev->config.cik.mem_row_size_in_kb) {
3218         case 1:
3219         default:
3220                 gb_addr_config |= ROW_SIZE(0);
3221                 break;
3222         case 2:
3223                 gb_addr_config |= ROW_SIZE(1);
3224                 break;
3225         case 4:
3226                 gb_addr_config |= ROW_SIZE(2);
3227                 break;
3228         }
3229
3230         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3231          * not have bank info, so create a custom tiling dword.
3232          * bits 3:0   num_pipes
3233          * bits 7:4   num_banks
3234          * bits 11:8  group_size
3235          * bits 15:12 row_size
3236          */
3237         rdev->config.cik.tile_config = 0;
3238         switch (rdev->config.cik.num_tile_pipes) {
3239         case 1:
3240                 rdev->config.cik.tile_config |= (0 << 0);
3241                 break;
3242         case 2:
3243                 rdev->config.cik.tile_config |= (1 << 0);
3244                 break;
3245         case 4:
3246                 rdev->config.cik.tile_config |= (2 << 0);
3247                 break;
3248         case 8:
3249         default:
3250                 /* XXX what about 12? */
3251                 rdev->config.cik.tile_config |= (3 << 0);
3252                 break;
3253         }
3254         rdev->config.cik.tile_config |=
3255                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3256         rdev->config.cik.tile_config |=
3257                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3258         rdev->config.cik.tile_config |=
3259                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3260
3261         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3262         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3263         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3264         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3265         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3266         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3267         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3268         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3269
3270         cik_tiling_mode_table_init(rdev);
3271
3272         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3273                      rdev->config.cik.max_sh_per_se,
3274                      rdev->config.cik.max_backends_per_se);
3275
3276         rdev->config.cik.active_cus = 0;
3277         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3278                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3279                         rdev->config.cik.active_cus +=
3280                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3281                 }
3282         }
3283
3284         /* set HW defaults for 3D engine */
3285         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3286
3287         mutex_lock(&rdev->grbm_idx_mutex);
3288         /*
3289          * making sure that the following register writes will be broadcasted
3290          * to all the shaders
3291          */
3292         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3293         WREG32(SX_DEBUG_1, 0x20);
3294
3295         WREG32(TA_CNTL_AUX, 0x00010000);
3296
3297         tmp = RREG32(SPI_CONFIG_CNTL);
3298         tmp |= 0x03000000;
3299         WREG32(SPI_CONFIG_CNTL, tmp);
3300
3301         WREG32(SQ_CONFIG, 1);
3302
3303         WREG32(DB_DEBUG, 0);
3304
3305         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3306         tmp |= 0x00000400;
3307         WREG32(DB_DEBUG2, tmp);
3308
3309         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3310         tmp |= 0x00020200;
3311         WREG32(DB_DEBUG3, tmp);
3312
3313         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3314         tmp |= 0x00018208;
3315         WREG32(CB_HW_CONTROL, tmp);
3316
3317         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3318
3319         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3320                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3321                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3322                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3323
3324         WREG32(VGT_NUM_INSTANCES, 1);
3325
3326         WREG32(CP_PERFMON_CNTL, 0);
3327
3328         WREG32(SQ_CONFIG, 0);
3329
3330         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3331                                           FORCE_EOV_MAX_REZ_CNT(255)));
3332
3333         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3334                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3335
3336         WREG32(VGT_GS_VERTEX_REUSE, 16);
3337         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3338
3339         tmp = RREG32(HDP_MISC_CNTL);
3340         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3341         WREG32(HDP_MISC_CNTL, tmp);
3342
3343         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3344         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3345
3346         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3347         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3348         mutex_unlock(&rdev->grbm_idx_mutex);
3349
3350         udelay(50);
3351 }
3352
3353 /*
3354  * GPU scratch registers helpers function.
3355  */
3356 /**
3357  * cik_scratch_init - setup driver info for CP scratch regs
3358  *
3359  * @rdev: radeon_device pointer
3360  *
3361  * Set up the number and offset of the CP scratch registers.
3362  * NOTE: use of CP scratch registers is a legacy inferface and
3363  * is not used by default on newer asics (r6xx+).  On newer asics,
3364  * memory buffers are used for fences rather than scratch regs.
3365  */
3366 static void cik_scratch_init(struct radeon_device *rdev)
3367 {
3368         int i;
3369
3370         rdev->scratch.num_reg = 7;
3371         rdev->scratch.reg_base = SCRATCH_REG0;
3372         for (i = 0; i < rdev->scratch.num_reg; i++) {
3373                 rdev->scratch.free[i] = true;
3374                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3375         }
3376 }
3377
3378 /**
3379  * cik_ring_test - basic gfx ring test
3380  *
3381  * @rdev: radeon_device pointer
3382  * @ring: radeon_ring structure holding ring information
3383  *
3384  * Allocate a scratch register and write to it using the gfx ring (CIK).
3385  * Provides a basic gfx ring test to verify that the ring is working.
3386  * Used by cik_cp_gfx_resume();
3387  * Returns 0 on success, error on failure.
3388  */
3389 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3390 {
3391         uint32_t scratch;
3392         uint32_t tmp = 0;
3393         unsigned i;
3394         int r;
3395
3396         r = radeon_scratch_get(rdev, &scratch);
3397         if (r) {
3398                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3399                 return r;
3400         }
3401         WREG32(scratch, 0xCAFEDEAD);
3402         r = radeon_ring_lock(rdev, ring, 3);
3403         if (r) {
3404                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3405                 radeon_scratch_free(rdev, scratch);
3406                 return r;
3407         }
3408         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3409         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3410         radeon_ring_write(ring, 0xDEADBEEF);
3411         radeon_ring_unlock_commit(rdev, ring, false);
3412
3413         for (i = 0; i < rdev->usec_timeout; i++) {
3414                 tmp = RREG32(scratch);
3415                 if (tmp == 0xDEADBEEF)
3416                         break;
3417                 DRM_UDELAY(1);
3418         }
3419         if (i < rdev->usec_timeout) {
3420                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3421         } else {
3422                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3423                           ring->idx, scratch, tmp);
3424                 r = -EINVAL;
3425         }
3426         radeon_scratch_free(rdev, scratch);
3427         return r;
3428 }
3429
3430 /**
3431  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3432  *
3433  * @rdev: radeon_device pointer
3434  * @ridx: radeon ring index
3435  *
3436  * Emits an hdp flush on the cp.
3437  */
3438 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3439                                        int ridx)
3440 {
3441         struct radeon_ring *ring = &rdev->ring[ridx];
3442         u32 ref_and_mask;
3443
3444         switch (ring->idx) {
3445         case CAYMAN_RING_TYPE_CP1_INDEX:
3446         case CAYMAN_RING_TYPE_CP2_INDEX:
3447         default:
3448                 switch (ring->me) {
3449                 case 0:
3450                         ref_and_mask = CP2 << ring->pipe;
3451                         break;
3452                 case 1:
3453                         ref_and_mask = CP6 << ring->pipe;
3454                         break;
3455                 default:
3456                         return;
3457                 }
3458                 break;
3459         case RADEON_RING_TYPE_GFX_INDEX:
3460                 ref_and_mask = CP0;
3461                 break;
3462         }
3463
3464         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3465         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3466                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3467                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3468         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3469         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3470         radeon_ring_write(ring, ref_and_mask);
3471         radeon_ring_write(ring, ref_and_mask);
3472         radeon_ring_write(ring, 0x20); /* poll interval */
3473 }
3474
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write the 32-bit seq value; INT_SEL(0): no
	 * interrupt for this dummy event.
	 */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): raise the fence interrupt after the seq write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3515
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): then interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3543
/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 * Returns true (the packet is always emitted).
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	/* select whether this MEM_SEMAPHORE waits on or signals @semaphore */
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3575
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy on success, an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* a single DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* serialize against everything attached to @resv before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* CP_SYNC only on the final chunk */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* drop the queued packets without committing them */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3647
/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* VMID 0 when the IB has no VM attached */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this SET_UCONFIG_REG write plus
			 * 4 dwords for the IB packet emitted below
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this WRITE_DATA plus 4 for the IB
			 * packet below; written to the wb next_rptr slot
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VMID to execute under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3702
3703 /**
3704  * cik_ib_test - basic gfx ring IB test
3705  *
3706  * @rdev: radeon_device pointer
3707  * @ring: radeon_ring structure holding ring information
3708  *
3709  * Allocate an IB and execute it on the gfx ring (CIK).
3710  * Provides a basic gfx ring test to verify that IBs are working.
3711  * Returns 0 on success, error on failure.
3712  */
3713 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3714 {
3715         struct radeon_ib ib;
3716         uint32_t scratch;
3717         uint32_t tmp = 0;
3718         unsigned i;
3719         int r;
3720
3721         r = radeon_scratch_get(rdev, &scratch);
3722         if (r) {
3723                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3724                 return r;
3725         }
3726         WREG32(scratch, 0xCAFEDEAD);
3727         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3728         if (r) {
3729                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3730                 radeon_scratch_free(rdev, scratch);
3731                 return r;
3732         }
3733         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3734         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3735         ib.ptr[2] = 0xDEADBEEF;
3736         ib.length_dw = 3;
3737         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3738         if (r) {
3739                 radeon_scratch_free(rdev, scratch);
3740                 radeon_ib_free(rdev, &ib);
3741                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3742                 return r;
3743         }
3744         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3745                 RADEON_USEC_IB_TEST_TIMEOUT));
3746         if (r < 0) {
3747                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3748                 radeon_scratch_free(rdev, scratch);
3749                 radeon_ib_free(rdev, &ib);
3750                 return r;
3751         } else if (r == 0) {
3752                 DRM_ERROR("radeon: fence wait timed out.\n");
3753                 radeon_scratch_free(rdev, scratch);
3754                 radeon_ib_free(rdev, &ib);
3755                 return -ETIMEDOUT;
3756         }
3757         r = 0;
3758         for (i = 0; i < rdev->usec_timeout; i++) {
3759                 tmp = RREG32(scratch);
3760                 if (tmp == 0xDEADBEEF)
3761                         break;
3762                 DRM_UDELAY(1);
3763         }
3764         if (i < rdev->usec_timeout) {
3765                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3766         } else {
3767                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3768                           scratch, tmp);
3769                 r = -EINVAL;
3770         }
3771         radeon_scratch_free(rdev, scratch);
3772         radeon_ib_free(rdev, &ib);
3773         return r;
3774 }
3775
3776 /*
3777  * CP.
 * On CIK, gfx and compute now have independent command processors.
3779  *
3780  * GFX
3781  * Gfx consists of a single ring and can process both gfx jobs and
3782  * compute jobs.  The gfx CP consists of three microengines (ME):
3783  * PFP - Pre-Fetch Parser
3784  * ME - Micro Engine
3785  * CE - Constant Engine
3786  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3788  * used by the DE so that they can be loaded into cache in parallel
3789  * while the DE is processing state update packets.
3790  *
3791  * Compute
3792  * The compute CP consists of two microengines (ME):
3793  * MEC1 - Compute MicroEngine 1
3794  * MEC2 - Compute MicroEngine 2
3795  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3796  * The queues are exposed to userspace and are programmed directly
3797  * by the compute runtime.
3798  */
3799 /**
3800  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3801  *
3802  * @rdev: radeon_device pointer
3803  * @enable: enable or disable the MEs
3804  *
3805  * Halts or unhalts the gfx MEs.
3806  */
3807 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3808 {
3809         if (enable)
3810                 WREG32(CP_ME_CNTL, 0);
3811         else {
3812                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3813                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3814                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3815                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3816         }
3817         udelay(50);
3818 }
3819
3820 /**
3821  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3822  *
3823  * @rdev: radeon_device pointer
3824  *
3825  * Loads the gfx PFP, ME, and CE ucode.
3826  * Returns 0 for success, -EINVAL if the ucode is not available.
3827  */
3828 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3829 {
3830         int i;
3831
3832         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3833                 return -EINVAL;
3834
3835         cik_cp_gfx_enable(rdev, false);
3836
3837         if (rdev->new_fw) {
3838                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3839                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3840                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3841                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3842                 const struct gfx_firmware_header_v1_0 *me_hdr =
3843                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3844                 const __le32 *fw_data;
3845                 u32 fw_size;
3846
3847                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3848                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3849                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3850
3851                 /* PFP */
3852                 fw_data = (const __le32 *)
3853                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3854                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3855                 WREG32(CP_PFP_UCODE_ADDR, 0);
3856                 for (i = 0; i < fw_size; i++)
3857                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3858                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3859
3860                 /* CE */
3861                 fw_data = (const __le32 *)
3862                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3863                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3864                 WREG32(CP_CE_UCODE_ADDR, 0);
3865                 for (i = 0; i < fw_size; i++)
3866                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3867                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3868
3869                 /* ME */
3870                 fw_data = (const __be32 *)
3871                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3872                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3873                 WREG32(CP_ME_RAM_WADDR, 0);
3874                 for (i = 0; i < fw_size; i++)
3875                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3876                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3877                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3878         } else {
3879                 const __be32 *fw_data;
3880
3881                 /* PFP */
3882                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3883                 WREG32(CP_PFP_UCODE_ADDR, 0);
3884                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3885                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3886                 WREG32(CP_PFP_UCODE_ADDR, 0);
3887
3888                 /* CE */
3889                 fw_data = (const __be32 *)rdev->ce_fw->data;
3890                 WREG32(CP_CE_UCODE_ADDR, 0);
3891                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3892                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3893                 WREG32(CP_CE_UCODE_ADDR, 0);
3894
3895                 /* ME */
3896                 fw_data = (const __be32 *)rdev->me_fw->data;
3897                 WREG32(CP_ME_RAM_WADDR, 0);
3898                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3899                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3900                 WREG32(CP_ME_RAM_WADDR, 0);
3901         }
3902
3903         return 0;
3904 }
3905
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state dwords plus 17 dwords of fixed init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden default state into the ring */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
3966
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs first so nothing consumes the ring while it's freed */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3980
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in 8-dword units) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring base address (256-byte aligned, hence >> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx copies work again; expose the full VRAM size to TTM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4056
4057 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4058                      struct radeon_ring *ring)
4059 {
4060         u32 rptr;
4061
4062         if (rdev->wb.enabled)
4063                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4064         else
4065                 rptr = RREG32(CP_RB0_RPTR);
4066
4067         return rptr;
4068 }
4069
/* Return the current gfx ring write pointer from CP_RB0_WPTR. */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	return RREG32(CP_RB0_WPTR);
}
4075
/* Publish the new gfx ring write pointer to the CP. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted MMIO write */
	(void)RREG32(CP_RB0_WPTR);
}
4082
/* Return the current read pointer of a compute (MEC) ring. */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* no writeback: read the HQD register of this queue;
		 * srbm_mutex serializes the SRBM me/pipe/queue selection
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4100
/* Return the current write pointer of a compute (MEC) ring. */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* no writeback: read the HQD register of this queue;
		 * srbm_mutex serializes the SRBM me/pipe/queue selection
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4119
/* Publish a compute ring's new write pointer via its doorbell. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	/* ring the queue's doorbell so the MEC picks up the new wptr */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4127
/* Quiesce one compute queue's hardware queue descriptor (HQD).
 * Callers hold rdev->srbm_mutex around this (see cik_cp_compute_enable).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and wait for the queue to go idle */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4152
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* brief settle delay after changing the MEC halt state */
	udelay(50);
}
4181
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before rewriting their ucode RAM */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-layout firmware: header gives offset/size, payload is
		 * little endian
		 */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - Kaveri has a second MEC with its own image.
		 * NOTE(review): rdev->mec2_fw is dereferenced without a NULL
		 * check here - presumably the fw loader guarantees it on KV
		 * when new_fw is set; verify against the init path.
		 */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* legacy firmware: fixed-size big endian blob */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - legacy firmware reuses the MEC1 image */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4252
4253 /**
4254  * cik_cp_compute_start - start the compute queues
4255  *
4256  * @rdev: radeon_device pointer
4257  *
4258  * Enable the compute queues.
4259  * Returns 0 for success, error for failure.
4260  */
4261 static int cik_cp_compute_start(struct radeon_device *rdev)
4262 {
4263         cik_cp_compute_enable(rdev, true);
4264
4265         return 0;
4266 }
4267
4268 /**
4269  * cik_cp_compute_fini - stop the compute queues
4270  *
4271  * @rdev: radeon_device pointer
4272  *
4273  * Stop the compute queues and tear down the driver queue
4274  * info.
4275  */
4276 static void cik_cp_compute_fini(struct radeon_device *rdev)
4277 {
4278         int i, idx, r;
4279
4280         cik_cp_compute_enable(rdev, false);
4281
4282         for (i = 0; i < 2; i++) {
4283                 if (i == 0)
4284                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4285                 else
4286                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4287
4288                 if (rdev->ring[idx].mqd_obj) {
4289                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4290                         if (unlikely(r != 0))
4291                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4292
4293                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4294                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4295
4296                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4297                         rdev->ring[idx].mqd_obj = NULL;
4298                 }
4299         }
4300 }
4301
4302 static void cik_mec_fini(struct radeon_device *rdev)
4303 {
4304         int r;
4305
4306         if (rdev->mec.hpd_eop_obj) {
4307                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4308                 if (unlikely(r != 0))
4309                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4310                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4311                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4312
4313                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4314                 rdev->mec.hpd_eop_obj = NULL;
4315         }
4316 }
4317
#define MEC_HPD_SIZE 2048

/*
 * cik_mec_init - allocate the MEC HPD EOP storage
 *
 * Sets the driver's MEC/pipe/queue bookkeeping and allocates, pins and
 * zeroes the GTT buffer object that holds the per-pipe HPD EOP area.
 * Returns 0 on success, negative error code on failure (the BO is torn
 * down via cik_mec_fini() on any error after creation).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* NOTE(review): size carries an extra *2 factor over
		 * num_mec * num_pipe * MEC_HPD_SIZE; cik_cp_compute_resume()
		 * strides the EOP base by MEC_HPD_SIZE * 2 per pipe, which
		 * matches - confirm before changing either */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4374
/*
 * CPU-side shadow of the per-queue CP_MQD_* / CP_HQD_* register block,
 * embedded in the MQD so the hardware can save/restore queue state.
 * NOTE(review): the field order appears to mirror the HQD register
 * aperture layout consumed by the CP - do not reorder without checking
 * the register spec.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4413
/*
 * Memory Queue Descriptor (MQD) for Bonaire-class (CIK) compute queues.
 * Lives in a GTT buffer object (see cik_cp_compute_resume()); queue_state
 * holds the HQD register shadow the CP uses for queue save/restore.
 * NOTE(review): layout is hardware/firmware-defined - confirm against the
 * CIK MQD spec before altering field order or sizes.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4441
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Initializes the per-pipe EOP storage, then builds an MQD for each of
 * the two kernel compute rings (CP1/CP2), programs the HQD from it under
 * the SRBM lock, and ring-tests each queue.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	for (i = 0; i < rdev->mec.num_pipe; ++i) {
		/* select pipe i of MEC0 for the following HPD EOP writes */
		cik_srbm_select(rdev, 0, i, 0, 0);

		/* per-pipe EOP slice; stride matches the *2 factor used when
		 * the buffer was allocated in cik_mec_init() */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);

	}
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* the HQD register writes below are per-queue, so keep the
		 * SRBM selection stable until we deselect at the bottom */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait for the dequeue to complete, bounded by
			 * the usual usec timeout */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4683
4684 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4685 {
4686         cik_cp_gfx_enable(rdev, enable);
4687         cik_cp_compute_enable(rdev, enable);
4688 }
4689
/* Load ucode for the gfx CP (PFP/ME/CE) and then the compute MECs.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4703
/* Tear down both command processors: gfx first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4709
4710 static int cik_cp_resume(struct radeon_device *rdev)
4711 {
4712         int r;
4713
4714         cik_enable_gui_idle_interrupt(rdev, false);
4715
4716         r = cik_cp_load_microcode(rdev);
4717         if (r)
4718                 return r;
4719
4720         r = cik_cp_gfx_resume(rdev);
4721         if (r)
4722                 return r;
4723         r = cik_cp_compute_resume(rdev);
4724         if (r)
4725                 return r;
4726
4727         cik_enable_gui_idle_interrupt(rdev, true);
4728
4729         return 0;
4730 }
4731
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log,
 * used for post-mortem info around GPU soft resets.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4771
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4852
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * Sequence: dump state, disable CG/PG and the RLC, halt the CPs and
 * (optionally) the SDMA engines, stop the MC, pulse the GRBM/SRBM
 * soft-reset bits derived from @reset_mask, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before resetting anything */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert, settle, then deassert; the read-backs post the
		 * writes before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
4983
/* GMCON register state saved across a KV GPU reset
 * (see kv_save_regs_for_reset()/kv_restore_regs_for_reset()). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
4989
/* Save the GMCON registers into @save before a KV reset and stop the
 * render engine from executing on power-up / register update while the
 * reset is in progress (restored by kv_restore_regs_for_reset()).
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* disable automatic reng execution and stutter mode for the
	 * duration of the reset */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5001
/**
 * kv_restore_regs_for_reset - restore GMCON state after a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: register values captured by kv_save_regs_for_reset()
 *
 * Replays a fixed sequence of GMCON PGFSM write/config command pairs
 * (each followed by five dummy writes) and then restores the saved
 * GMCON registers (IGP only).  The PGFSM values are opaque magic —
 * presumably a hardware-team-provided init sequence; do not reorder
 * or trim them.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* finally put the pre-reset values back */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5074
/**
 * cik_gpu_pci_config_reset - asic reset via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP/MEC/SDMA engines and the RLC, stops memory access,
 * then resets the whole asic through the pci config register and
 * waits for it to come back.  On IGPs the GMCON state is saved
 * before the reset and restored afterwards.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * as all ones while the asic is still held in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5137
5138 /**
5139  * cik_asic_reset - soft reset GPU
5140  *
5141  * @rdev: radeon_device pointer
5142  * @hard: force hard reset
5143  *
5144  * Look up which blocks are hung and attempt
5145  * to reset them.
5146  * Returns 0 for success.
5147  */
5148 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5149 {
5150         u32 reset_mask;
5151
5152         if (hard) {
5153                 cik_gpu_pci_config_reset(rdev);
5154                 return 0;
5155         }
5156
5157         reset_mask = cik_gpu_check_soft_reset(rdev);
5158
5159         if (reset_mask)
5160                 r600_set_bios_scratch_engine_hung(rdev, true);
5161
5162         /* try soft reset */
5163         cik_gpu_soft_reset(rdev, reset_mask);
5164
5165         reset_mask = cik_gpu_check_soft_reset(rdev);
5166
5167         /* try pci config reset */
5168         if (reset_mask && radeon_hard_reset)
5169                 cik_gpu_pci_config_reset(rdev);
5170
5171         reset_mask = cik_gpu_check_soft_reset(rdev);
5172
5173         if (!reset_mask)
5174                 r600_set_bios_scratch_engine_hung(rdev, false);
5175
5176         return 0;
5177 }
5178
5179 /**
5180  * cik_gfx_is_lockup - check if the 3D engine is locked up
5181  *
5182  * @rdev: radeon_device pointer
5183  * @ring: radeon_ring structure holding ring information
5184  *
5185  * Check if the 3D engine is locked up (CIK).
5186  * Returns true if the engine is locked, false if not.
5187  */
5188 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5189 {
5190         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5191
5192         if (!(reset_mask & (RADEON_RESET_GFX |
5193                             RADEON_RESET_COMPUTE |
5194                             RADEON_RESET_CP))) {
5195                 radeon_ring_lockup_update(rdev, ring);
5196                 return false;
5197         }
5198         return radeon_ring_test_lockup(rdev, ring);
5199 }
5200
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients before reprogramming the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the vram top (high 16 bits) and base
	 * (low 16 bits) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture is not used on CIK */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5257
5258 /**
5259  * cik_mc_init - initialize the memory controller driver params
5260  *
5261  * @rdev: radeon_device pointer
5262  *
5263  * Look up the amount of vram, vram width, and decide how to place
5264  * vram and gart within the GPU's physical address space (CIK).
5265  * Returns 0 for success.
5266  */
5267 static int cik_mc_init(struct radeon_device *rdev)
5268 {
5269         u32 tmp;
5270         int chansize, numchan;
5271
5272         /* Get VRAM informations */
5273         rdev->mc.vram_is_ddr = true;
5274         tmp = RREG32(MC_ARB_RAMCFG);
5275         if (tmp & CHANSIZE_MASK) {
5276                 chansize = 64;
5277         } else {
5278                 chansize = 32;
5279         }
5280         tmp = RREG32(MC_SHARED_CHMAP);
5281         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5282         case 0:
5283         default:
5284                 numchan = 1;
5285                 break;
5286         case 1:
5287                 numchan = 2;
5288                 break;
5289         case 2:
5290                 numchan = 4;
5291                 break;
5292         case 3:
5293                 numchan = 8;
5294                 break;
5295         case 4:
5296                 numchan = 3;
5297                 break;
5298         case 5:
5299                 numchan = 6;
5300                 break;
5301         case 6:
5302                 numchan = 10;
5303                 break;
5304         case 7:
5305                 numchan = 12;
5306                 break;
5307         case 8:
5308                 numchan = 16;
5309                 break;
5310         }
5311         rdev->mc.vram_width = numchan * chansize;
5312         /* Could aper size report 0 ? */
5313         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5314         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5315         /* size in MB on si */
5316         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5317         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5318         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5319         si_vram_gtt_location(rdev, &rdev->mc);
5320         radeon_update_bandwidth_info(rdev);
5321
5322         return 0;
5323 }
5324
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first so any CPU-written PTEs are visible
	 * to the GPU before the TLB is invalidated */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only invalidate context 0
	 * (the kernel GART mapping) here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5346
/**
 * cik_pcie_init_compute_vmid - init SH_MEM registers for compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Programs the SH_MEM aperture registers for VMIDs 8-15, which are
 * reserved for compute (amdkfd) clients.  APE1 is disabled by setting
 * base > limit; the shared/private bases are set to 0x6000 —
 * presumably the fixed compute aperture layout, confirm against the
 * amdkfd code.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes access to the SRBM-banked registers */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* always switch back to VMID 0's register bank */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5368
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control; (0xA << 7) is a field value inherited from
	 * the original bring-up code */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's GART mapping over the gtt range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 redirect to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* raw register offsets with no named defines; purpose not
	 * visible here — carried over from the bring-up code */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register
		 * banks; restore the addresses saved at gart disable */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5491
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* remember the per-VMID page table base addresses so that
	 * cik_pcie_gart_enable() can restore them after resume/reset */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		/* contexts 0-7 and 8-15 live in separate register banks */
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (L2 cache itself stays disabled) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5530
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* shut off the page tables before releasing their backing memory */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5544
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5558
5559 /*
5560  * vm
5561  * VMID 0 is the physical GPU addresses as used by the kernel.
5562  * VMIDs 1-15 are used for userspace clients and are handled
5563  * by the radeon vm/hsa code.
5564  */
5565 /**
5566  * cik_vm_init - cik vm init callback
5567  *
5568  * @rdev: radeon_device pointer
5569  *
5570  * Inits cik specific vm parameters (number of VMs, base of vram for
5571  * VMIDs 1-15) (CIK).
5572  * Returns 0 for success.
5573  */
5574 int cik_vm_init(struct radeon_device *rdev)
5575 {
5576         /*
5577          * number of VMs
5578          * VMID 0 is reserved for System
5579          * radeon graphics/compute will use VMIDs 1-7
5580          * amdkfd will use VMIDs 8-15
5581          */
5582         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5583         /* base offset of vram pages */
5584         if (rdev->flags & RADEON_IS_IGP) {
5585                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5586                 tmp <<= 22;
5587                 rdev->vm_manager.vram_base_offset = tmp;
5588         } else
5589                 rdev->vm_manager.vram_base_offset = 0;
5590
5591         return 0;
5592 }
5593
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: cik_vm_init() allocates no resources.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5604
5605 /**
5606  * cik_vm_decode_fault - print human readable fault info
5607  *
5608  * @rdev: radeon_device pointer
5609  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5610  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5611  *
5612  * Print human readable fault information (CIK).
5613  */
5614 static void cik_vm_decode_fault(struct radeon_device *rdev,
5615                                 u32 status, u32 addr, u32 mc_client)
5616 {
5617         u32 mc_id;
5618         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5619         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5620         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5621                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5622
5623         if (rdev->family == CHIP_HAWAII)
5624                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5625         else
5626                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5627
5628         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5629                protections, vmid, addr,
5630                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5631                block, mc_client, mc_id);
5632 }
5633
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is updated and TLB flushed
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP engine; compute rings write
	 * registers from the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page directory base for this VMID; contexts 0-7
	 * and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* first select the VMID's SRBM register bank */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* write the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM bank back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5715
5716 /*
5717  * RLC
5718  * The RLC is a multi-purpose microengine that handles a
5719  * variety of functions, the most important of which is
5720  * the interrupt controller.
5721  */
5722 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5723                                           bool enable)
5724 {
5725         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5726
5727         if (enable)
5728                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5729         else
5730                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5731         WREG32(CP_INT_CNTL_RING0, tmp);
5732 }
5733
5734 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5735 {
5736         u32 tmp;
5737
5738         tmp = RREG32(RLC_LB_CNTL);
5739         if (enable)
5740                 tmp |= LOAD_BALANCE_ENABLE;
5741         else
5742                 tmp &= ~LOAD_BALANCE_ENABLE;
5743         WREG32(RLC_LB_CNTL, tmp);
5744 }
5745
/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Polls the per-CU master busy register for every SE/SH combination,
 * then the non-CU (GC/TC) master busy bits, giving up after
 * rdev->usec_timeout microseconds per register.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* grbm_idx_mutex serializes use of the SE/SH index registers */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5772
5773 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5774 {
5775         u32 tmp;
5776
5777         tmp = RREG32(RLC_CNTL);
5778         if (tmp != rlc)
5779                 WREG32(RLC_CNTL, rlc);
5780 }
5781
/**
 * cik_halt_rlc - halt the RLC ucode if it is currently running
 *
 * @rdev: radeon_device pointer
 *
 * Clears RLC_ENABLE, waits (bounded by rdev->usec_timeout) for the
 * RLC GPM to go idle and for the serdes to drain.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC graphics power manager to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5805
/**
 * cik_enter_rlc_safe_mode - ask the RLC to enter safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Sends the ENTER_RLC_SAFE_MODE message and waits (bounded by
 * rdev->usec_timeout) for the GFX power/clock status bits to assert
 * and for the RLC to acknowledge by clearing the REQ bit.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ is cleared by the RLC once it has consumed the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5826
5827 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5828 {
5829         u32 tmp;
5830
5831         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5832         WREG32(RLC_GPR_REG2, tmp);
5833 }
5834
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	/* mask the gui idle interrupts while the RLC is halted */
	cik_enable_gui_idle_interrupt(rdev, false);

	/* let in-flight serdes transactions drain */
	cik_wait_for_rlc_serdes(rdev);
}
5850
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up before it is used */
	udelay(50);
}
5866
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG: clear the two low enable bits of RLC_CGCG_CGLS_CTRL */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balancing counter init/max */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program the LB parameters with all SEs/SHs broadcast-selected */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: size/offset come from the
		 * little-endian header in the image
		 */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed per-family size, big-endian words */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
5958
/* Enable/disable coarse grain clock gating (CGCG) and clock gating
 * light sleep (CGLS) for the GFX block.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while programming the serdes; tmp holds
		 * the saved RLC_CNTL for cik_update_rlc() below
		 */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): dummy reads, presumably to flush/settle
		 * the CB clock path before gating is disabled -- the
		 * exact purpose is not documented here; do not remove.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant register write when nothing changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5996
/* Enable/disable medium grain clock gating (MGCG) and the related
 * memory light sleep features for the GFX block.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while programming the serdes; tmp holds
		 * the saved RLC_CNTL for cik_update_rlc() below
		 */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure the CGTS SM control: mode 2, monitor
			 * add 0x96, overrides cleared
			 */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* override CGTS entirely */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6080
/* MC hub/ATC/VM registers that all carry the MC_CG_ENABLE and
 * MC_LS_ENABLE bits; iterated by cik_enable_mc_mgcg() and
 * cik_enable_mc_ls() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6093
6094 static void cik_enable_mc_ls(struct radeon_device *rdev,
6095                              bool enable)
6096 {
6097         int i;
6098         u32 orig, data;
6099
6100         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6101                 orig = data = RREG32(mc_cg_registers[i]);
6102                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6103                         data |= MC_LS_ENABLE;
6104                 else
6105                         data &= ~MC_LS_ENABLE;
6106                 if (data != orig)
6107                         WREG32(mc_cg_registers[i], data);
6108         }
6109 }
6110
6111 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6112                                bool enable)
6113 {
6114         int i;
6115         u32 orig, data;
6116
6117         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6118                 orig = data = RREG32(mc_cg_registers[i]);
6119                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6120                         data |= MC_CG_ENABLE;
6121                 else
6122                         data &= ~MC_CG_ENABLE;
6123                 if (data != orig)
6124                         WREG32(mc_cg_registers[i], data);
6125         }
6126 }
6127
6128 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6129                                  bool enable)
6130 {
6131         u32 orig, data;
6132
6133         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6134                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6135                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6136         } else {
6137                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6138                 data |= 0xff000000;
6139                 if (data != orig)
6140                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6141
6142                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6143                 data |= 0xff000000;
6144                 if (data != orig)
6145                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6146         }
6147 }
6148
6149 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6150                                  bool enable)
6151 {
6152         u32 orig, data;
6153
6154         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6155                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6156                 data |= 0x100;
6157                 if (orig != data)
6158                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6159
6160                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6161                 data |= 0x100;
6162                 if (orig != data)
6163                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6164         } else {
6165                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6166                 data &= ~0x100;
6167                 if (orig != data)
6168                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6169
6170                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6171                 data &= ~0x100;
6172                 if (orig != data)
6173                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6174         }
6175 }
6176
/* Enable/disable UVD medium grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded and
		 * replaced wholesale; the read may exist only for its
		 * bus side effect -- confirm before simplifying.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* dynamic clock mode on */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 memory gating bits and turn dynamic
		 * clock mode off
		 */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6202
6203 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6204                                bool enable)
6205 {
6206         u32 orig, data;
6207
6208         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6209
6210         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6211                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6212                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6213         else
6214                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6215                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6216
6217         if (orig != data)
6218                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6219 }
6220
6221 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6222                                 bool enable)
6223 {
6224         u32 orig, data;
6225
6226         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6227
6228         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6229                 data &= ~CLOCK_GATING_DIS;
6230         else
6231                 data |= CLOCK_GATING_DIS;
6232
6233         if (orig != data)
6234                 WREG32(HDP_HOST_PATH_CNTL, data);
6235 }
6236
6237 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6238                               bool enable)
6239 {
6240         u32 orig, data;
6241
6242         orig = data = RREG32(HDP_MEM_POWER_LS);
6243
6244         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6245                 data |= HDP_LS_ENABLE;
6246         else
6247                 data &= ~HDP_LS_ENABLE;
6248
6249         if (orig != data)
6250                 WREG32(HDP_MEM_POWER_LS, data);
6251 }
6252
6253 void cik_update_cg(struct radeon_device *rdev,
6254                    u32 block, bool enable)
6255 {
6256
6257         if (block & RADEON_CG_BLOCK_GFX) {
6258                 cik_enable_gui_idle_interrupt(rdev, false);
6259                 /* order matters! */
6260                 if (enable) {
6261                         cik_enable_mgcg(rdev, true);
6262                         cik_enable_cgcg(rdev, true);
6263                 } else {
6264                         cik_enable_cgcg(rdev, false);
6265                         cik_enable_mgcg(rdev, false);
6266                 }
6267                 cik_enable_gui_idle_interrupt(rdev, true);
6268         }
6269
6270         if (block & RADEON_CG_BLOCK_MC) {
6271                 if (!(rdev->flags & RADEON_IS_IGP)) {
6272                         cik_enable_mc_mgcg(rdev, enable);
6273                         cik_enable_mc_ls(rdev, enable);
6274                 }
6275         }
6276
6277         if (block & RADEON_CG_BLOCK_SDMA) {
6278                 cik_enable_sdma_mgcg(rdev, enable);
6279                 cik_enable_sdma_mgls(rdev, enable);
6280         }
6281
6282         if (block & RADEON_CG_BLOCK_BIF) {
6283                 cik_enable_bif_mgls(rdev, enable);
6284         }
6285
6286         if (block & RADEON_CG_BLOCK_UVD) {
6287                 if (rdev->has_uvd)
6288                         cik_enable_uvd_mgcg(rdev, enable);
6289         }
6290
6291         if (block & RADEON_CG_BLOCK_HDP) {
6292                 cik_enable_hdp_mgcg(rdev, enable);
6293                 cik_enable_hdp_ls(rdev, enable);
6294         }
6295
6296         if (block & RADEON_CG_BLOCK_VCE) {
6297                 vce_v2_0_enable_mgcg(rdev, enable);
6298         }
6299 }
6300
6301 static void cik_init_cg(struct radeon_device *rdev)
6302 {
6303
6304         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6305
6306         if (rdev->has_uvd)
6307                 si_init_uvd_internal_cg(rdev);
6308
6309         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6310                              RADEON_CG_BLOCK_SDMA |
6311                              RADEON_CG_BLOCK_BIF |
6312                              RADEON_CG_BLOCK_UVD |
6313                              RADEON_CG_BLOCK_HDP), true);
6314 }
6315
6316 static void cik_fini_cg(struct radeon_device *rdev)
6317 {
6318         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6319                              RADEON_CG_BLOCK_SDMA |
6320                              RADEON_CG_BLOCK_BIF |
6321                              RADEON_CG_BLOCK_UVD |
6322                              RADEON_CG_BLOCK_HDP), false);
6323
6324         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6325 }
6326
6327 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6328                                           bool enable)
6329 {
6330         u32 data, orig;
6331
6332         orig = data = RREG32(RLC_PG_CNTL);
6333         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6334                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6335         else
6336                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6337         if (orig != data)
6338                 WREG32(RLC_PG_CNTL, data);
6339 }
6340
6341 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6342                                           bool enable)
6343 {
6344         u32 data, orig;
6345
6346         orig = data = RREG32(RLC_PG_CNTL);
6347         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6348                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6349         else
6350                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6351         if (orig != data)
6352                 WREG32(RLC_PG_CNTL, data);
6353 }
6354
6355 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6356 {
6357         u32 data, orig;
6358
6359         orig = data = RREG32(RLC_PG_CNTL);
6360         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6361                 data &= ~DISABLE_CP_PG;
6362         else
6363                 data |= DISABLE_CP_PG;
6364         if (orig != data)
6365                 WREG32(RLC_PG_CNTL, data);
6366 }
6367
6368 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6369 {
6370         u32 data, orig;
6371
6372         orig = data = RREG32(RLC_PG_CNTL);
6373         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6374                 data &= ~DISABLE_GDS_PG;
6375         else
6376                 data |= DISABLE_GDS_PG;
6377         if (orig != data)
6378                 WREG32(RLC_PG_CNTL, data);
6379 }
6380
/* Layout of the CP jump tables inside the legacy (non new_fw) ucode
 * images: each table is CP_ME_TABLE_SIZE dwords long at the given
 * dword offset; see cik_init_cp_pg_table().
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6384
6385 void cik_init_cp_pg_table(struct radeon_device *rdev)
6386 {
6387         volatile u32 *dst_ptr;
6388         int me, i, max_me = 4;
6389         u32 bo_offset = 0;
6390         u32 table_offset, table_size;
6391
6392         if (rdev->family == CHIP_KAVERI)
6393                 max_me = 5;
6394
6395         if (rdev->rlc.cp_table_ptr == NULL)
6396                 return;
6397
6398         /* write the cp table buffer */
6399         dst_ptr = rdev->rlc.cp_table_ptr;
6400         for (me = 0; me < max_me; me++) {
6401                 if (rdev->new_fw) {
6402                         const __le32 *fw_data;
6403                         const struct gfx_firmware_header_v1_0 *hdr;
6404
6405                         if (me == 0) {
6406                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6407                                 fw_data = (const __le32 *)
6408                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6409                                 table_offset = le32_to_cpu(hdr->jt_offset);
6410                                 table_size = le32_to_cpu(hdr->jt_size);
6411                         } else if (me == 1) {
6412                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6413                                 fw_data = (const __le32 *)
6414                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6415                                 table_offset = le32_to_cpu(hdr->jt_offset);
6416                                 table_size = le32_to_cpu(hdr->jt_size);
6417                         } else if (me == 2) {
6418                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6419                                 fw_data = (const __le32 *)
6420                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6421                                 table_offset = le32_to_cpu(hdr->jt_offset);
6422                                 table_size = le32_to_cpu(hdr->jt_size);
6423                         } else if (me == 3) {
6424                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6425                                 fw_data = (const __le32 *)
6426                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6427                                 table_offset = le32_to_cpu(hdr->jt_offset);
6428                                 table_size = le32_to_cpu(hdr->jt_size);
6429                         } else {
6430                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6431                                 fw_data = (const __le32 *)
6432                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6433                                 table_offset = le32_to_cpu(hdr->jt_offset);
6434                                 table_size = le32_to_cpu(hdr->jt_size);
6435                         }
6436
6437                         for (i = 0; i < table_size; i ++) {
6438                                 dst_ptr[bo_offset + i] =
6439                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6440                         }
6441                         bo_offset += table_size;
6442                 } else {
6443                         const __be32 *fw_data;
6444                         table_size = CP_ME_TABLE_SIZE;
6445
6446                         if (me == 0) {
6447                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6448                                 table_offset = CP_ME_TABLE_OFFSET;
6449                         } else if (me == 1) {
6450                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6451                                 table_offset = CP_ME_TABLE_OFFSET;
6452                         } else if (me == 2) {
6453                                 fw_data = (const __be32 *)rdev->me_fw->data;
6454                                 table_offset = CP_ME_TABLE_OFFSET;
6455                         } else {
6456                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6457                                 table_offset = CP_MEC_TABLE_OFFSET;
6458                         }
6459
6460                         for (i = 0; i < table_size; i ++) {
6461                                 dst_ptr[bo_offset + i] =
6462                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6463                         }
6464                         bo_offset += table_size;
6465                 }
6466         }
6467 }
6468
6469 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6470                                 bool enable)
6471 {
6472         u32 data, orig;
6473
6474         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6475                 orig = data = RREG32(RLC_PG_CNTL);
6476                 data |= GFX_PG_ENABLE;
6477                 if (orig != data)
6478                         WREG32(RLC_PG_CNTL, data);
6479
6480                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6481                 data |= AUTO_PG_EN;
6482                 if (orig != data)
6483                         WREG32(RLC_AUTO_PG_CTRL, data);
6484         } else {
6485                 orig = data = RREG32(RLC_PG_CNTL);
6486                 data &= ~GFX_PG_ENABLE;
6487                 if (orig != data)
6488                         WREG32(RLC_PG_CNTL, data);
6489
6490                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6491                 data &= ~AUTO_PG_EN;
6492                 if (orig != data)
6493                         WREG32(RLC_AUTO_PG_CTRL, data);
6494
6495                 data = RREG32(DB_RENDER_CONTROL);
6496         }
6497 }
6498
6499 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6500 {
6501         u32 mask = 0, tmp, tmp1;
6502         int i;
6503
6504         mutex_lock(&rdev->grbm_idx_mutex);
6505         cik_select_se_sh(rdev, se, sh);
6506         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6507         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6508         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6509         mutex_unlock(&rdev->grbm_idx_mutex);
6510
6511         tmp &= 0xffff0000;
6512
6513         tmp |= tmp1;
6514         tmp >>= 16;
6515
6516         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6517                 mask <<= 1;
6518                 mask |= 1;
6519         }
6520
6521         return (~tmp) & mask;
6522 }
6523
6524 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6525 {
6526         u32 i, j, k, active_cu_number = 0;
6527         u32 mask, counter, cu_bitmap;
6528         u32 tmp = 0;
6529
6530         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6531                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6532                         mask = 1;
6533                         cu_bitmap = 0;
6534                         counter = 0;
6535                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6536                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6537                                         if (counter < 2)
6538                                                 cu_bitmap |= mask;
6539                                         counter ++;
6540                                 }
6541                                 mask <<= 1;
6542                         }
6543
6544                         active_cu_number += counter;
6545                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6546                 }
6547         }
6548
6549         WREG32(RLC_PG_AO_CU_MASK, tmp);
6550
6551         tmp = RREG32(RLC_MAX_PG_CU);
6552         tmp &= ~MAX_PU_CU_MASK;
6553         tmp |= MAX_PU_CU(active_cu_number);
6554         WREG32(RLC_MAX_PG_CU, tmp);
6555 }
6556
6557 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6558                                        bool enable)
6559 {
6560         u32 data, orig;
6561
6562         orig = data = RREG32(RLC_PG_CNTL);
6563         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6564                 data |= STATIC_PER_CU_PG_ENABLE;
6565         else
6566                 data &= ~STATIC_PER_CU_PG_ENABLE;
6567         if (orig != data)
6568                 WREG32(RLC_PG_CNTL, data);
6569 }
6570
6571 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6572                                         bool enable)
6573 {
6574         u32 data, orig;
6575
6576         orig = data = RREG32(RLC_PG_CNTL);
6577         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6578                 data |= DYN_PER_CU_PG_ENABLE;
6579         else
6580                 data &= ~DYN_PER_CU_PG_ENABLE;
6581         if (orig != data)
6582                 WREG32(RLC_PG_CNTL, data);
6583 }
6584
/* Offsets into the RLC GPM scratch area used by cik_init_gfx_cgpg(). */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6587
/* Program the RLC state needed for GFX powergating: the clear state
 * descriptor, the save/restore register list, the PG source and base
 * addresses, the PG delays, and the auto-PG idle threshold.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* publish the clear state buffer address/size via the
		 * GPM scratch area
		 */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	/* lengthen the CP RB wptr poll interval */
	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* low byte of RLC_PG_DELAY_2 set to 0x3 */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* program the GRBM_REG_SGIT field of the auto-PG control */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6636
/* Apply the same on/off state to GFX CGPG and both flavours of
 * per-CU medium grain powergating.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6643
6644 u32 cik_get_csb_size(struct radeon_device *rdev)
6645 {
6646         u32 count = 0;
6647         const struct cs_section_def *sect = NULL;
6648         const struct cs_extent_def *ext = NULL;
6649
6650         if (rdev->rlc.cs_data == NULL)
6651                 return 0;
6652
6653         /* begin clear state */
6654         count += 2;
6655         /* context control state */
6656         count += 3;
6657
6658         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6659                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6660                         if (sect->id == SECT_CONTEXT)
6661                                 count += 2 + ext->reg_count;
6662                         else
6663                                 return 0;
6664                 }
6665         }
6666         /* pa_sc_raster_config/pa_sc_raster_config1 */
6667         count += 4;
6668         /* end clear state */
6669         count += 2;
6670         /* clear state */
6671         count += 2;
6672
6673         return count;
6674 }
6675
6676 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6677 {
6678         u32 count = 0, i;
6679         const struct cs_section_def *sect = NULL;
6680         const struct cs_extent_def *ext = NULL;
6681
6682         if (rdev->rlc.cs_data == NULL)
6683                 return;
6684         if (buffer == NULL)
6685                 return;
6686
6687         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6688         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6689
6690         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6691         buffer[count++] = cpu_to_le32(0x80000000);
6692         buffer[count++] = cpu_to_le32(0x80000000);
6693
6694         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6695                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6696                         if (sect->id == SECT_CONTEXT) {
6697                                 buffer[count++] =
6698                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6699                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6700                                 for (i = 0; i < ext->reg_count; i++)
6701                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
6702                         } else {
6703                                 return;
6704                         }
6705                 }
6706         }
6707
6708         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6709         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6710         switch (rdev->family) {
6711         case CHIP_BONAIRE:
6712                 buffer[count++] = cpu_to_le32(0x16000012);
6713                 buffer[count++] = cpu_to_le32(0x00000000);
6714                 break;
6715         case CHIP_KAVERI:
6716                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6717                 buffer[count++] = cpu_to_le32(0x00000000);
6718                 break;
6719         case CHIP_KABINI:
6720         case CHIP_MULLINS:
6721                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6722                 buffer[count++] = cpu_to_le32(0x00000000);
6723                 break;
6724         case CHIP_HAWAII:
6725                 buffer[count++] = cpu_to_le32(0x3a00161a);
6726                 buffer[count++] = cpu_to_le32(0x0000002e);
6727                 break;
6728         default:
6729                 buffer[count++] = cpu_to_le32(0x00000000);
6730                 buffer[count++] = cpu_to_le32(0x00000000);
6731                 break;
6732         }
6733
6734         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6735         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6736
6737         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6738         buffer[count++] = cpu_to_le32(0);
6739 }
6740
6741 static void cik_init_pg(struct radeon_device *rdev)
6742 {
6743         if (rdev->pg_flags) {
6744                 cik_enable_sck_slowdown_on_pu(rdev, true);
6745                 cik_enable_sck_slowdown_on_pd(rdev, true);
6746                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6747                         cik_init_gfx_cgpg(rdev);
6748                         cik_enable_cp_pg(rdev, true);
6749                         cik_enable_gds_pg(rdev, true);
6750                 }
6751                 cik_init_ao_cu_mask(rdev);
6752                 cik_update_gfx_pg(rdev, true);
6753         }
6754 }
6755
6756 static void cik_fini_pg(struct radeon_device *rdev)
6757 {
6758         if (rdev->pg_flags) {
6759                 cik_update_gfx_pg(rdev, false);
6760                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6761                         cik_enable_cp_pg(rdev, false);
6762                         cik_enable_gds_pg(rdev, false);
6763                 }
6764         }
6765 }
6766
6767 /*
6768  * Interrupts
6769  * Starting with r6xx, interrupts are handled via a ring buffer.
6770  * Ring buffers are areas of GPU accessible memory that the GPU
6771  * writes interrupt vectors into and the host reads vectors out of.
6772  * There is a rptr (read pointer) that determines where the
6773  * host is currently reading, and a wptr (write pointer)
6774  * which determines where the GPU has written.  When the
6775  * pointers are equal, the ring is idle.  When the GPU
6776  * writes vectors to the ring buffer, it increments the
6777  * wptr.  When there is an interrupt, the host then starts
6778  * fetching commands and processing them until the pointers are
6779  * equal again at which point it updates the rptr.
6780  */
6781
6782 /**
6783  * cik_enable_interrupts - Enable the interrupt ring buffer
6784  *
6785  * @rdev: radeon_device pointer
6786  *
6787  * Enable the interrupt ring buffer (CIK).
6788  */
6789 static void cik_enable_interrupts(struct radeon_device *rdev)
6790 {
6791         u32 ih_cntl = RREG32(IH_CNTL);
6792         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6793
6794         ih_cntl |= ENABLE_INTR;
6795         ih_rb_cntl |= IH_RB_ENABLE;
6796         WREG32(IH_CNTL, ih_cntl);
6797         WREG32(IH_RB_CNTL, ih_rb_cntl);
6798         rdev->ih.enabled = true;
6799 }
6800
6801 /**
6802  * cik_disable_interrupts - Disable the interrupt ring buffer
6803  *
6804  * @rdev: radeon_device pointer
6805  *
6806  * Disable the interrupt ring buffer (CIK).
6807  */
6808 static void cik_disable_interrupts(struct radeon_device *rdev)
6809 {
6810         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6811         u32 ih_cntl = RREG32(IH_CNTL);
6812
6813         ih_rb_cntl &= ~IH_RB_ENABLE;
6814         ih_cntl &= ~ENABLE_INTR;
6815         WREG32(IH_RB_CNTL, ih_rb_cntl);
6816         WREG32(IH_CNTL, ih_cntl);
6817         /* set rptr, wptr to 0 */
6818         WREG32(IH_RB_RPTR, 0);
6819         WREG32(IH_RB_WPTR, 0);
6820         rdev->ih.enabled = false;
6821         rdev->ih.rptr = 0;
6822 }
6823
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK):
 * gfx ring, both SDMA engines, the ME1/ME2 compute pipes, GRBM,
 * SRBM, per-crtc vblank/vline and pflip, DAC autodetect and the
 * six HPD pads.  The HPD polarity bits are preserved so a later
 * cik_irq_set() re-enables hotplug with the configured sense.
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all four pipes on both MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — crtcs 2-5 only exist on some parts */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: keep only the polarity bit on each pad */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6900
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* free the ring again on failure so load can be retried */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6982
6983 /**
6984  * cik_irq_set - enable/disable interrupt sources
6985  *
6986  * @rdev: radeon_device pointer
6987  *
6988  * Enable interrupt sources on the GPU (vblanks, hpd,
6989  * etc.) (CIK).
6990  * Returns 0 for success, errors for failure.
6991  */
6992 int cik_irq_set(struct radeon_device *rdev)
6993 {
6994         u32 cp_int_cntl;
6995         u32 cp_m1p0;
6996         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6997         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6998         u32 grbm_int_cntl = 0;
6999         u32 dma_cntl, dma_cntl1;
7000
7001         if (!rdev->irq.installed) {
7002                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7003                 return -EINVAL;
7004         }
7005         /* don't enable anything if the ih is disabled */
7006         if (!rdev->ih.enabled) {
7007                 cik_disable_interrupts(rdev);
7008                 /* force the active interrupt state to all disabled */
7009                 cik_disable_interrupt_state(rdev);
7010                 return 0;
7011         }
7012
7013         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7014                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7015         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7016
7017         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7018         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7019         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7020         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7021         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7022         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7023
7024         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7025         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7026
7027         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7028
7029         /* enable CP interrupts on all rings */
7030         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7031                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7032                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7033         }
7034         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7035                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7036                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7037                 if (ring->me == 1) {
7038                         switch (ring->pipe) {
7039                         case 0:
7040                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7041                                 break;
7042                         default:
7043                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7044                                 break;
7045                         }
7046                 } else {
7047                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7048                 }
7049         }
7050         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7051                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7052                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7053                 if (ring->me == 1) {
7054                         switch (ring->pipe) {
7055                         case 0:
7056                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7057                                 break;
7058                         default:
7059                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7060                                 break;
7061                         }
7062                 } else {
7063                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7064                 }
7065         }
7066
7067         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7068                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7069                 dma_cntl |= TRAP_ENABLE;
7070         }
7071
7072         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7073                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7074                 dma_cntl1 |= TRAP_ENABLE;
7075         }
7076
7077         if (rdev->irq.crtc_vblank_int[0] ||
7078             atomic_read(&rdev->irq.pflip[0])) {
7079                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7080                 crtc1 |= VBLANK_INTERRUPT_MASK;
7081         }
7082         if (rdev->irq.crtc_vblank_int[1] ||
7083             atomic_read(&rdev->irq.pflip[1])) {
7084                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7085                 crtc2 |= VBLANK_INTERRUPT_MASK;
7086         }
7087         if (rdev->irq.crtc_vblank_int[2] ||
7088             atomic_read(&rdev->irq.pflip[2])) {
7089                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7090                 crtc3 |= VBLANK_INTERRUPT_MASK;
7091         }
7092         if (rdev->irq.crtc_vblank_int[3] ||
7093             atomic_read(&rdev->irq.pflip[3])) {
7094                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7095                 crtc4 |= VBLANK_INTERRUPT_MASK;
7096         }
7097         if (rdev->irq.crtc_vblank_int[4] ||
7098             atomic_read(&rdev->irq.pflip[4])) {
7099                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7100                 crtc5 |= VBLANK_INTERRUPT_MASK;
7101         }
7102         if (rdev->irq.crtc_vblank_int[5] ||
7103             atomic_read(&rdev->irq.pflip[5])) {
7104                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7105                 crtc6 |= VBLANK_INTERRUPT_MASK;
7106         }
7107         if (rdev->irq.hpd[0]) {
7108                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7109                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7110         }
7111         if (rdev->irq.hpd[1]) {
7112                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7113                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7114         }
7115         if (rdev->irq.hpd[2]) {
7116                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7117                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7118         }
7119         if (rdev->irq.hpd[3]) {
7120                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7121                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7122         }
7123         if (rdev->irq.hpd[4]) {
7124                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7125                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7126         }
7127         if (rdev->irq.hpd[5]) {
7128                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7129                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7130         }
7131
7132         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7133
7134         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7135         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7136
7137         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7138
7139         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7140
7141         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7142         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7143         if (rdev->num_crtc >= 4) {
7144                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7145                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7146         }
7147         if (rdev->num_crtc >= 6) {
7148                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7149                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7150         }
7151
7152         if (rdev->num_crtc >= 2) {
7153                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7154                        GRPH_PFLIP_INT_MASK);
7155                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7156                        GRPH_PFLIP_INT_MASK);
7157         }
7158         if (rdev->num_crtc >= 4) {
7159                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7160                        GRPH_PFLIP_INT_MASK);
7161                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7162                        GRPH_PFLIP_INT_MASK);
7163         }
7164         if (rdev->num_crtc >= 6) {
7165                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7166                        GRPH_PFLIP_INT_MASK);
7167                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7168                        GRPH_PFLIP_INT_MASK);
7169         }
7170
7171         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7172         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7173         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7174         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7175         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7176         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7177
7178         /* posting read */
7179         RREG32(SRBM_STATUS);
7180
7181         return 0;
7182 }
7183
/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupts sources are sw
 * generated and do not require an explicit ack.
 * Latches the display interrupt status registers into
 * rdev->irq.stat_regs.cik for cik_irq_process(), then writes
 * the ack bits for every pending pflip/vblank/vline/hpd event.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* snapshot all display interrupt status registers first */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip/vblank/vline on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack pflip/vblank/vline on crtc 2/3 if present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack pflip/vblank/vline on crtc 4/5 if present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hotplug detect interrupts on the six HPD pads */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack HPD RX (e.g. DP short pulse) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7332
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): shut down the IH ring,
 * wait briefly for in-flight interrupts, ack anything pending
 * and clear every interrupt enable bit.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7348
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7362
7363 /**
7364  * cik_irq_fini - tear down interrupt support
7365  *
7366  * @rdev: radeon_device pointer
7367  *
7368  * Disable interrupts on the hw and free the IH ring
7369  * buffer (CIK).
7370  * Used for driver unload.
7371  */
7372 static void cik_irq_fini(struct radeon_device *rdev)
7373 {
7374         cik_irq_suspend(rdev);
7375         r600_ih_ring_fini(rdev);
7376 }
7377
7378 /**
7379  * cik_get_ih_wptr - get the IH ring buffer wptr
7380  *
7381  * @rdev: radeon_device pointer
7382  *
7383  * Get the IH ring buffer wptr from either the register
7384  * or the writeback memory buffer (CIK).  Also check for
7385  * ring buffer overflow and deal with it.
7386  * Used by cik_irq_process().
7387  * Returns the value of the wptr.
7388  */
7389 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7390 {
7391         u32 wptr, tmp;
7392
7393         if (rdev->wb.enabled)
7394                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7395         else
7396                 wptr = RREG32(IH_RB_WPTR);
7397
7398         if (wptr & RB_OVERFLOW) {
7399                 wptr &= ~RB_OVERFLOW;
7400                 /* When a ring buffer overflow happen start parsing interrupt
7401                  * from the last not overwritten vector (wptr + 16). Hopefully
7402                  * this should allow us to catchup.
7403                  */
7404                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7405                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7406                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7407                 tmp = RREG32(IH_RB_CNTL);
7408                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7409                 WREG32(IH_RB_CNTL, tmp);
7410         }
7411         return (wptr & rdev->ih.ptr_mask);
7412 }
7413
7414 /*        CIK IV Ring
7415  * Each IV ring entry is 128 bits:
7416  * [7:0]    - interrupt source id
7417  * [31:8]   - reserved
7418  * [59:32]  - interrupt source data
7419  * [63:60]  - reserved
7420  * [71:64]  - RINGID
7421  *            CP:
7422  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7423  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7424  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7425  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7426  *            PIPE_ID - ME0 0=3D
7427  *                    - ME1&2 compute dispatcher (4 pipes each)
7428  *            SDMA:
7429  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7430  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7431  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7432  * [79:72]  - VMID
7433  * [95:80]  - PASID
7434  * [127:96] - reserved
7435  */
7436 /**
7437  * cik_irq_process - interrupt handler
7438  *
7439  * @rdev: radeon_device pointer
7440  *
 * Interrupt handler (CIK).  Walk the IH ring,
7442  * ack interrupts and schedule work to handle
7443  * interrupt events.
7444  * Returns irq process return code.
7445  */
7446 int cik_irq_process(struct radeon_device *rdev)
7447 {
7448         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7449         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7450         u32 wptr;
7451         u32 rptr;
7452         u32 src_id, src_data, ring_id;
7453         u8 me_id, pipe_id, queue_id;
7454         u32 ring_index;
7455         bool queue_hotplug = false;
7456         bool queue_dp = false;
7457         bool queue_reset = false;
7458         u32 addr, status, mc_client;
7459         bool queue_thermal = false;
7460
7461         if (!rdev->ih.enabled || rdev->shutdown)
7462                 return IRQ_NONE;
7463
7464         wptr = cik_get_ih_wptr(rdev);
7465
7466 restart_ih:
7467         /* is somebody else already processing irqs? */
7468         if (atomic_xchg(&rdev->ih.lock, 1))
7469                 return IRQ_NONE;
7470
7471         rptr = rdev->ih.rptr;
7472         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7473
7474         /* Order reading of wptr vs. reading of IH ring data */
7475         rmb();
7476
7477         /* display interrupts */
7478         cik_irq_ack(rdev);
7479
7480         while (rptr != wptr) {
7481                 /* wptr/rptr are in bytes! */
7482                 ring_index = rptr / 4;
7483
7484                 radeon_kfd_interrupt(rdev,
7485                                 (const void *) &rdev->ih.ring[ring_index]);
7486
7487                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7488                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7489                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7490
7491                 switch (src_id) {
7492                 case 1: /* D1 vblank/vline */
7493                         switch (src_data) {
7494                         case 0: /* D1 vblank */
7495                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7496                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7497
7498                                 if (rdev->irq.crtc_vblank_int[0]) {
7499                                         drm_handle_vblank(rdev->ddev, 0);
7500                                         rdev->pm.vblank_sync = true;
7501                                         wake_up(&rdev->irq.vblank_queue);
7502                                 }
7503                                 if (atomic_read(&rdev->irq.pflip[0]))
7504                                         radeon_crtc_handle_vblank(rdev, 0);
7505                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7506                                 DRM_DEBUG("IH: D1 vblank\n");
7507
7508                                 break;
7509                         case 1: /* D1 vline */
7510                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7511                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7512
7513                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7514                                 DRM_DEBUG("IH: D1 vline\n");
7515
7516                                 break;
7517                         default:
7518                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7519                                 break;
7520                         }
7521                         break;
7522                 case 2: /* D2 vblank/vline */
7523                         switch (src_data) {
7524                         case 0: /* D2 vblank */
7525                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7526                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7527
7528                                 if (rdev->irq.crtc_vblank_int[1]) {
7529                                         drm_handle_vblank(rdev->ddev, 1);
7530                                         rdev->pm.vblank_sync = true;
7531                                         wake_up(&rdev->irq.vblank_queue);
7532                                 }
7533                                 if (atomic_read(&rdev->irq.pflip[1]))
7534                                         radeon_crtc_handle_vblank(rdev, 1);
7535                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7536                                 DRM_DEBUG("IH: D2 vblank\n");
7537
7538                                 break;
7539                         case 1: /* D2 vline */
7540                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7541                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7542
7543                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7544                                 DRM_DEBUG("IH: D2 vline\n");
7545
7546                                 break;
7547                         default:
7548                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7549                                 break;
7550                         }
7551                         break;
7552                 case 3: /* D3 vblank/vline */
7553                         switch (src_data) {
7554                         case 0: /* D3 vblank */
7555                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7556                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7557
7558                                 if (rdev->irq.crtc_vblank_int[2]) {
7559                                         drm_handle_vblank(rdev->ddev, 2);
7560                                         rdev->pm.vblank_sync = true;
7561                                         wake_up(&rdev->irq.vblank_queue);
7562                                 }
7563                                 if (atomic_read(&rdev->irq.pflip[2]))
7564                                         radeon_crtc_handle_vblank(rdev, 2);
7565                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7566                                 DRM_DEBUG("IH: D3 vblank\n");
7567
7568                                 break;
7569                         case 1: /* D3 vline */
7570                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7571                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7572
7573                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7574                                 DRM_DEBUG("IH: D3 vline\n");
7575
7576                                 break;
7577                         default:
7578                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7579                                 break;
7580                         }
7581                         break;
7582                 case 4: /* D4 vblank/vline */
7583                         switch (src_data) {
7584                         case 0: /* D4 vblank */
7585                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7586                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7587
7588                                 if (rdev->irq.crtc_vblank_int[3]) {
7589                                         drm_handle_vblank(rdev->ddev, 3);
7590                                         rdev->pm.vblank_sync = true;
7591                                         wake_up(&rdev->irq.vblank_queue);
7592                                 }
7593                                 if (atomic_read(&rdev->irq.pflip[3]))
7594                                         radeon_crtc_handle_vblank(rdev, 3);
7595                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7596                                 DRM_DEBUG("IH: D4 vblank\n");
7597
7598                                 break;
7599                         case 1: /* D4 vline */
7600                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7601                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7602
7603                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7604                                 DRM_DEBUG("IH: D4 vline\n");
7605
7606                                 break;
7607                         default:
7608                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7609                                 break;
7610                         }
7611                         break;
7612                 case 5: /* D5 vblank/vline */
7613                         switch (src_data) {
7614                         case 0: /* D5 vblank */
7615                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7616                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7617
7618                                 if (rdev->irq.crtc_vblank_int[4]) {
7619                                         drm_handle_vblank(rdev->ddev, 4);
7620                                         rdev->pm.vblank_sync = true;
7621                                         wake_up(&rdev->irq.vblank_queue);
7622                                 }
7623                                 if (atomic_read(&rdev->irq.pflip[4]))
7624                                         radeon_crtc_handle_vblank(rdev, 4);
7625                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7626                                 DRM_DEBUG("IH: D5 vblank\n");
7627
7628                                 break;
7629                         case 1: /* D5 vline */
7630                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7631                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7632
7633                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7634                                 DRM_DEBUG("IH: D5 vline\n");
7635
7636                                 break;
7637                         default:
7638                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7639                                 break;
7640                         }
7641                         break;
7642                 case 6: /* D6 vblank/vline */
7643                         switch (src_data) {
7644                         case 0: /* D6 vblank */
7645                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7646                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7647
7648                                 if (rdev->irq.crtc_vblank_int[5]) {
7649                                         drm_handle_vblank(rdev->ddev, 5);
7650                                         rdev->pm.vblank_sync = true;
7651                                         wake_up(&rdev->irq.vblank_queue);
7652                                 }
7653                                 if (atomic_read(&rdev->irq.pflip[5]))
7654                                         radeon_crtc_handle_vblank(rdev, 5);
7655                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7656                                 DRM_DEBUG("IH: D6 vblank\n");
7657
7658                                 break;
7659                         case 1: /* D6 vline */
7660                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7661                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7662
7663                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7664                                 DRM_DEBUG("IH: D6 vline\n");
7665
7666                                 break;
7667                         default:
7668                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7669                                 break;
7670                         }
7671                         break;
7672                 case 8: /* D1 page flip */
7673                 case 10: /* D2 page flip */
7674                 case 12: /* D3 page flip */
7675                 case 14: /* D4 page flip */
7676                 case 16: /* D5 page flip */
7677                 case 18: /* D6 page flip */
7678                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7679                         if (radeon_use_pflipirq > 0)
7680                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7681                         break;
7682                 case 42: /* HPD hotplug */
7683                         switch (src_data) {
7684                         case 0:
7685                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7686                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7687
7688                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7689                                 queue_hotplug = true;
7690                                 DRM_DEBUG("IH: HPD1\n");
7691
7692                                 break;
7693                         case 1:
7694                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7695                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7696
7697                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7698                                 queue_hotplug = true;
7699                                 DRM_DEBUG("IH: HPD2\n");
7700
7701                                 break;
7702                         case 2:
7703                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7704                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7705
7706                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7707                                 queue_hotplug = true;
7708                                 DRM_DEBUG("IH: HPD3\n");
7709
7710                                 break;
7711                         case 3:
7712                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7713                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7714
7715                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7716                                 queue_hotplug = true;
7717                                 DRM_DEBUG("IH: HPD4\n");
7718
7719                                 break;
7720                         case 4:
7721                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7722                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7723
7724                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7725                                 queue_hotplug = true;
7726                                 DRM_DEBUG("IH: HPD5\n");
7727
7728                                 break;
7729                         case 5:
7730                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7731                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7732
7733                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7734                                 queue_hotplug = true;
7735                                 DRM_DEBUG("IH: HPD6\n");
7736
7737                                 break;
7738                         case 6:
7739                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7740                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7741
7742                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7743                                 queue_dp = true;
7744                                 DRM_DEBUG("IH: HPD_RX 1\n");
7745
7746                                 break;
7747                         case 7:
7748                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7749                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7750
7751                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7752                                 queue_dp = true;
7753                                 DRM_DEBUG("IH: HPD_RX 2\n");
7754
7755                                 break;
7756                         case 8:
7757                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7758                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7759
7760                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7761                                 queue_dp = true;
7762                                 DRM_DEBUG("IH: HPD_RX 3\n");
7763
7764                                 break;
7765                         case 9:
7766                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7767                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7768
7769                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7770                                 queue_dp = true;
7771                                 DRM_DEBUG("IH: HPD_RX 4\n");
7772
7773                                 break;
7774                         case 10:
7775                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7776                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7777
7778                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7779                                 queue_dp = true;
7780                                 DRM_DEBUG("IH: HPD_RX 5\n");
7781
7782                                 break;
7783                         case 11:
7784                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7785                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7786
7787                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7788                                 queue_dp = true;
7789                                 DRM_DEBUG("IH: HPD_RX 6\n");
7790
7791                                 break;
7792                         default:
7793                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7794                                 break;
7795                         }
7796                         break;
7797                 case 96:
7798                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7799                         WREG32(SRBM_INT_ACK, 0x1);
7800                         break;
7801                 case 124: /* UVD */
7802                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7803                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7804                         break;
7805                 case 146:
7806                 case 147:
7807                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7808                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7809                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7810                         /* reset addr and status */
7811                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7812                         if (addr == 0x0 && status == 0x0)
7813                                 break;
7814                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7815                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7816                                 addr);
7817                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7818                                 status);
7819                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7820                         break;
7821                 case 167: /* VCE */
7822                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7823                         switch (src_data) {
7824                         case 0:
7825                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7826                                 break;
7827                         case 1:
7828                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7829                                 break;
7830                         default:
7831                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7832                                 break;
7833                         }
7834                         break;
7835                 case 176: /* GFX RB CP_INT */
7836                 case 177: /* GFX IB CP_INT */
7837                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7838                         break;
7839                 case 181: /* CP EOP event */
7840                         DRM_DEBUG("IH: CP EOP\n");
7841                         /* XXX check the bitfield order! */
7842                         me_id = (ring_id & 0x60) >> 5;
7843                         pipe_id = (ring_id & 0x18) >> 3;
7844                         queue_id = (ring_id & 0x7) >> 0;
7845                         switch (me_id) {
7846                         case 0:
7847                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7848                                 break;
7849                         case 1:
7850                         case 2:
7851                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7852                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7853                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7854                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7855                                 break;
7856                         }
7857                         break;
7858                 case 184: /* CP Privileged reg access */
7859                         DRM_ERROR("Illegal register access in command stream\n");
7860                         /* XXX check the bitfield order! */
7861                         me_id = (ring_id & 0x60) >> 5;
7862                         pipe_id = (ring_id & 0x18) >> 3;
7863                         queue_id = (ring_id & 0x7) >> 0;
7864                         switch (me_id) {
7865                         case 0:
7866                                 /* This results in a full GPU reset, but all we need to do is soft
7867                                  * reset the CP for gfx
7868                                  */
7869                                 queue_reset = true;
7870                                 break;
7871                         case 1:
7872                                 /* XXX compute */
7873                                 queue_reset = true;
7874                                 break;
7875                         case 2:
7876                                 /* XXX compute */
7877                                 queue_reset = true;
7878                                 break;
7879                         }
7880                         break;
7881                 case 185: /* CP Privileged inst */
7882                         DRM_ERROR("Illegal instruction in command stream\n");
7883                         /* XXX check the bitfield order! */
7884                         me_id = (ring_id & 0x60) >> 5;
7885                         pipe_id = (ring_id & 0x18) >> 3;
7886                         queue_id = (ring_id & 0x7) >> 0;
7887                         switch (me_id) {
7888                         case 0:
7889                                 /* This results in a full GPU reset, but all we need to do is soft
7890                                  * reset the CP for gfx
7891                                  */
7892                                 queue_reset = true;
7893                                 break;
7894                         case 1:
7895                                 /* XXX compute */
7896                                 queue_reset = true;
7897                                 break;
7898                         case 2:
7899                                 /* XXX compute */
7900                                 queue_reset = true;
7901                                 break;
7902                         }
7903                         break;
7904                 case 224: /* SDMA trap event */
7905                         /* XXX check the bitfield order! */
7906                         me_id = (ring_id & 0x3) >> 0;
7907                         queue_id = (ring_id & 0xc) >> 2;
7908                         DRM_DEBUG("IH: SDMA trap\n");
7909                         switch (me_id) {
7910                         case 0:
7911                                 switch (queue_id) {
7912                                 case 0:
7913                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7914                                         break;
7915                                 case 1:
7916                                         /* XXX compute */
7917                                         break;
7918                                 case 2:
7919                                         /* XXX compute */
7920                                         break;
7921                                 }
7922                                 break;
7923                         case 1:
7924                                 switch (queue_id) {
7925                                 case 0:
7926                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7927                                         break;
7928                                 case 1:
7929                                         /* XXX compute */
7930                                         break;
7931                                 case 2:
7932                                         /* XXX compute */
7933                                         break;
7934                                 }
7935                                 break;
7936                         }
7937                         break;
7938                 case 230: /* thermal low to high */
7939                         DRM_DEBUG("IH: thermal low to high\n");
7940                         rdev->pm.dpm.thermal.high_to_low = false;
7941                         queue_thermal = true;
7942                         break;
7943                 case 231: /* thermal high to low */
7944                         DRM_DEBUG("IH: thermal high to low\n");
7945                         rdev->pm.dpm.thermal.high_to_low = true;
7946                         queue_thermal = true;
7947                         break;
7948                 case 233: /* GUI IDLE */
7949                         DRM_DEBUG("IH: GUI idle\n");
7950                         break;
7951                 case 241: /* SDMA Privileged inst */
7952                 case 247: /* SDMA Privileged inst */
7953                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7954                         /* XXX check the bitfield order! */
7955                         me_id = (ring_id & 0x3) >> 0;
7956                         queue_id = (ring_id & 0xc) >> 2;
7957                         switch (me_id) {
7958                         case 0:
7959                                 switch (queue_id) {
7960                                 case 0:
7961                                         queue_reset = true;
7962                                         break;
7963                                 case 1:
7964                                         /* XXX compute */
7965                                         queue_reset = true;
7966                                         break;
7967                                 case 2:
7968                                         /* XXX compute */
7969                                         queue_reset = true;
7970                                         break;
7971                                 }
7972                                 break;
7973                         case 1:
7974                                 switch (queue_id) {
7975                                 case 0:
7976                                         queue_reset = true;
7977                                         break;
7978                                 case 1:
7979                                         /* XXX compute */
7980                                         queue_reset = true;
7981                                         break;
7982                                 case 2:
7983                                         /* XXX compute */
7984                                         queue_reset = true;
7985                                         break;
7986                                 }
7987                                 break;
7988                         }
7989                         break;
7990                 default:
7991                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7992                         break;
7993                 }
7994
7995                 /* wptr/rptr are in bytes! */
7996                 rptr += 16;
7997                 rptr &= rdev->ih.ptr_mask;
7998                 WREG32(IH_RB_RPTR, rptr);
7999         }
8000         if (queue_dp)
8001                 schedule_work(&rdev->dp_work);
8002         if (queue_hotplug)
8003                 schedule_delayed_work(&rdev->hotplug_work, 0);
8004         if (queue_reset) {
8005                 rdev->needs_reset = true;
8006                 wake_up_all(&rdev->fence_queue);
8007         }
8008         if (queue_thermal)
8009                 schedule_work(&rdev->pm.dpm.thermal.work);
8010         rdev->ih.rptr = rptr;
8011         atomic_set(&rdev->ih.lock, 0);
8012
8013         /* make sure wptr hasn't changed while processing */
8014         wptr = cik_get_ih_wptr(rdev);
8015         if (wptr != rptr)
8016                 goto restart_ih;
8017
8018         return IRQ_HANDLED;
8019 }
8020
8021 /*
8022  * startup/shutdown callbacks
8023  */
8024 static void cik_uvd_init(struct radeon_device *rdev)
8025 {
8026         int r;
8027
8028         if (!rdev->has_uvd)
8029                 return;
8030
8031         r = radeon_uvd_init(rdev);
8032         if (r) {
8033                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8034                 /*
8035                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8036                  * to early fails cik_uvd_start() and thus nothing happens
8037                  * there. So it is pointless to try to go through that code
8038                  * hence why we disable uvd here.
8039                  */
8040                 rdev->has_uvd = 0;
8041                 return;
8042         }
8043         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8044         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8045 }
8046
8047 static void cik_uvd_start(struct radeon_device *rdev)
8048 {
8049         int r;
8050
8051         if (!rdev->has_uvd)
8052                 return;
8053
8054         r = radeon_uvd_resume(rdev);
8055         if (r) {
8056                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8057                 goto error;
8058         }
8059         r = uvd_v4_2_resume(rdev);
8060         if (r) {
8061                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8062                 goto error;
8063         }
8064         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8065         if (r) {
8066                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8067                 goto error;
8068         }
8069         return;
8070
8071 error:
8072         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8073 }
8074
8075 static void cik_uvd_resume(struct radeon_device *rdev)
8076 {
8077         struct radeon_ring *ring;
8078         int r;
8079
8080         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8081                 return;
8082
8083         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8084         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8085         if (r) {
8086                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8087                 return;
8088         }
8089         r = uvd_v1_0_init(rdev);
8090         if (r) {
8091                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8092                 return;
8093         }
8094 }
8095
8096 static void cik_vce_init(struct radeon_device *rdev)
8097 {
8098         int r;
8099
8100         if (!rdev->has_vce)
8101                 return;
8102
8103         r = radeon_vce_init(rdev);
8104         if (r) {
8105                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8106                 /*
8107                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8108                  * to early fails cik_vce_start() and thus nothing happens
8109                  * there. So it is pointless to try to go through that code
8110                  * hence why we disable vce here.
8111                  */
8112                 rdev->has_vce = 0;
8113                 return;
8114         }
8115         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8116         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8117         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8118         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8119 }
8120
8121 static void cik_vce_start(struct radeon_device *rdev)
8122 {
8123         int r;
8124
8125         if (!rdev->has_vce)
8126                 return;
8127
8128         r = radeon_vce_resume(rdev);
8129         if (r) {
8130                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8131                 goto error;
8132         }
8133         r = vce_v2_0_resume(rdev);
8134         if (r) {
8135                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8136                 goto error;
8137         }
8138         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8139         if (r) {
8140                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8141                 goto error;
8142         }
8143         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8144         if (r) {
8145                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8146                 goto error;
8147         }
8148         return;
8149
8150 error:
8151         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8152         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8153 }
8154
8155 static void cik_vce_resume(struct radeon_device *rdev)
8156 {
8157         struct radeon_ring *ring;
8158         int r;
8159
8160         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8161                 return;
8162
8163         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8164         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8165         if (r) {
8166                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8167                 return;
8168         }
8169         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8170         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8171         if (r) {
8172                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8173                 return;
8174         }
8175         r = vce_v1_0_init(rdev);
8176         if (r) {
8177                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8178                 return;
8179         }
8180 }
8181
8182 /**
8183  * cik_startup - program the asic to a functional state
8184  *
8185  * @rdev: radeon_device pointer
8186  *
8187  * Programs the asic to a functional state (CIK).
8188  * Called by cik_init() and cik_resume().
8189  * Returns 0 for success, error for failure.
8190  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller */
	cik_mc_program(rdev);

	/* dGPU only: load the MC ucode here unless DPM is enabled —
	 * presumably the DPM code has already loaded it in that case
	 * (TODO confirm against the dpm init path).
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	/* bring up the GART page tables, then the core GFX blocks */
	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore register list for the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring that will be used:
	 * GFX, both compute rings (CP1/CP2) and both SDMA rings.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE are optional; on failure they disable their own rings */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* On Hawaii, older (pre-new_fw) firmware apparently requires
	 * type-2 NOP packets on the rings — TODO confirm; everything
	 * else uses type-3 NOPs.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	/* init the GFX ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	/* init the two SDMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	/* start the CP and SDMA engines */
	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* best effort; these log their own errors and return void */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8387
8388 /**
8389  * cik_resume - resume the asic to a functional state
8390  *
8391  * @rdev: radeon_device pointer
8392  *
8393  * Programs the asic to a functional state (CIK).
8394  * Called at resume.
8395  * Returns 0 for success, error for failure.
8396  */
8397 int cik_resume(struct radeon_device *rdev)
8398 {
8399         int r;
8400
8401         /* post card */
8402         atom_asic_init(rdev->mode_info.atom_context);
8403
8404         /* init golden registers */
8405         cik_init_golden_registers(rdev);
8406
8407         if (rdev->pm.pm_method == PM_METHOD_DPM)
8408                 radeon_pm_resume(rdev);
8409
8410         rdev->accel_working = true;
8411         r = cik_startup(rdev);
8412         if (r) {
8413                 DRM_ERROR("cik startup failed on resume\n");
8414                 rdev->accel_working = false;
8415                 return r;
8416         }
8417
8418         return r;
8419
8420 }
8421
8422 /**
8423  * cik_suspend - suspend the asic
8424  *
8425  * @rdev: radeon_device pointer
8426  *
8427  * Bring the chip into a state suitable for suspend (CIK).
8428  * Called at suspend.
8429  * Returns 0 for success.
8430  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop the software users of the hw first */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* UVD/VCE are only torn down if they were initialized */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating/clockgating, interrupts, writeback, GART last */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8452
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the asic-specific functions. This should also allow
 * us to remove a bunch of callback functions like vram_info.
 */
8459 /**
8460  * cik_init - asic specific driver and hw init
8461  *
8462  * @rdev: radeon_device pointer
8463  *
8464  * Setup asic specific driver variables and program the hw
8465  * to a functional state (CIK).
8466  * Called at driver startup.
8467  * Returns 0 for success, errors for failure.
8468  */
8469 int cik_init(struct radeon_device *rdev)
8470 {
8471         struct radeon_ring *ring;
8472         int r;
8473
8474         /* Read BIOS */
8475         if (!radeon_get_bios(rdev)) {
8476                 if (ASIC_IS_AVIVO(rdev))
8477                         return -EINVAL;
8478         }
8479         /* Must be an ATOMBIOS */
8480         if (!rdev->is_atom_bios) {
8481                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8482                 return -EINVAL;
8483         }
8484         r = radeon_atombios_init(rdev);
8485         if (r)
8486                 return r;
8487
8488         /* Post card if necessary */
8489         if (!radeon_card_posted(rdev)) {
8490                 if (!rdev->bios) {
8491                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8492                         return -EINVAL;
8493                 }
8494                 DRM_INFO("GPU not posted. posting now...\n");
8495                 atom_asic_init(rdev->mode_info.atom_context);
8496         }
8497         /* init golden registers */
8498         cik_init_golden_registers(rdev);
8499         /* Initialize scratch registers */
8500         cik_scratch_init(rdev);
8501         /* Initialize surface registers */
8502         radeon_surface_init(rdev);
8503         /* Initialize clocks */
8504         radeon_get_clock_info(rdev->ddev);
8505
8506         /* Fence driver */
8507         r = radeon_fence_driver_init(rdev);
8508         if (r)
8509                 return r;
8510
8511         /* initialize memory controller */
8512         r = cik_mc_init(rdev);
8513         if (r)
8514                 return r;
8515         /* Memory manager */
8516         r = radeon_bo_init(rdev);
8517         if (r)
8518                 return r;
8519
8520         if (rdev->flags & RADEON_IS_IGP) {
8521                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8522                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8523                         r = cik_init_microcode(rdev);
8524                         if (r) {
8525                                 DRM_ERROR("Failed to load firmware!\n");
8526                                 return r;
8527                         }
8528                 }
8529         } else {
8530                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8531                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8532                     !rdev->mc_fw) {
8533                         r = cik_init_microcode(rdev);
8534                         if (r) {
8535                                 DRM_ERROR("Failed to load firmware!\n");
8536                                 return r;
8537                         }
8538                 }
8539         }
8540
8541         /* Initialize power management */
8542         radeon_pm_init(rdev);
8543
8544         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8545         ring->ring_obj = NULL;
8546         r600_ring_init(rdev, ring, 1024 * 1024);
8547
8548         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8549         ring->ring_obj = NULL;
8550         r600_ring_init(rdev, ring, 1024 * 1024);
8551         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8552         if (r)
8553                 return r;
8554
8555         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8556         ring->ring_obj = NULL;
8557         r600_ring_init(rdev, ring, 1024 * 1024);
8558         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8559         if (r)
8560                 return r;
8561
8562         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8563         ring->ring_obj = NULL;
8564         r600_ring_init(rdev, ring, 256 * 1024);
8565
8566         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8567         ring->ring_obj = NULL;
8568         r600_ring_init(rdev, ring, 256 * 1024);
8569
8570         cik_uvd_init(rdev);
8571         cik_vce_init(rdev);
8572
8573         rdev->ih.ring_obj = NULL;
8574         r600_ih_ring_init(rdev, 64 * 1024);
8575
8576         r = r600_pcie_gart_init(rdev);
8577         if (r)
8578                 return r;
8579
8580         rdev->accel_working = true;
8581         r = cik_startup(rdev);
8582         if (r) {
8583                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8584                 cik_cp_fini(rdev);
8585                 cik_sdma_fini(rdev);
8586                 cik_irq_fini(rdev);
8587                 sumo_rlc_fini(rdev);
8588                 cik_mec_fini(rdev);
8589                 radeon_wb_fini(rdev);
8590                 radeon_ib_pool_fini(rdev);
8591                 radeon_vm_manager_fini(rdev);
8592                 radeon_irq_kms_fini(rdev);
8593                 cik_pcie_gart_fini(rdev);
8594                 rdev->accel_working = false;
8595         }
8596
8597         /* Don't start up if the MC ucode is missing.
8598          * The default clocks and voltages before the MC ucode
8599          * is loaded are not suffient for advanced operations.
8600          */
8601         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8602                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8603                 return -EINVAL;
8604         }
8605
8606         return 0;
8607 }
8608
8609 /**
8610  * cik_fini - asic specific driver and hw fini
8611  *
8612  * @rdev: radeon_device pointer
8613  *
8614  * Tear down the asic specific driver variables and program the hw
8615  * to an idle state (CIK).
8616  * Called at driver unload.
8617  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was allocated in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8644
8645 void dce8_program_fmt(struct drm_encoder *encoder)
8646 {
8647         struct drm_device *dev = encoder->dev;
8648         struct radeon_device *rdev = dev->dev_private;
8649         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8650         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8651         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8652         int bpc = 0;
8653         u32 tmp = 0;
8654         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8655
8656         if (connector) {
8657                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8658                 bpc = radeon_get_monitor_bpc(connector);
8659                 dither = radeon_connector->dither;
8660         }
8661
8662         /* LVDS/eDP FMT is set up by atom */
8663         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8664                 return;
8665
8666         /* not needed for analog */
8667         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8668             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8669                 return;
8670
8671         if (bpc == 0)
8672                 return;
8673
8674         switch (bpc) {
8675         case 6:
8676                 if (dither == RADEON_FMT_DITHER_ENABLE)
8677                         /* XXX sort out optimal dither settings */
8678                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8679                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8680                 else
8681                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8682                 break;
8683         case 8:
8684                 if (dither == RADEON_FMT_DITHER_ENABLE)
8685                         /* XXX sort out optimal dither settings */
8686                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8687                                 FMT_RGB_RANDOM_ENABLE |
8688                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8689                 else
8690                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8691                 break;
8692         case 10:
8693                 if (dither == RADEON_FMT_DITHER_ENABLE)
8694                         /* XXX sort out optimal dither settings */
8695                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8696                                 FMT_RGB_RANDOM_ENABLE |
8697                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8698                 else
8699                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8700                 break;
8701         default:
8702                 /* not needed */
8703                 break;
8704         }
8705
8706         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8707 }
8708
8709 /* display watermark setup */
8710 /**
8711  * dce8_line_buffer_adjust - Set up the line buffer
8712  *
8713  * @rdev: radeon_device pointer
8714  * @radeon_crtc: the selected display controller
8715  * @mode: the current display mode on the selected display
8716  * controller
8717  *
8718  * Setup up the line buffer allocation for
8719  * the selected display controller (CIK).
8720  * Returns the line buffer size in pixels.
8721  */
8722 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8723                                    struct radeon_crtc *radeon_crtc,
8724                                    struct drm_display_mode *mode)
8725 {
8726         u32 tmp, buffer_alloc, i;
8727         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8728         /*
8729          * Line Buffer Setup
8730          * There are 6 line buffers, one for each display controllers.
8731          * There are 3 partitions per LB. Select the number of partitions
8732          * to enable based on the display width.  For display widths larger
8733          * than 4096, you need use to use 2 display controllers and combine
8734          * them using the stereo blender.
8735          */
8736         if (radeon_crtc->base.enabled && mode) {
8737                 if (mode->crtc_hdisplay < 1920) {
8738                         tmp = 1;
8739                         buffer_alloc = 2;
8740                 } else if (mode->crtc_hdisplay < 2560) {
8741                         tmp = 2;
8742                         buffer_alloc = 2;
8743                 } else if (mode->crtc_hdisplay < 4096) {
8744                         tmp = 0;
8745                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8746                 } else {
8747                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8748                         tmp = 0;
8749                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8750                 }
8751         } else {
8752                 tmp = 1;
8753                 buffer_alloc = 0;
8754         }
8755
8756         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8757                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8758
8759         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8760                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8761         for (i = 0; i < rdev->usec_timeout; i++) {
8762                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8763                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8764                         break;
8765                 udelay(1);
8766         }
8767
8768         if (radeon_crtc->base.enabled && mode) {
8769                 switch (tmp) {
8770                 case 0:
8771                 default:
8772                         return 4096 * 2;
8773                 case 1:
8774                         return 1920 * 2;
8775                 case 2:
8776                         return 2560 * 2;
8777                 }
8778         }
8779
8780         /* controller not enabled, so no lb used */
8781         return 0;
8782 }
8783
8784 /**
8785  * cik_get_number_of_dram_channels - get the number of dram channels
8786  *
8787  * @rdev: radeon_device pointer
8788  *
8789  * Look up the number of video ram channels (CIK).
8790  * Used for display watermark bandwidth calculations
8791  * Returns the number of dram channels
8792  */
8793 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8794 {
8795         u32 tmp = RREG32(MC_SHARED_CHMAP);
8796
8797         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8798         case 0:
8799         default:
8800                 return 1;
8801         case 1:
8802                 return 2;
8803         case 2:
8804                 return 4;
8805         case 3:
8806                 return 8;
8807         case 4:
8808                 return 3;
8809         case 5:
8810                 return 6;
8811         case 6:
8812                 return 10;
8813         case 7:
8814                 return 12;
8815         case 8:
8816                 return 16;
8817         }
8818 }
8819
/*
 * Mode and memory parameters fed into the DCE8 display watermark
 * calculations (see the dce8_* bandwidth helpers below).
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8835
8836 /**
8837  * dce8_dram_bandwidth - get the dram bandwidth
8838  *
8839  * @wm: watermark calculation data
8840  *
8841  * Calculate the raw dram bandwidth (CIK).
8842  * Used for display watermark bandwidth calculations
8843  * Returns the dram bandwidth in MBytes/s
8844  */
8845 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8846 {
8847         /* Calculate raw DRAM Bandwidth */
8848         fixed20_12 dram_efficiency; /* 0.7 */
8849         fixed20_12 yclk, dram_channels, bandwidth;
8850         fixed20_12 a;
8851
8852         a.full = dfixed_const(1000);
8853         yclk.full = dfixed_const(wm->yclk);
8854         yclk.full = dfixed_div(yclk, a);
8855         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8856         a.full = dfixed_const(10);
8857         dram_efficiency.full = dfixed_const(7);
8858         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8859         bandwidth.full = dfixed_mul(dram_channels, yclk);
8860         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8861
8862         return dfixed_trunc(bandwidth);
8863 }
8864
8865 /**
8866  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8867  *
8868  * @wm: watermark calculation data
8869  *
8870  * Calculate the dram bandwidth used for display (CIK).
8871  * Used for display watermark bandwidth calculations
8872  * Returns the dram bandwidth for display in MBytes/s
8873  */
8874 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8875 {
8876         /* Calculate DRAM Bandwidth and the part allocated to display. */
8877         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8878         fixed20_12 yclk, dram_channels, bandwidth;
8879         fixed20_12 a;
8880
8881         a.full = dfixed_const(1000);
8882         yclk.full = dfixed_const(wm->yclk);
8883         yclk.full = dfixed_div(yclk, a);
8884         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8885         a.full = dfixed_const(10);
8886         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8887         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8888         bandwidth.full = dfixed_mul(dram_channels, yclk);
8889         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8890
8891         return dfixed_trunc(bandwidth);
8892 }
8893
8894 /**
8895  * dce8_data_return_bandwidth - get the data return bandwidth
8896  *
8897  * @wm: watermark calculation data
8898  *
8899  * Calculate the data return bandwidth used for display (CIK).
8900  * Used for display watermark bandwidth calculations
8901  * Returns the data return bandwidth in MBytes/s
8902  */
8903 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8904 {
8905         /* Calculate the display Data return Bandwidth */
8906         fixed20_12 return_efficiency; /* 0.8 */
8907         fixed20_12 sclk, bandwidth;
8908         fixed20_12 a;
8909
8910         a.full = dfixed_const(1000);
8911         sclk.full = dfixed_const(wm->sclk);
8912         sclk.full = dfixed_div(sclk, a);
8913         a.full = dfixed_const(10);
8914         return_efficiency.full = dfixed_const(8);
8915         return_efficiency.full = dfixed_div(return_efficiency, a);
8916         a.full = dfixed_const(32);
8917         bandwidth.full = dfixed_mul(a, sclk);
8918         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8919
8920         return dfixed_trunc(bandwidth);
8921 }
8922
8923 /**
8924  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8925  *
8926  * @wm: watermark calculation data
8927  *
8928  * Calculate the dmif bandwidth used for display (CIK).
8929  * Used for display watermark bandwidth calculations
8930  * Returns the dmif bandwidth in MBytes/s
8931  */
8932 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8933 {
8934         /* Calculate the DMIF Request Bandwidth */
8935         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8936         fixed20_12 disp_clk, bandwidth;
8937         fixed20_12 a, b;
8938
8939         a.full = dfixed_const(1000);
8940         disp_clk.full = dfixed_const(wm->disp_clk);
8941         disp_clk.full = dfixed_div(disp_clk, a);
8942         a.full = dfixed_const(32);
8943         b.full = dfixed_mul(a, disp_clk);
8944
8945         a.full = dfixed_const(10);
8946         disp_clk_request_efficiency.full = dfixed_const(8);
8947         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8948
8949         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8950
8951         return dfixed_trunc(bandwidth);
8952 }
8953
8954 /**
8955  * dce8_available_bandwidth - get the min available bandwidth
8956  *
8957  * @wm: watermark calculation data
8958  *
8959  * Calculate the min available bandwidth used for display (CIK).
8960  * Used for display watermark bandwidth calculations
8961  * Returns the min available bandwidth in MBytes/s
8962  */
8963 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8964 {
8965         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8966         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8967         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8968         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8969
8970         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8971 }
8972
8973 /**
8974  * dce8_average_bandwidth - get the average available bandwidth
8975  *
8976  * @wm: watermark calculation data
8977  *
8978  * Calculate the average available bandwidth used for display (CIK).
8979  * Used for display watermark bandwidth calculations
8980  * Returns the average available bandwidth in MBytes/s
8981  */
8982 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8983 {
8984         /* Calculate the display mode Average Bandwidth
8985          * DisplayMode should contain the source and destination dimensions,
8986          * timing, etc.
8987          */
8988         fixed20_12 bpp;
8989         fixed20_12 line_time;
8990         fixed20_12 src_width;
8991         fixed20_12 bandwidth;
8992         fixed20_12 a;
8993
8994         a.full = dfixed_const(1000);
8995         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8996         line_time.full = dfixed_div(line_time, a);
8997         bpp.full = dfixed_const(wm->bytes_per_pixel);
8998         src_width.full = dfixed_const(wm->src_width);
8999         bandwidth.full = dfixed_mul(src_width, bpp);
9000         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9001         bandwidth.full = dfixed_div(bandwidth, line_time);
9002
9003         return dfixed_trunc(bandwidth);
9004 }
9005
9006 /**
9007  * dce8_latency_watermark - get the latency watermark
9008  *
9009  * @wm: watermark calculation data
9010  *
9011  * Calculate the latency watermark (CIK).
9012  * Used for display watermark bandwidth calculations
9013  * Returns the latency watermark in ns
9014  */
9015 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9016 {
9017         /* First calculate the latency in ns */
9018         u32 mc_latency = 2000; /* 2000 ns. */
9019         u32 available_bandwidth = dce8_available_bandwidth(wm);
9020         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9021         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9022         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9023         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9024                 (wm->num_heads * cursor_line_pair_return_time);
9025         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9026         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9027         u32 tmp, dmif_size = 12288;
9028         fixed20_12 a, b, c;
9029
9030         if (wm->num_heads == 0)
9031                 return 0;
9032
9033         a.full = dfixed_const(2);
9034         b.full = dfixed_const(1);
9035         if ((wm->vsc.full > a.full) ||
9036             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9037             (wm->vtaps >= 5) ||
9038             ((wm->vsc.full >= a.full) && wm->interlaced))
9039                 max_src_lines_per_dst_line = 4;
9040         else
9041                 max_src_lines_per_dst_line = 2;
9042
9043         a.full = dfixed_const(available_bandwidth);
9044         b.full = dfixed_const(wm->num_heads);
9045         a.full = dfixed_div(a, b);
9046         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9047         tmp = min(dfixed_trunc(a), tmp);
9048
9049         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9050
9051         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9052         b.full = dfixed_const(1000);
9053         c.full = dfixed_const(lb_fill_bw);
9054         b.full = dfixed_div(c, b);
9055         a.full = dfixed_div(a, b);
9056         line_fill_time = dfixed_trunc(a);
9057
9058         if (line_fill_time < wm->active_time)
9059                 return latency;
9060         else
9061                 return latency + (line_fill_time - wm->active_time);
9062
9063 }
9064
9065 /**
9066  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9067  * average and available dram bandwidth
9068  *
9069  * @wm: watermark calculation data
9070  *
9071  * Check if the display average bandwidth fits in the display
9072  * dram bandwidth (CIK).
9073  * Used for display watermark bandwidth calculations
9074  * Returns true if the display fits, false if not.
9075  */
9076 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9077 {
9078         if (dce8_average_bandwidth(wm) <=
9079             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9080                 return true;
9081         else
9082                 return false;
9083 }
9084
9085 /**
9086  * dce8_average_bandwidth_vs_available_bandwidth - check
9087  * average and available bandwidth
9088  *
9089  * @wm: watermark calculation data
9090  *
9091  * Check if the display average bandwidth fits in the display
9092  * available bandwidth (CIK).
9093  * Used for display watermark bandwidth calculations
9094  * Returns true if the display fits, false if not.
9095  */
9096 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9097 {
9098         if (dce8_average_bandwidth(wm) <=
9099             (dce8_available_bandwidth(wm) / wm->num_heads))
9100                 return true;
9101         else
9102                 return false;
9103 }
9104
9105 /**
9106  * dce8_check_latency_hiding - check latency hiding
9107  *
9108  * @wm: watermark calculation data
9109  *
9110  * Check latency hiding (CIK).
9111  * Used for display watermark bandwidth calculations
9112  * Returns true if the display fits, false if not.
9113  */
9114 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9115 {
9116         u32 lb_partitions = wm->lb_size / wm->src_width;
9117         u32 line_time = wm->active_time + wm->blank_time;
9118         u32 latency_tolerant_lines;
9119         u32 latency_hiding;
9120         fixed20_12 a;
9121
9122         a.full = dfixed_const(1);
9123         if (wm->vsc.full > a.full)
9124                 latency_tolerant_lines = 1;
9125         else {
9126                 if (lb_partitions <= (wm->vtaps + 1))
9127                         latency_tolerant_lines = 1;
9128                 else
9129                         latency_tolerant_lines = 2;
9130         }
9131
9132         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9133
9134         if (dce8_latency_watermark(wm) <= latency_hiding)
9135                 return true;
9136         else
9137                 return false;
9138 }
9139
9140 /**
9141  * dce8_program_watermarks - program display watermarks
9142  *
9143  * @rdev: radeon_device pointer
9144  * @radeon_crtc: the selected display controller
9145  * @lb_size: line buffer size
9146  * @num_heads: number of display controllers in use
9147  *
9148  * Calculate and program the display watermarks for the
9149  * selected display controller (CIK).
9150  */
9151 static void dce8_program_watermarks(struct radeon_device *rdev,
9152                                     struct radeon_crtc *radeon_crtc,
9153                                     u32 lb_size, u32 num_heads)
9154 {
9155         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9156         struct dce8_wm_params wm_low, wm_high;
9157         u32 active_time;
9158         u32 line_time = 0;
9159         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9160         u32 tmp, wm_mask;
9161
9162         if (radeon_crtc->base.enabled && num_heads && mode) {
9163                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9164                                             (u32)mode->clock);
9165                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9166                                           (u32)mode->clock);
9167                 line_time = min(line_time, (u32)65535);
9168
9169                 /* watermark for high clocks */
9170                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9171                     rdev->pm.dpm_enabled) {
9172                         wm_high.yclk =
9173                                 radeon_dpm_get_mclk(rdev, false) * 10;
9174                         wm_high.sclk =
9175                                 radeon_dpm_get_sclk(rdev, false) * 10;
9176                 } else {
9177                         wm_high.yclk = rdev->pm.current_mclk * 10;
9178                         wm_high.sclk = rdev->pm.current_sclk * 10;
9179                 }
9180
9181                 wm_high.disp_clk = mode->clock;
9182                 wm_high.src_width = mode->crtc_hdisplay;
9183                 wm_high.active_time = active_time;
9184                 wm_high.blank_time = line_time - wm_high.active_time;
9185                 wm_high.interlaced = false;
9186                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9187                         wm_high.interlaced = true;
9188                 wm_high.vsc = radeon_crtc->vsc;
9189                 wm_high.vtaps = 1;
9190                 if (radeon_crtc->rmx_type != RMX_OFF)
9191                         wm_high.vtaps = 2;
9192                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9193                 wm_high.lb_size = lb_size;
9194                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9195                 wm_high.num_heads = num_heads;
9196
9197                 /* set for high clocks */
9198                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9199
9200                 /* possibly force display priority to high */
9201                 /* should really do this at mode validation time... */
9202                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9203                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9204                     !dce8_check_latency_hiding(&wm_high) ||
9205                     (rdev->disp_priority == 2)) {
9206                         DRM_DEBUG_KMS("force priority to high\n");
9207                 }
9208
9209                 /* watermark for low clocks */
9210                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9211                     rdev->pm.dpm_enabled) {
9212                         wm_low.yclk =
9213                                 radeon_dpm_get_mclk(rdev, true) * 10;
9214                         wm_low.sclk =
9215                                 radeon_dpm_get_sclk(rdev, true) * 10;
9216                 } else {
9217                         wm_low.yclk = rdev->pm.current_mclk * 10;
9218                         wm_low.sclk = rdev->pm.current_sclk * 10;
9219                 }
9220
9221                 wm_low.disp_clk = mode->clock;
9222                 wm_low.src_width = mode->crtc_hdisplay;
9223                 wm_low.active_time = active_time;
9224                 wm_low.blank_time = line_time - wm_low.active_time;
9225                 wm_low.interlaced = false;
9226                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9227                         wm_low.interlaced = true;
9228                 wm_low.vsc = radeon_crtc->vsc;
9229                 wm_low.vtaps = 1;
9230                 if (radeon_crtc->rmx_type != RMX_OFF)
9231                         wm_low.vtaps = 2;
9232                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9233                 wm_low.lb_size = lb_size;
9234                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9235                 wm_low.num_heads = num_heads;
9236
9237                 /* set for low clocks */
9238                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9239
9240                 /* possibly force display priority to high */
9241                 /* should really do this at mode validation time... */
9242                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9243                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9244                     !dce8_check_latency_hiding(&wm_low) ||
9245                     (rdev->disp_priority == 2)) {
9246                         DRM_DEBUG_KMS("force priority to high\n");
9247                 }
9248
9249                 /* Save number of lines the linebuffer leads before the scanout */
9250                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9251         }
9252
9253         /* select wm A */
9254         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9255         tmp = wm_mask;
9256         tmp &= ~LATENCY_WATERMARK_MASK(3);
9257         tmp |= LATENCY_WATERMARK_MASK(1);
9258         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9259         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9260                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9261                 LATENCY_HIGH_WATERMARK(line_time)));
9262         /* select wm B */
9263         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9264         tmp &= ~LATENCY_WATERMARK_MASK(3);
9265         tmp |= LATENCY_WATERMARK_MASK(2);
9266         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9267         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9268                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9269                 LATENCY_HIGH_WATERMARK(line_time)));
9270         /* restore original selection */
9271         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9272
9273         /* save values for DPM */
9274         radeon_crtc->line_time = line_time;
9275         radeon_crtc->wm_high = latency_watermark_a;
9276         radeon_crtc->wm_low = latency_watermark_b;
9277 }
9278
9279 /**
9280  * dce8_bandwidth_update - program display watermarks
9281  *
9282  * @rdev: radeon_device pointer
9283  *
9284  * Calculate and program the display watermarks and line
9285  * buffer allocation (CIK).
9286  */
9287 void dce8_bandwidth_update(struct radeon_device *rdev)
9288 {
9289         struct drm_display_mode *mode = NULL;
9290         u32 num_heads = 0, lb_size;
9291         int i;
9292
9293         if (!rdev->mode_info.mode_config_initialized)
9294                 return;
9295
9296         radeon_update_display_priority(rdev);
9297
9298         for (i = 0; i < rdev->num_crtc; i++) {
9299                 if (rdev->mode_info.crtcs[i]->base.enabled)
9300                         num_heads++;
9301         }
9302         for (i = 0; i < rdev->num_crtc; i++) {
9303                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9304                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9305                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9306         }
9307 }
9308
9309 /**
9310  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9311  *
9312  * @rdev: radeon_device pointer
9313  *
9314  * Fetches a GPU clock counter snapshot (SI).
9315  * Returns the 64 bit clock counter snapshot.
9316  */
9317 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9318 {
9319         uint64_t clock;
9320
9321         mutex_lock(&rdev->gpu_clock_mutex);
9322         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9323         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9324                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9325         mutex_unlock(&rdev->gpu_clock_mutex);
9326         return clock;
9327 }
9328
9329 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9330                              u32 cntl_reg, u32 status_reg)
9331 {
9332         int r, i;
9333         struct atom_clock_dividers dividers;
9334         uint32_t tmp;
9335
9336         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9337                                            clock, false, &dividers);
9338         if (r)
9339                 return r;
9340
9341         tmp = RREG32_SMC(cntl_reg);
9342         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9343         tmp |= dividers.post_divider;
9344         WREG32_SMC(cntl_reg, tmp);
9345
9346         for (i = 0; i < 100; i++) {
9347                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9348                         break;
9349                 mdelay(10);
9350         }
9351         if (i == 100)
9352                 return -ETIMEDOUT;
9353
9354         return 0;
9355 }
9356
9357 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9358 {
9359         int r = 0;
9360
9361         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9362         if (r)
9363                 return r;
9364
9365         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9366         return r;
9367 }
9368
9369 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9370 {
9371         int r, i;
9372         struct atom_clock_dividers dividers;
9373         u32 tmp;
9374
9375         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9376                                            ecclk, false, &dividers);
9377         if (r)
9378                 return r;
9379
9380         for (i = 0; i < 100; i++) {
9381                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9382                         break;
9383                 mdelay(10);
9384         }
9385         if (i == 100)
9386                 return -ETIMEDOUT;
9387
9388         tmp = RREG32_SMC(CG_ECLK_CNTL);
9389         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9390         tmp |= dividers.post_divider;
9391         WREG32_SMC(CG_ECLK_CNTL, tmp);
9392
9393         for (i = 0; i < 100; i++) {
9394                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9395                         break;
9396                 mdelay(10);
9397         }
9398         if (i == 100)
9399                 return -ETIMEDOUT;
9400
9401         return 0;
9402 }
9403
/* Try to bring the PCIe link up to gen2/gen3 speeds (CIK).
 * No-op for root-bus devices, IGPs, non-PCIE parts, when disabled via
 * radeon.pcie_gen2=0, or when the bridge caps out at gen1.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* skip if the link is already at the highest supported rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability blocks on both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the original LNKCTL values, then set HAWD
			 * (hardware autonomous width disable) on both ends
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the detected maximum link width
			 * if the link is currently running narrower
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times, restoring the
			 * saved LNKCTL/LNKCTL2 state after each attempt
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hardware to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9563
/**
 * cik_program_aspm - configure PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIE link power-saving features on CIK parts: L0s/L1
 * inactivity timers, PLL power-down while in L1, dynamic lane power
 * states, and (when the upstream root port supports Clock PM) switching
 * auxiliary clocks off the PCIE reference clock so it can be gated via
 * CLKREQ#.  Does nothing if ASPM is disabled via the radeon.aspm module
 * parameter, on IGPs, or on non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Policy knobs: all hard-coded to false, i.e. L0s, L1,
	 * PLL-off-in-L1 and CLKREQ handling are all allowed below. */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 module parameter disables ASPM entirely */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the number of fast training sequences (N_FTS)
	 * advertised for L0s exit; 0x24 is the recommended value. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	/* Have the link go through Recovery on low-power-state exit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* Ignore EDB (EnD Bad) framing errors */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Program the L0s/L1 inactivity timers.  LC_PMI_TO_L1_DIS is set
	 * first and cleared again below only if L1 stays enabled. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PIF pad PLLs to power down in the L1
			 * and TXS2 link states, for both PIF blocks
			 * (PB0/PB1) and both lane groups (_0/_1). */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* Enable dynamic lane power-state management */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# support requires an upstream bridge, so a
			 * device sitting directly on the root bus cannot
			 * use it.  Probe the root port's link capabilities
			 * for the Clock Power Management bit. */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* Allow full power-down in L1/L2-L3 */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* Move the remaining consumers off the PCIE
				 * reference clock so it can actually be
				 * gated: thermal monitor clocks ... */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				/* ... deep-sleep and Z clocks ... */
				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				/* ... stop using BCLK as XCLK ... */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				/* ... stop forcing the BIF refclk on ... */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				/* ... and retarget the MPLL bypass clock. */
				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the LC_CNTL value prepared above
		 * (L1 timer cleared, PMI-to-L1 disabled). */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable light sleep for the PCIE slave/master/replay memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If the received N_FTS count is saturated and the link is
		 * lane-reversed in both directions, back L0s off again by
		 * clearing its inactivity timer. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}