GNU Linux-libre 5.19-rc6-gnu
[releases.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31
32 #include "atom.h"
33 #include "evergreen.h"
34 #include "cik_blit_shaders.h"
35 #include "cik.h"
36 #include "cikd.h"
37 #include "clearstate_ci.h"
38 #include "r600.h"
39 #include "radeon.h"
40 #include "radeon_asic.h"
41 #include "radeon_audio.h"
42 #include "radeon_ucode.h"
43 #include "si.h"
44 #include "vce.h"
45
46 #define SH_MEM_CONFIG_GFX_DEFAULT \
47         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48
49 /*(DEBLOBBED)*/
50
51 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
52 static void cik_rlc_stop(struct radeon_device *rdev);
53 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
54 static void cik_program_aspm(struct radeon_device *rdev);
55 static void cik_init_pg(struct radeon_device *rdev);
56 static void cik_init_cg(struct radeon_device *rdev);
57 static void cik_fini_pg(struct radeon_device *rdev);
58 static void cik_fini_cg(struct radeon_device *rdev);
59 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
60                                           bool enable);
61
62 /**
63  * cik_get_allowed_info_register - fetch the register for the info ioctl
64  *
65  * @rdev: radeon_device pointer
66  * @reg: register offset in bytes
67  * @val: register value
68  *
69  * Returns 0 for success or -EINVAL for an invalid register
70  *
71  */
72 int cik_get_allowed_info_register(struct radeon_device *rdev,
73                                   u32 reg, u32 *val)
74 {
75         switch (reg) {
76         case GRBM_STATUS:
77         case GRBM_STATUS2:
78         case GRBM_STATUS_SE0:
79         case GRBM_STATUS_SE1:
80         case GRBM_STATUS_SE2:
81         case GRBM_STATUS_SE3:
82         case SRBM_STATUS:
83         case SRBM_STATUS2:
84         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
85         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
86         case UVD_STATUS:
87         /* TODO VCE */
88                 *val = RREG32(reg);
89                 return 0;
90         default:
91                 return -EINVAL;
92         }
93 }
94
95 /*
96  * Indirect registers accessor
97  */
98 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
99 {
100         unsigned long flags;
101         u32 r;
102
103         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
104         WREG32(CIK_DIDT_IND_INDEX, (reg));
105         r = RREG32(CIK_DIDT_IND_DATA);
106         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
107         return r;
108 }
109
110 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
111 {
112         unsigned long flags;
113
114         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
115         WREG32(CIK_DIDT_IND_INDEX, (reg));
116         WREG32(CIK_DIDT_IND_DATA, (v));
117         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
118 }
119
120 /* get temperature in millidegrees */
121 int ci_get_temp(struct radeon_device *rdev)
122 {
123         u32 temp;
124         int actual_temp = 0;
125
126         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
127                 CTF_TEMP_SHIFT;
128
129         if (temp & 0x200)
130                 actual_temp = 255;
131         else
132                 actual_temp = temp & 0x1ff;
133
134         return actual_temp * 1000;
135 }
136
137 /* get temperature in millidegrees */
138 int kv_get_temp(struct radeon_device *rdev)
139 {
140         u32 temp;
141         int actual_temp = 0;
142
143         temp = RREG32_SMC(0xC0300E0C);
144
145         if (temp)
146                 actual_temp = (temp / 8) - 49;
147         else
148                 actual_temp = 0;
149
150         return actual_temp * 1000;
151 }
152
153 /*
154  * Indirect registers accessor
155  */
156 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
157 {
158         unsigned long flags;
159         u32 r;
160
161         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
162         WREG32(PCIE_INDEX, reg);
163         (void)RREG32(PCIE_INDEX);
164         r = RREG32(PCIE_DATA);
165         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
166         return r;
167 }
168
169 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
170 {
171         unsigned long flags;
172
173         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
174         WREG32(PCIE_INDEX, reg);
175         (void)RREG32(PCIE_INDEX);
176         WREG32(PCIE_DATA, v);
177         (void)RREG32(PCIE_DATA);
178         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
179 }
180
/* RLC save/restore register list for Spectre (Kaveri) parts.
 * Entries are pairs: (instance/SE selector << 16) | (register dword
 * offset), followed by a 0 placeholder slot for the saved value.
 * Bare small values (0x3, 0x5) appear to delimit list sections —
 * NOTE(review): exact consumer format is defined by the RLC ucode
 * interface; confirm against the RLC setup code before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
627
/* RLC save/restore register list for Kalindi (Kabini) parts.
 * Same pair encoding as the Spectre list above: (selector << 16) |
 * (register dword offset), each followed by a 0 placeholder slot;
 * bare 0x3/0x5 values appear to delimit list sections —
 * NOTE(review): format defined by the RLC ucode interface; confirm
 * against the RLC setup code before editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
952
/*
 * Bonaire "golden" register tables, programmed at init time by
 * cik_init_golden_registers() via radeon_program_register_sequence().
 * Each row is three u32s; presumably { reg offset, and-mask, or-value } —
 * confirm against radeon_program_register_sequence().  Values are
 * vendor-recommended magic settings; do not alter without HW docs.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

/* Bonaire medium-grain / coarse-grain clock-gating init sequence
 * (same triplet format as above). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1096
/*
 * Spectre (Kaveri) golden register tables; same triplet format and use
 * as the Bonaire tables — applied by cik_init_golden_registers() for
 * CHIP_KAVERI via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

/* Spectre clock-gating (MGCG/CGCG) init sequence. */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1229
/*
 * Kalindi (Kabini) golden register tables; same triplet format as the
 * Bonaire tables.  Also reused for CHIP_MULLINS (which swaps in the
 * godavari_golden_registers table) by cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

/* Kalindi clock-gating (MGCG/CGCG) init sequence. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1335
/*
 * Hawaii golden register tables; same triplet format as the Bonaire
 * tables, applied for CHIP_HAWAII by cik_init_golden_registers().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

/* Hawaii clock-gating (MGCG/CGCG) init sequence. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1500
/*
 * Godavari (Mullins) golden register table; same triplet format as the
 * other tables, used in place of kalindi_golden_registers for
 * CHIP_MULLINS by cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): offset 0x98302 looks like a typo for 0x9834 (every
	 * sibling table pairs 0xf00fffff/0x00000400 with 0x9834), but it
	 * matches long-standing upstream values — confirm against HW docs
	 * before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1536
1537
1538 static void cik_init_golden_registers(struct radeon_device *rdev)
1539 {
1540         switch (rdev->family) {
1541         case CHIP_BONAIRE:
1542                 radeon_program_register_sequence(rdev,
1543                                                  bonaire_mgcg_cgcg_init,
1544                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1545                 radeon_program_register_sequence(rdev,
1546                                                  bonaire_golden_registers,
1547                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1548                 radeon_program_register_sequence(rdev,
1549                                                  bonaire_golden_common_registers,
1550                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1551                 radeon_program_register_sequence(rdev,
1552                                                  bonaire_golden_spm_registers,
1553                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1554                 break;
1555         case CHIP_KABINI:
1556                 radeon_program_register_sequence(rdev,
1557                                                  kalindi_mgcg_cgcg_init,
1558                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1559                 radeon_program_register_sequence(rdev,
1560                                                  kalindi_golden_registers,
1561                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1562                 radeon_program_register_sequence(rdev,
1563                                                  kalindi_golden_common_registers,
1564                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1565                 radeon_program_register_sequence(rdev,
1566                                                  kalindi_golden_spm_registers,
1567                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1568                 break;
1569         case CHIP_MULLINS:
1570                 radeon_program_register_sequence(rdev,
1571                                                  kalindi_mgcg_cgcg_init,
1572                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1573                 radeon_program_register_sequence(rdev,
1574                                                  godavari_golden_registers,
1575                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1576                 radeon_program_register_sequence(rdev,
1577                                                  kalindi_golden_common_registers,
1578                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1579                 radeon_program_register_sequence(rdev,
1580                                                  kalindi_golden_spm_registers,
1581                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1582                 break;
1583         case CHIP_KAVERI:
1584                 radeon_program_register_sequence(rdev,
1585                                                  spectre_mgcg_cgcg_init,
1586                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1587                 radeon_program_register_sequence(rdev,
1588                                                  spectre_golden_registers,
1589                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1590                 radeon_program_register_sequence(rdev,
1591                                                  spectre_golden_common_registers,
1592                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1593                 radeon_program_register_sequence(rdev,
1594                                                  spectre_golden_spm_registers,
1595                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1596                 break;
1597         case CHIP_HAWAII:
1598                 radeon_program_register_sequence(rdev,
1599                                                  hawaii_mgcg_cgcg_init,
1600                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1601                 radeon_program_register_sequence(rdev,
1602                                                  hawaii_golden_registers,
1603                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1604                 radeon_program_register_sequence(rdev,
1605                                                  hawaii_golden_common_registers,
1606                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1607                 radeon_program_register_sequence(rdev,
1608                                                  hawaii_golden_spm_registers,
1609                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1610                 break;
1611         default:
1612                 break;
1613         }
1614 }
1615
1616 /**
1617  * cik_get_xclk - get the xclk
1618  *
1619  * @rdev: radeon_device pointer
1620  *
1621  * Returns the reference clock used by the gfx engine
1622  * (CIK).
1623  */
1624 u32 cik_get_xclk(struct radeon_device *rdev)
1625 {
1626         u32 reference_clock = rdev->clock.spll.reference_freq;
1627
1628         if (rdev->flags & RADEON_IS_IGP) {
1629                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1630                         return reference_clock / 2;
1631         } else {
1632                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1633                         return reference_clock / 4;
1634         }
1635         return reference_clock;
1636 }
1637
1638 /**
1639  * cik_mm_rdoorbell - read a doorbell dword
1640  *
1641  * @rdev: radeon_device pointer
1642  * @index: doorbell index
1643  *
1644  * Returns the value in the doorbell aperture at the
1645  * requested doorbell index (CIK).
1646  */
1647 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1648 {
1649         if (index < rdev->doorbell.num_doorbells) {
1650                 return readl(rdev->doorbell.ptr + index);
1651         } else {
1652                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1653                 return 0;
1654         }
1655 }
1656
1657 /**
1658  * cik_mm_wdoorbell - write a doorbell dword
1659  *
1660  * @rdev: radeon_device pointer
1661  * @index: doorbell index
1662  * @v: value to write
1663  *
1664  * Writes @v to the doorbell aperture at the
1665  * requested doorbell index (CIK).
1666  */
1667 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1668 {
1669         if (index < rdev->doorbell.num_doorbells) {
1670                 writel(v, rdev->doorbell.ptr + index);
1671         } else {
1672                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1673         }
1674 }
1675
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC IO debug settings for Bonaire, stored as
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs.  Programmed by
 * ci_mc_load_microcode() before uploading the MC ucode when the legacy
 * (headerless) firmware image is in use; new-style images carry their
 * own io_debug table in the firmware header instead.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1717
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC IO debug settings for Hawaii, stored as
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs.  Used by
 * ci_mc_load_microcode() on the legacy (headerless) firmware path,
 * same as bonaire_io_mc_regs above.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1745
1746
1747 /**
1748  * cik_srbm_select - select specific register instances
1749  *
1750  * @rdev: radeon_device pointer
1751  * @me: selected ME (micro engine)
1752  * @pipe: pipe
1753  * @queue: queue
1754  * @vmid: VMID
1755  *
1756  * Switches the currently active registers instances.  Some
1757  * registers are instanced per VMID, others are instanced per
1758  * me/pipe/queue combination.
1759  */
1760 static void cik_srbm_select(struct radeon_device *rdev,
1761                             u32 me, u32 pipe, u32 queue, u32 vmid)
1762 {
1763         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1764                              MEID(me & 0x3) |
1765                              VMID(vmid & 0xf) |
1766                              QUEUEID(queue & 0x7));
1767         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1768 }
1769
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  Supports both the
 * new-style firmware image (self-describing header with embedded
 * io_debug table) and the legacy image (driver-side register tables
 * above).  The upload is skipped entirely if the MC engine is
 * already running.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy images are big-endian */
	const __le32 *new_fw_data = NULL;	/* new images are little-endian */
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new firmware: sizes/offsets come from the image header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug entries are {index, data} dword pairs, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: raw ucode blob + built-in register tables */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only program the MC if it is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs: each entry is an index write followed
		 * by a data write */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* override two io debug entries on device 0x6649 when
		 * MC_SEQ_MISC0 bits 15:8 read 0x56 */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode one dword at a time */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete on both channels;
		 * NOTE(review): a timeout here is not reported as an error */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1875
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).  For each block (pfp, me, ce,
 * mec, rlc, sdma, plus mc/smc on dGPUs) the new-style named image
 * is tried first and validated; on failure the legacy image is
 * requested and its size checked against the expected legacy size.
 * Mixing new and legacy images across blocks is rejected.
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;		/* legacy firmware name component */
	const char *new_chip_name;	/* new-style firmware name component */
	/* expected legacy image sizes in bytes (ucode dword counts * 4) */
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size = 0,
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
	char fw_name[30];
	int new_fw = 0;		/* count of blocks that loaded new-style images */
	int err;
	int num_fw;		/* total firmware blocks expected for this asic */
	bool new_smc = false;	/* some board revisions need an alternate SMC image */

	DRM_DEBUG("\n");

	/* pick firmware names, expected sizes and block count per asic */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		if ((rdev->pdev->revision == 0x80) ||
		    (rdev->pdev->revision == 0x81) ||
		    (rdev->pdev->device == 0x665f))
			new_smc = true;
		new_chip_name = "bonaire";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_HAWAII:
		chip_name = "HAWAII";
		if (rdev->pdev->revision == 0x80)
			new_smc = true;
		new_chip_name = "hawaii";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
		num_fw = 8;
		break;
	case CHIP_KAVERI:
		/* APU: no mc/smc firmware, but has mec2 (loaded below) */
		chip_name = "KAVERI";
		new_chip_name = "kaveri";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 7;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		new_chip_name = "kabini";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	case CHIP_MULLINS:
		chip_name = "MULLINS";
		new_chip_name = "mullins";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		num_fw = 6;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* pfp firmware: try new-style image first, fall back to legacy */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* me firmware
	 * NOTE(review): unlike the pfp path above, the legacy size-mismatch
	 * branches below set err = -EINVAL without a goto out, so a later
	 * successful reject_firmware() call overwrites the error */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ce firmware */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* mec firmware */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->mec_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mec_fw->size != mec_req_size) {
			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->mec_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* mec2 firmware: Kaveri only, new-style image only (no legacy fallback) */
	if (rdev->family == CHIP_KAVERI) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err) {
			goto out;
		} else {
			err = radeon_ucode_validate(rdev->mec2_fw);
			if (err) {
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* rlc firmware */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* sdma firmware */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->sdma_fw->size != sdma_req_size) {
			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->sdma_fw);
		if (err) {
			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		/* mc firmware: legacy path accepts either the mc or mc2 size */
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err) {
				snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
				err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
				if (err)
					goto out;
			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)){
				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
			}
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
		} else {
			err = radeon_ucode_validate(rdev->mc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}

		/* smc firmware: optional — on total failure the error is
		 * cleared and the driver continues without SMC */
		if (new_smc)
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
			if (err) {
				pr_err("smc: error loading firmware \"%s\"\n",
				       fw_name);
				release_firmware(rdev->smc_fw);
				rdev->smc_fw = NULL;
				err = 0;
			} else if (rdev->smc_fw->size != smc_req_size) {
				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->smc_fw->size, fw_name);
				err = -EINVAL;
			}
		} else {
			err = radeon_ucode_validate(rdev->smc_fw);
			if (err) {
				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
				       fw_name);
				goto out;
			} else {
				new_fw++;
			}
		}
	}

	/* all blocks must agree: either all legacy or all new-style */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < num_fw) {
		pr_err("ci_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}

out:
	/* on any error, drop every firmware image we may have acquired */
	if (err) {
		if (err != -EINVAL)
			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->mec2_fw);
		rdev->mec2_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
2224
2225 /*
2226  * Core functions
2227  */
2228 /**
2229  * cik_tiling_mode_table_init - init the hw tiling table
2230  *
2231  * @rdev: radeon_device pointer
2232  *
2233  * Starting with SI, the tiling setup is done globally in a
2234  * set of 32 tiling modes.  Rather than selecting each set of
2235  * parameters per surface as on older asics, we just select
2236  * which index in the tiling table we want to use, and the
2237  * surface uses those parameters (CIK).
2238  */
2239 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2240 {
2241         u32 *tile = rdev->config.cik.tile_mode_array;
2242         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2243         const u32 num_tile_mode_states =
2244                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2245         const u32 num_secondary_tile_mode_states =
2246                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2247         u32 reg_offset, split_equal_to_row_size;
2248         u32 num_pipe_configs;
2249         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2250                 rdev->config.cik.max_shader_engines;
2251
2252         switch (rdev->config.cik.mem_row_size_in_kb) {
2253         case 1:
2254                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2255                 break;
2256         case 2:
2257         default:
2258                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2259                 break;
2260         case 4:
2261                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2262                 break;
2263         }
2264
2265         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2266         if (num_pipe_configs > 8)
2267                 num_pipe_configs = 16;
2268
2269         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2270                 tile[reg_offset] = 0;
2271         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2272                 macrotile[reg_offset] = 0;
2273
2274         switch(num_pipe_configs) {
2275         case 16:
2276                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2278                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2279                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2280                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2282                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2284                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2285                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2286                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2288                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2290                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2292                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2293                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2294                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295                            TILE_SPLIT(split_equal_to_row_size));
2296                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2297                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2300                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2301                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2302                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2303                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2304                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2305                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2306                            TILE_SPLIT(split_equal_to_row_size));
2307                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2308                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2309                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2310                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2312                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2314                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2317                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2318                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2319                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2321                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2322                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2327                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2329                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2331                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2334                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2335                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2336                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2337                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2342                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2344                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2346                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2347                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2348                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2349                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2352                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354
2355                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2357                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                            NUM_BANKS(ADDR_SURF_16_BANK));
2359                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2361                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362                            NUM_BANKS(ADDR_SURF_16_BANK));
2363                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366                            NUM_BANKS(ADDR_SURF_16_BANK));
2367                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370                            NUM_BANKS(ADDR_SURF_16_BANK));
2371                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2373                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2374                            NUM_BANKS(ADDR_SURF_8_BANK));
2375                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2378                            NUM_BANKS(ADDR_SURF_4_BANK));
2379                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382                            NUM_BANKS(ADDR_SURF_2_BANK));
2383                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386                            NUM_BANKS(ADDR_SURF_16_BANK));
2387                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2389                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390                            NUM_BANKS(ADDR_SURF_16_BANK));
2391                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2393                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2394                             NUM_BANKS(ADDR_SURF_16_BANK));
2395                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2397                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2398                             NUM_BANKS(ADDR_SURF_8_BANK));
2399                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2401                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402                             NUM_BANKS(ADDR_SURF_4_BANK));
2403                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406                             NUM_BANKS(ADDR_SURF_2_BANK));
2407                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410                             NUM_BANKS(ADDR_SURF_2_BANK));
2411
2412                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2413                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2414                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2415                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2416                 break;
2417
2418         case 8:
2419                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2421                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2422                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2423                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2425                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2426                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2427                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2429                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2430                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2431                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2433                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2434                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2435                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2437                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2438                            TILE_SPLIT(split_equal_to_row_size));
2439                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2442                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2444                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2446                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2447                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2448                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                            TILE_SPLIT(split_equal_to_row_size));
2450                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2451                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2452                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2453                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2455                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2464                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2467                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2468                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2470                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2475                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2477                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2482                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2483                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2484                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2485                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2489                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2490                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2491                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2494                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497
2498                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2500                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2501                                 NUM_BANKS(ADDR_SURF_16_BANK));
2502                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2504                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2505                                 NUM_BANKS(ADDR_SURF_16_BANK));
2506                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509                                 NUM_BANKS(ADDR_SURF_16_BANK));
2510                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513                                 NUM_BANKS(ADDR_SURF_16_BANK));
2514                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517                                 NUM_BANKS(ADDR_SURF_8_BANK));
2518                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2521                                 NUM_BANKS(ADDR_SURF_4_BANK));
2522                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525                                 NUM_BANKS(ADDR_SURF_2_BANK));
2526                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529                                 NUM_BANKS(ADDR_SURF_16_BANK));
2530                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533                                 NUM_BANKS(ADDR_SURF_16_BANK));
2534                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537                                 NUM_BANKS(ADDR_SURF_16_BANK));
2538                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541                                 NUM_BANKS(ADDR_SURF_16_BANK));
2542                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545                                 NUM_BANKS(ADDR_SURF_8_BANK));
2546                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549                                 NUM_BANKS(ADDR_SURF_4_BANK));
2550                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553                                 NUM_BANKS(ADDR_SURF_2_BANK));
2554
2555                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2556                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2557                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2558                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2559                 break;
2560
2561         case 4:
2562                 if (num_rbs == 4) {
2563                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2565                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2566                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2567                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2569                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2570                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2571                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2572                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2573                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2574                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2575                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2577                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2579                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2582                            TILE_SPLIT(split_equal_to_row_size));
2583                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2584                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2587                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2588                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2589                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2590                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2591                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2592                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593                            TILE_SPLIT(split_equal_to_row_size));
2594                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2595                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2596                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2597                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2599                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2600                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2603                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2605                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2606                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2608                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2609                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2613                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2614                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2619                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2620                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2623                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2627                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2629                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2631                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2632                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2635                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2636                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2637                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2638                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2639                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641
2642                 } else if (num_rbs < 4) {
2643                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2645                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2646                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2647                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2649                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2650                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2651                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2653                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2655                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2657                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2659                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2661                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662                            TILE_SPLIT(split_equal_to_row_size));
2663                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2664                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2666                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2667                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2668                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2670                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2671                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2672                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2673                            TILE_SPLIT(split_equal_to_row_size));
2674                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2675                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2676                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2678                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2679                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2681                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2682                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2683                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2684                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2685                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2687                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2688                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2689                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2691                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2692                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2693                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2694                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2698                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2701                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2703                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2704                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2705                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2706                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2707                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2708                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2709                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2710                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2711                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2715                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2716                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2718                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2719                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721                 }
2722
2723                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730                                 NUM_BANKS(ADDR_SURF_16_BANK));
2731                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2738                                 NUM_BANKS(ADDR_SURF_16_BANK));
2739                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2742                                 NUM_BANKS(ADDR_SURF_16_BANK));
2743                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2746                                 NUM_BANKS(ADDR_SURF_8_BANK));
2747                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2749                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2750                                 NUM_BANKS(ADDR_SURF_4_BANK));
2751                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2752                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2753                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2754                                 NUM_BANKS(ADDR_SURF_16_BANK));
2755                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2756                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2757                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2758                                 NUM_BANKS(ADDR_SURF_16_BANK));
2759                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2761                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2762                                 NUM_BANKS(ADDR_SURF_16_BANK));
2763                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2765                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2766                                 NUM_BANKS(ADDR_SURF_16_BANK));
2767                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770                                 NUM_BANKS(ADDR_SURF_16_BANK));
2771                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2774                                 NUM_BANKS(ADDR_SURF_8_BANK));
2775                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2777                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2778                                 NUM_BANKS(ADDR_SURF_4_BANK));
2779
2780                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2781                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2782                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2783                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2784                 break;
2785
2786         case 2:
2787                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2789                            PIPE_CONFIG(ADDR_SURF_P2) |
2790                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2791                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2793                            PIPE_CONFIG(ADDR_SURF_P2) |
2794                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2795                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2797                            PIPE_CONFIG(ADDR_SURF_P2) |
2798                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2799                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2801                            PIPE_CONFIG(ADDR_SURF_P2) |
2802                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2803                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                            PIPE_CONFIG(ADDR_SURF_P2) |
2806                            TILE_SPLIT(split_equal_to_row_size));
2807                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808                            PIPE_CONFIG(ADDR_SURF_P2) |
2809                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2811                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2812                            PIPE_CONFIG(ADDR_SURF_P2) |
2813                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2814                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2815                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2816                            PIPE_CONFIG(ADDR_SURF_P2) |
2817                            TILE_SPLIT(split_equal_to_row_size));
2818                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2819                            PIPE_CONFIG(ADDR_SURF_P2);
2820                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2821                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822                            PIPE_CONFIG(ADDR_SURF_P2));
2823                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825                             PIPE_CONFIG(ADDR_SURF_P2) |
2826                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2828                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2829                             PIPE_CONFIG(ADDR_SURF_P2) |
2830                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2832                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2833                             PIPE_CONFIG(ADDR_SURF_P2) |
2834                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836                             PIPE_CONFIG(ADDR_SURF_P2) |
2837                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2838                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2839                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2840                             PIPE_CONFIG(ADDR_SURF_P2) |
2841                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2842                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2843                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844                             PIPE_CONFIG(ADDR_SURF_P2) |
2845                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2846                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2847                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848                             PIPE_CONFIG(ADDR_SURF_P2) |
2849                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2851                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2852                             PIPE_CONFIG(ADDR_SURF_P2));
2853                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2855                             PIPE_CONFIG(ADDR_SURF_P2) |
2856                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2859                             PIPE_CONFIG(ADDR_SURF_P2) |
2860                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2861                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2862                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2863                             PIPE_CONFIG(ADDR_SURF_P2) |
2864                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2865
2866                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2867                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869                                 NUM_BANKS(ADDR_SURF_16_BANK));
2870                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2871                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2873                                 NUM_BANKS(ADDR_SURF_16_BANK));
2874                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2876                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2877                                 NUM_BANKS(ADDR_SURF_16_BANK));
2878                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2881                                 NUM_BANKS(ADDR_SURF_16_BANK));
2882                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2885                                 NUM_BANKS(ADDR_SURF_16_BANK));
2886                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2889                                 NUM_BANKS(ADDR_SURF_16_BANK));
2890                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2893                                 NUM_BANKS(ADDR_SURF_8_BANK));
2894                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2895                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2896                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2897                                 NUM_BANKS(ADDR_SURF_16_BANK));
2898                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2899                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2900                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2901                                 NUM_BANKS(ADDR_SURF_16_BANK));
2902                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2903                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2904                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2905                                 NUM_BANKS(ADDR_SURF_16_BANK));
2906                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2907                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2908                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2909                                 NUM_BANKS(ADDR_SURF_16_BANK));
2910                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2912                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913                                 NUM_BANKS(ADDR_SURF_16_BANK));
2914                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                 NUM_BANKS(ADDR_SURF_16_BANK));
2918                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2921                                 NUM_BANKS(ADDR_SURF_8_BANK));
2922
2923                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2924                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2925                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2926                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2927                 break;
2928
2929         default:
2930                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2931         }
2932 }
2933
2934 /**
2935  * cik_select_se_sh - select which SE, SH to address
2936  *
2937  * @rdev: radeon_device pointer
2938  * @se_num: shader engine to address
2939  * @sh_num: sh block to address
2940  *
2941  * Select which SE, SH combinations to address. Certain
2942  * registers are instanced per SE or SH.  0xffffffff means
2943  * broadcast to all SEs or SHs (CIK).
2944  */
2945 static void cik_select_se_sh(struct radeon_device *rdev,
2946                              u32 se_num, u32 sh_num)
2947 {
2948         u32 data = INSTANCE_BROADCAST_WRITES;
2949
2950         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2951                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2952         else if (se_num == 0xffffffff)
2953                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2954         else if (sh_num == 0xffffffff)
2955                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2956         else
2957                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2958         WREG32(GRBM_GFX_INDEX, data);
2959 }
2960
2961 /**
2962  * cik_create_bitmask - create a bitmask
2963  *
2964  * @bit_width: length of the mask
2965  *
2966  * create a variable length bit mask (CIK).
2967  * Returns the bitmask.
2968  */
2969 static u32 cik_create_bitmask(u32 bit_width)
2970 {
2971         u32 i, mask = 0;
2972
2973         for (i = 0; i < bit_width; i++) {
2974                 mask <<= 1;
2975                 mask |= 1;
2976         }
2977         return mask;
2978 }
2979
2980 /**
2981  * cik_get_rb_disabled - computes the mask of disabled RBs
2982  *
2983  * @rdev: radeon_device pointer
2984  * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
2985  * @sh_per_se: number of SH blocks per SE for the asic
2986  *
2987  * Calculates the bitmask of disabled RBs (CIK).
2988  * Returns the disabled RB bitmask.
2989  */
2990 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2991                               u32 max_rb_num_per_se,
2992                               u32 sh_per_se)
2993 {
2994         u32 data, mask;
2995
2996         data = RREG32(CC_RB_BACKEND_DISABLE);
2997         if (data & 1)
2998                 data &= BACKEND_DISABLE_MASK;
2999         else
3000                 data = 0;
3001         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3002
3003         data >>= BACKEND_DISABLE_SHIFT;
3004
3005         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3006
3007         return data & mask;
3008 }
3009
3010 /**
3011  * cik_setup_rb - setup the RBs on the asic
3012  *
3013  * @rdev: radeon_device pointer
3014  * @se_num: number of SEs (shader engines) for the asic
3015  * @sh_per_se: number of SH blocks per SE for the asic
3016  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3017  *
3018  * Configures per-SE/SH RB registers (CIK).
3019  */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num_per_se)
{
        int i, j;
        u32 data, mask;
        u32 disabled_rbs = 0;
        u32 enabled_rbs = 0;

        /* Visit each SE/SH pair and pack its disabled-RB bits into one
         * combined bitmask; Hawaii uses a wider per-SH bitmap field.
         */
        for (i = 0; i < se_num; i++) {
                for (j = 0; j < sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
                        if (rdev->family == CHIP_HAWAII)
                                disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
                        else
                                disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
                }
        }
        /* Restore broadcast addressing before any global register writes. */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* Invert the disabled mask into an enabled-RB mask. */
        mask = 1;
        for (i = 0; i < max_rb_num_per_se * se_num; i++) {
                if (!(disabled_rbs & mask))
                        enabled_rbs |= mask;
                mask <<= 1;
        }

        rdev->config.cik.backend_enable_mask = enabled_rbs;

        /* Program PA_SC_RASTER_CONFIG for each SE, picking an RB map from
         * the low 2 bits of enabled_rbs per SH (the mask is consumed two
         * bits at a time as we go).
         */
        for (i = 0; i < se_num; i++) {
                cik_select_se_sh(rdev, i, 0xffffffff);
                data = 0;
                for (j = 0; j < sh_per_se; j++) {
                        switch (enabled_rbs & 3) {
                        case 0:
                                /* neither RB of this SH is enabled */
                                if (j == 0)
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
                                else
                                        data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
                                break;
                        case 1:
                                /* only RB 0 enabled */
                                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                                break;
                        case 2:
                                /* only RB 1 enabled */
                                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                                break;
                        case 3:
                        default:
                                /* both RBs enabled */
                                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                                break;
                        }
                        enabled_rbs >>= 2;
                }
                WREG32(PA_SC_RASTER_CONFIG, data);
        }
        /* Leave the GRBM index in broadcast mode for subsequent init. */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3078
3079 /**
3080  * cik_gpu_init - setup the 3D engine
3081  *
3082  * @rdev: radeon_device pointer
3083  *
3084  * Configures the 3D engine and tiling configuration
3085  * registers so that the 3D engine is usable.
3086  */
static void cik_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
        u32 mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        int i, j;

        /* Per-family shader engine / pipe / CU / cache limits, scan
         * converter FIFO sizes, and the golden GB_ADDR_CONFIG value.
         */
        switch (rdev->family) {
        case CHIP_BONAIRE:
                rdev->config.cik.max_shader_engines = 2;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 7;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_HAWAII:
                rdev->config.cik.max_shader_engines = 4;
                rdev->config.cik.max_tile_pipes = 16;
                rdev->config.cik.max_cu_per_sh = 11;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 4;
                rdev->config.cik.max_texture_channel_caches = 16;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 32;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KAVERI:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 4;
                rdev->config.cik.max_cu_per_sh = 8;
                rdev->config.cik.max_backends_per_se = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 4;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
        default:
                rdev->config.cik.max_shader_engines = 1;
                rdev->config.cik.max_tile_pipes = 2;
                rdev->config.cik.max_cu_per_sh = 2;
                rdev->config.cik.max_sh_per_se = 1;
                rdev->config.cik.max_backends_per_se = 1;
                rdev->config.cik.max_texture_channel_caches = 2;
                rdev->config.cik.max_gprs = 256;
                rdev->config.cik.max_gs_threads = 16;
                rdev->config.cik.max_hw_contexts = 8;

                rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
                rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
                rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
        /* NOTE(review): presumably enables and acks SRBM interrupts —
         * confirm against the SRBM register docs. */
        WREG32(SRBM_INT_CNTL, 0x1);
        WREG32(SRBM_INT_ACK, 0x1);

        WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

        /* Return value intentionally discarded; only the read is kept. */
        RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
        rdev->config.cik.mem_max_burst_length_bytes = 256;
        /* Derive memory row size in KB from the column count, capped at 4. */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cik.mem_row_size_in_kb > 4)
                rdev->config.cik.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cik.shader_engine_tile_size = 32;
        rdev->config.cik.num_gpus = 1;
        rdev->config.cik.multi_gpu_tile_size = 64;

        /* fix up row size */
        gb_addr_config &= ~ROW_SIZE_MASK;
        switch (rdev->config.cik.mem_row_size_in_kb) {
        case 1:
        default:
                gb_addr_config |= ROW_SIZE(0);
                break;
        case 2:
                gb_addr_config |= ROW_SIZE(1);
                break;
        case 4:
                gb_addr_config |= ROW_SIZE(2);
                break;
        }

        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cik.tile_config = 0;
        switch (rdev->config.cik.num_tile_pipes) {
        case 1:
                rdev->config.cik.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cik.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cik.tile_config |= (2 << 0);
                break;
        case 8:
        default:
                /* XXX what about 12? */
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
        rdev->config.cik.tile_config |=
                ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* Broadcast the address config to every block that consumes it. */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        cik_tiling_mode_table_init(rdev);

        cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);

        /* Count active compute units across all SE/SH pairs. */
        rdev->config.cik.active_cus = 0;
        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        rdev->config.cik.active_cus +=
                                hweight32(cik_get_cu_active_bitmap(rdev, i, j));
                }
        }

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        WREG32(SX_DEBUG_1, 0x20);

        WREG32(TA_CNTL_AUX, 0x00010000);

        tmp = RREG32(SPI_CONFIG_CNTL);
        tmp |= 0x03000000;
        WREG32(SPI_CONFIG_CNTL, tmp);

        WREG32(SQ_CONFIG, 1);

        WREG32(DB_DEBUG, 0);

        tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
        tmp |= 0x00000400;
        WREG32(DB_DEBUG2, tmp);

        tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
        tmp |= 0x00020200;
        WREG32(DB_DEBUG3, tmp);

        tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
        tmp |= 0x00018208;
        WREG32(CB_HW_CONTROL, tmp);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

        /* Size the scan-converter FIFOs from the per-family values above. */
        WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_CONFIG, 0);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* Make HDP flushes also invalidate the HDP cache. */
        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        /* Read-modify-write of the same value; presumably the write itself
         * has a latching side effect — TODO confirm against register docs. */
        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
        WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

        /* Short delay after init — presumably to let the writes settle. */
        udelay(50);
}
3327
3328 /*
3329  * GPU scratch registers helpers function.
3330  */
3331 /**
3332  * cik_scratch_init - setup driver info for CP scratch regs
3333  *
3334  * @rdev: radeon_device pointer
3335  *
3336  * Set up the number and offset of the CP scratch registers.
3337  * NOTE: use of CP scratch registers is a legacy inferface and
3338  * is not used by default on newer asics (r6xx+).  On newer asics,
3339  * memory buffers are used for fences rather than scratch regs.
3340  */
3341 static void cik_scratch_init(struct radeon_device *rdev)
3342 {
3343         int i;
3344
3345         rdev->scratch.num_reg = 7;
3346         rdev->scratch.reg_base = SCRATCH_REG0;
3347         for (i = 0; i < rdev->scratch.num_reg; i++) {
3348                 rdev->scratch.free[i] = true;
3349                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3350         }
3351 }
3352
3353 /**
3354  * cik_ring_test - basic gfx ring test
3355  *
3356  * @rdev: radeon_device pointer
3357  * @ring: radeon_ring structure holding ring information
3358  *
3359  * Allocate a scratch register and write to it using the gfx ring (CIK).
3360  * Provides a basic gfx ring test to verify that the ring is working.
3361  * Used by cik_cp_gfx_resume();
3362  * Returns 0 on success, error on failure.
3363  */
3364 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3365 {
3366         uint32_t scratch;
3367         uint32_t tmp = 0;
3368         unsigned i;
3369         int r;
3370
3371         r = radeon_scratch_get(rdev, &scratch);
3372         if (r) {
3373                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3374                 return r;
3375         }
3376         WREG32(scratch, 0xCAFEDEAD);
3377         r = radeon_ring_lock(rdev, ring, 3);
3378         if (r) {
3379                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3380                 radeon_scratch_free(rdev, scratch);
3381                 return r;
3382         }
3383         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3384         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3385         radeon_ring_write(ring, 0xDEADBEEF);
3386         radeon_ring_unlock_commit(rdev, ring, false);
3387
3388         for (i = 0; i < rdev->usec_timeout; i++) {
3389                 tmp = RREG32(scratch);
3390                 if (tmp == 0xDEADBEEF)
3391                         break;
3392                 udelay(1);
3393         }
3394         if (i < rdev->usec_timeout) {
3395                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3396         } else {
3397                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3398                           ring->idx, scratch, tmp);
3399                 r = -EINVAL;
3400         }
3401         radeon_scratch_free(rdev, scratch);
3402         return r;
3403 }
3404
3405 /**
3406  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3407  *
3408  * @rdev: radeon_device pointer
3409  * @ridx: radeon ring index
3410  *
3411  * Emits an hdp flush on the cp.
3412  */
3413 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3414                                        int ridx)
3415 {
3416         struct radeon_ring *ring = &rdev->ring[ridx];
3417         u32 ref_and_mask;
3418
3419         switch (ring->idx) {
3420         case CAYMAN_RING_TYPE_CP1_INDEX:
3421         case CAYMAN_RING_TYPE_CP2_INDEX:
3422         default:
3423                 switch (ring->me) {
3424                 case 0:
3425                         ref_and_mask = CP2 << ring->pipe;
3426                         break;
3427                 case 1:
3428                         ref_and_mask = CP6 << ring->pipe;
3429                         break;
3430                 default:
3431                         return;
3432                 }
3433                 break;
3434         case RADEON_RING_TYPE_GFX_INDEX:
3435                 ref_and_mask = CP0;
3436                 break;
3437         }
3438
3439         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3440         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3441                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3442                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3443         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3444         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3445         radeon_ring_write(ring, ref_and_mask);
3446         radeon_ring_write(ring, ref_and_mask);
3447         radeon_ring_write(ring, 0x20); /* poll interval */
3448 }
3449
3450 /**
3451  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3452  *
3453  * @rdev: radeon_device pointer
3454  * @fence: radeon fence object
3455  *
3456  * Emits a fence sequnce number on the gfx ring and flushes
3457  * GPU caches.
3458  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
                             struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* Workaround for cache flush problems. First send a dummy EOP
         * event down the pipe with seq one below.
         */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* Fence address, split into dword-aligned low bits and high bits. */
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* INT_SEL(0): presumably no interrupt for the dummy EOP — confirm
         * against the PM4 packet spec. */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                                DATA_SEL(1) | INT_SEL(0));
        radeon_ring_write(ring, fence->seq - 1);
        radeon_ring_write(ring, 0);

        /* Then send the real EOP event down the pipe. */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        radeon_ring_write(ring, addr & 0xfffffffc);
        /* INT_SEL(2) here differs from the dummy packet above — NOTE(review):
         * likely selects interrupt delivery on EOP; verify in the spec. */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3490
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
                                 struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        /* GPU address where the fence sequence number is written back */
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* RELEASE_MEM - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        /* DATA_SEL(1) = write the seq value, INT_SEL(2) = also raise irq */
        radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
3518
/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 * Returns true (emission always succeeds on CIK).
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
        uint64_t addr = semaphore->gpu_addr;
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

        /* signal/wait selector rides in the upper dword with the addr bits */
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
        radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

        if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
                /* Prevent the PFP from running ahead of the semaphore wait */
                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                radeon_ring_write(ring, 0x0);
        }

        return true;
}
3550
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 * Returns the fence for the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
                                    uint64_t src_offset, uint64_t dst_offset,
                                    unsigned num_gpu_pages,
                                    struct dma_resv *resv)
{
        struct radeon_fence *fence;
        struct radeon_sync sync;
        int ring_index = rdev->asic->copy.blit_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes, control;
        int i, num_loops;
        int r = 0;

        radeon_sync_create(&sync);

        /* Each DMA_DATA packet moves at most 0x1fffff bytes (21-bit byte
         * count), so split the copy accordingly.  Each loop emits 7 dwords;
         * 18 extra dwords cover the sync and fence emission.
         */
        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
        r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_sync_free(rdev, &sync, NULL);
                return ERR_PTR(r);
        }

        /* wait for prior users of the buffers before starting the copy */
        radeon_sync_resv(rdev, &sync, resv, false);
        radeon_sync_rings(rdev, &sync, ring->idx);

        for (i = 0; i < num_loops; i++) {
                cur_size_in_bytes = size_in_bytes;
                if (cur_size_in_bytes > 0x1fffff)
                        cur_size_in_bytes = 0x1fffff;
                size_in_bytes -= cur_size_in_bytes;
                control = 0;
                /* only the last packet carries CP_SYNC */
                if (size_in_bytes == 0)
                        control |= PACKET3_DMA_DATA_CP_SYNC;
                radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
                radeon_ring_write(ring, control);
                radeon_ring_write(ring, lower_32_bits(src_offset));
                radeon_ring_write(ring, upper_32_bits(src_offset));
                radeon_ring_write(ring, lower_32_bits(dst_offset));
                radeon_ring_write(ring, upper_32_bits(dst_offset));
                radeon_ring_write(ring, cur_size_in_bytes);
                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
        }

        r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_sync_free(rdev, &sync, NULL);
                return ERR_PTR(r);
        }

        radeon_ring_unlock_commit(rdev, ring, false);
        radeon_sync_free(rdev, &sync, fence);

        return fence;
}
3622
3623 /*
3624  * IB stuff
3625  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        /* VMID 0 is used when the IB has no VM attached */
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
        u32 header, control = INDIRECT_BUFFER_VALID;

        if (ib->is_const_ib) {
                /* set switch buffer packet before const IB */
                radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                radeon_ring_write(ring, 0);

                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        } else {
                u32 next_rptr;
                if (ring->rptr_save_reg) {
                        /* 3 dwords for the reg write + 4 for the IB packet */
                        next_rptr = ring->wptr + 3 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
                        radeon_ring_write(ring, ((ring->rptr_save_reg -
                                                  PACKET3_SET_UCONFIG_REG_START) >> 2));
                        radeon_ring_write(ring, next_rptr);
                } else if (rdev->wb.enabled) {
                        /* 5 dwords for WRITE_DATA + 4 for the IB packet */
                        next_rptr = ring->wptr + 5 + 4;
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                        radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }

                header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
        }

        /* length in dwords plus the VMID to execute the IB under */
        control |= ib->length_dw | (vm_id << 24);

        radeon_ring_write(ring, header);
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        radeon_ring_write(ring, control);
}
3677
3678 /**
3679  * cik_ib_test - basic gfx ring IB test
3680  *
3681  * @rdev: radeon_device pointer
3682  * @ring: radeon_ring structure holding ring information
3683  *
3684  * Allocate an IB and execute it on the gfx ring (CIK).
3685  * Provides a basic gfx ring test to verify that IBs are working.
3686  * Returns 0 on success, error on failure.
3687  */
3688 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3689 {
3690         struct radeon_ib ib;
3691         uint32_t scratch;
3692         uint32_t tmp = 0;
3693         unsigned i;
3694         int r;
3695
3696         r = radeon_scratch_get(rdev, &scratch);
3697         if (r) {
3698                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3699                 return r;
3700         }
3701         WREG32(scratch, 0xCAFEDEAD);
3702         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3703         if (r) {
3704                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3705                 radeon_scratch_free(rdev, scratch);
3706                 return r;
3707         }
3708         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3709         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3710         ib.ptr[2] = 0xDEADBEEF;
3711         ib.length_dw = 3;
3712         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3713         if (r) {
3714                 radeon_scratch_free(rdev, scratch);
3715                 radeon_ib_free(rdev, &ib);
3716                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3717                 return r;
3718         }
3719         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3720                 RADEON_USEC_IB_TEST_TIMEOUT));
3721         if (r < 0) {
3722                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3723                 radeon_scratch_free(rdev, scratch);
3724                 radeon_ib_free(rdev, &ib);
3725                 return r;
3726         } else if (r == 0) {
3727                 DRM_ERROR("radeon: fence wait timed out.\n");
3728                 radeon_scratch_free(rdev, scratch);
3729                 radeon_ib_free(rdev, &ib);
3730                 return -ETIMEDOUT;
3731         }
3732         r = 0;
3733         for (i = 0; i < rdev->usec_timeout; i++) {
3734                 tmp = RREG32(scratch);
3735                 if (tmp == 0xDEADBEEF)
3736                         break;
3737                 udelay(1);
3738         }
3739         if (i < rdev->usec_timeout) {
3740                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3741         } else {
3742                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3743                           scratch, tmp);
3744                 r = -EINVAL;
3745         }
3746         radeon_scratch_free(rdev, scratch);
3747         radeon_ib_free(rdev, &ib);
3748         return r;
3749 }
3750
3751 /*
3752  * CP.
 * On CIK, gfx and compute now have independent command processors.
3754  *
3755  * GFX
3756  * Gfx consists of a single ring and can process both gfx jobs and
3757  * compute jobs.  The gfx CP consists of three microengines (ME):
3758  * PFP - Pre-Fetch Parser
3759  * ME - Micro Engine
3760  * CE - Constant Engine
3761  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3763  * used by the DE so that they can be loaded into cache in parallel
3764  * while the DE is processing state update packets.
3765  *
3766  * Compute
3767  * The compute CP consists of two microengines (ME):
3768  * MEC1 - Compute MicroEngine 1
3769  * MEC2 - Compute MicroEngine 2
3770  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3771  * The queues are exposed to userspace and are programmed directly
3772  * by the compute runtime.
3773  */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
        if (enable)
                WREG32(CP_ME_CNTL, 0);
        else {
                /* the gfx ring can no longer move data, so shrink TTM's
                 * active VRAM window back to the CPU-visible portion when
                 * gfx is the copy ring
                 */
                if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
                        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
                WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
        }
        udelay(50); /* give the halt/unhalt time to take effect */
}
3794
3795 /**
3796  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3797  *
3798  * @rdev: radeon_device pointer
3799  *
3800  * Loads the gfx PFP, ME, and CE ucode.
3801  * Returns 0 for success, -EINVAL if the ucode is not available.
3802  */
3803 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3804 {
3805         int i;
3806
3807         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3808                 return -EINVAL;
3809
3810         cik_cp_gfx_enable(rdev, false);
3811
3812         if (rdev->new_fw) {
3813                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3814                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3815                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3816                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3817                 const struct gfx_firmware_header_v1_0 *me_hdr =
3818                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3819                 const __le32 *fw_data;
3820                 u32 fw_size;
3821
3822                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3823                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3824                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3825
3826                 /* PFP */
3827                 fw_data = (const __le32 *)
3828                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3829                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3830                 WREG32(CP_PFP_UCODE_ADDR, 0);
3831                 for (i = 0; i < fw_size; i++)
3832                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3833                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3834
3835                 /* CE */
3836                 fw_data = (const __le32 *)
3837                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3838                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3839                 WREG32(CP_CE_UCODE_ADDR, 0);
3840                 for (i = 0; i < fw_size; i++)
3841                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3842                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3843
3844                 /* ME */
3845                 fw_data = (const __be32 *)
3846                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3847                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3848                 WREG32(CP_ME_RAM_WADDR, 0);
3849                 for (i = 0; i < fw_size; i++)
3850                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3851                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3852                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3853         } else {
3854                 const __be32 *fw_data;
3855
3856                 /* PFP */
3857                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3858                 WREG32(CP_PFP_UCODE_ADDR, 0);
3859                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3860                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3861                 WREG32(CP_PFP_UCODE_ADDR, 0);
3862
3863                 /* CE */
3864                 fw_data = (const __be32 *)rdev->ce_fw->data;
3865                 WREG32(CP_CE_UCODE_ADDR, 0);
3866                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3867                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3868                 WREG32(CP_CE_UCODE_ADDR, 0);
3869
3870                 /* ME */
3871                 fw_data = (const __be32 *)rdev->me_fw->data;
3872                 WREG32(CP_ME_RAM_WADDR, 0);
3873                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3874                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3875                 WREG32(CP_ME_RAM_WADDR, 0);
3876         }
3877
3878         return 0;
3879 }
3880
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        /* init the CP */
        WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
        WREG32(CP_ENDIAN_SWAP, 0);
        WREG32(CP_DEVICE_ID, 1);

        cik_cp_gfx_enable(rdev, true);

        /* clear state dwords plus the 17 dwords of init packets below */
        r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* init the CE partitions.  CE only used for gfx on CIK */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0x8000);
        radeon_ring_write(ring, 0x8000);

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        radeon_ring_write(ring, 0x80000000);
        radeon_ring_write(ring, 0x80000000);

        /* emit the golden register state (clearstate_ci.h) */
        for (i = 0; i < cik_default_size; i++)
                radeon_ring_write(ring, cik_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

        radeon_ring_unlock_commit(rdev, ring, false);

        return 0;
}
3941
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
        /* halt the gfx MEs before freeing the ring object */
        cik_cp_gfx_enable(rdev, false);
        radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
3955
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 tmp;
        u32 rb_bufsz;
        u64 rb_addr;
        int r;

        WREG32(CP_SEM_WAIT_TIMER, 0x0);
        if (rdev->family != CHIP_HAWAII)
                WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);

        /* set the RB to use vmid 0 */
        WREG32(CP_RB_VMID, 0);

        WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

        /* ring 0 - compute and gfx */
        /* Set ring buffer size */
        ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        rb_bufsz = order_base_2(ring->ring_size / 8);
        tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
        tmp |= BUF_SWAP_32BIT;
#endif
        WREG32(CP_RB0_CNTL, tmp);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
        ring->wptr = 0;
        WREG32(CP_RB0_WPTR, ring->wptr);

        /* set the wb address whether it's enabled or not */
        WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

        /* scratch register shadowing is no longer supported */
        WREG32(SCRATCH_UMSK, 0);

        /* without writeback the CP must not update rptr in memory */
        if (!rdev->wb.enabled)
                tmp |= RB_NO_UPDATE;

        mdelay(1);
        WREG32(CP_RB0_CNTL, tmp);

        /* ring base address is programmed in units of 256 bytes */
        rb_addr = ring->gpu_addr >> 8;
        WREG32(CP_RB0_BASE, rb_addr);
        WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

        /* start the ring */
        cik_cp_gfx_start(rdev);
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
        r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
        if (r) {
                rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
                return r;
        }

        /* gfx can move data again; expose all of VRAM to TTM */
        if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
4031
4032 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4033                      struct radeon_ring *ring)
4034 {
4035         u32 rptr;
4036
4037         if (rdev->wb.enabled)
4038                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4039         else
4040                 rptr = RREG32(CP_RB0_RPTR);
4041
4042         return rptr;
4043 }
4044
/* Read the gfx ring write pointer directly from the RB0 register. */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
                     struct radeon_ring *ring)
{
        return RREG32(CP_RB0_WPTR);
}
4050
/* Commit the gfx ring write pointer to the hardware.  The discarded
 * readback acts as a posting read — presumably to flush the register
 * write to the device — TODO confirm against the register bus docs.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
                      struct radeon_ring *ring)
{
        WREG32(CP_RB0_WPTR, ring->wptr);
        (void)RREG32(CP_RB0_WPTR);
}
4057
/* Fetch the compute ring read pointer, from writeback memory when enabled,
 * otherwise from the queue's HQD register (which requires selecting the
 * me/pipe/queue via SRBM under srbm_mutex first).
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 rptr;

        if (rdev->wb.enabled) {
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        } else {
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                rptr = RREG32(CP_HQD_PQ_RPTR);
                /* restore the default SRBM selection */
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }

        return rptr;
}
4075
/* Fetch the compute ring write pointer, from writeback memory when enabled,
 * otherwise from the queue's HQD register via an SRBM-selected read.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 wptr;

        if (rdev->wb.enabled) {
                /* XXX check if swapping is necessary on BE */
                wptr = rdev->wb.wb[ring->wptr_offs/4];
        } else {
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
                wptr = RREG32(CP_HQD_PQ_WPTR);
                /* restore the default SRBM selection */
                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);
        }

        return wptr;
}
4094
/* Update the compute ring write pointer: mirror it into writeback memory
 * and ring the queue's doorbell to notify the CP.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
                          struct radeon_ring *ring)
{
        /* XXX check if swapping is necessary on BE */
        rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
        WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4102
/* Quiesce one compute queue: disable wptr polling and, if the HQD is
 * active, issue a dequeue request, wait for it to drain (bounded by
 * usec_timeout), then clear the queue pointers.  The caller must hold
 * rdev->srbm_mutex (see cik_cp_compute_enable) since this changes the
 * SRBM me/pipe/queue selection.
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore the default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4127
4128 /**
4129  * cik_cp_compute_enable - enable/disable the compute CP MEs
4130  *
4131  * @rdev: radeon_device pointer
4132  * @enable: enable or disable the MEs
4133  *
4134  * Halts or unhalts the compute MEs.
4135  */
4136 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4137 {
4138         if (enable)
4139                 WREG32(CP_MEC_CNTL, 0);
4140         else {
4141                 /*
4142                  * To make hibernation reliable we need to clear compute ring
4143                  * configuration before halting the compute ring.
4144                  */
4145                 mutex_lock(&rdev->srbm_mutex);
4146                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4147                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4148                 mutex_unlock(&rdev->srbm_mutex);
4149
4150                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4151                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4152                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4153         }
4154         udelay(50);
4155 }
4156
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
        int i;

        if (!rdev->mec_fw)
                return -EINVAL;

        /* halt the MECs before replacing their microcode */
        cik_cp_compute_enable(rdev, false);

        if (rdev->new_fw) {
                const struct gfx_firmware_header_v1_0 *mec_hdr =
                        (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                const __le32 *fw_data;
                u32 fw_size;

                radeon_ucode_print_gfx_hdr(&mec_hdr->header);

                /* MEC1 */
                fw_data = (const __le32 *)
                        (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < fw_size; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

                /* MEC2 */
                if (rdev->family == CHIP_KAVERI) {
                        /* NOTE(review): assumes mec2_fw was loaded whenever
                         * family == CHIP_KAVERI — verify in the fw init path
                         */
                        const struct gfx_firmware_header_v1_0 *mec2_hdr =
                                (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

                        fw_data = (const __le32 *)
                                (rdev->mec2_fw->data +
                                 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
                        fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < fw_size; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
                }
        } else {
                const __be32 *fw_data;

                /* MEC1 */
                fw_data = (const __be32 *)rdev->mec_fw->data;
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

                if (rdev->family == CHIP_KAVERI) {
                        /* MEC2 — old-style firmware reuses the single mec
                         * image; presumably it serves both MEs — TODO confirm
                         */
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                }
        }

        return 0;
}
4227
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
        /* just unhalt the MECs; queue setup happens elsewhere */
        cik_cp_compute_enable(rdev, true);

        return 0;
}
4242
4243 /**
4244  * cik_cp_compute_fini - stop the compute queues
4245  *
4246  * @rdev: radeon_device pointer
4247  *
4248  * Stop the compute queues and tear down the driver queue
4249  * info.
4250  */
4251 static void cik_cp_compute_fini(struct radeon_device *rdev)
4252 {
4253         int i, idx, r;
4254
4255         cik_cp_compute_enable(rdev, false);
4256
4257         for (i = 0; i < 2; i++) {
4258                 if (i == 0)
4259                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4260                 else
4261                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4262
4263                 if (rdev->ring[idx].mqd_obj) {
4264                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4265                         if (unlikely(r != 0))
4266                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4267
4268                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4269                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4270
4271                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4272                         rdev->ring[idx].mqd_obj = NULL;
4273                 }
4274         }
4275 }
4276
/* cik_mec_fini - unpin and free the MEC HPD EOP buffer object
 * allocated by cik_mec_init()
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		/* failed reserve is only warned about; teardown continues */
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4292
4293 #define MEC_HPD_SIZE 2048
4294
4295 static int cik_mec_init(struct radeon_device *rdev)
4296 {
4297         int r;
4298         u32 *hpd;
4299
4300         /*
4301          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4302          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4303          */
4304         if (rdev->family == CHIP_KAVERI)
4305                 rdev->mec.num_mec = 2;
4306         else
4307                 rdev->mec.num_mec = 1;
4308         rdev->mec.num_pipe = 4;
4309         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4310
4311         if (rdev->mec.hpd_eop_obj == NULL) {
4312                 r = radeon_bo_create(rdev,
4313                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4314                                      PAGE_SIZE, true,
4315                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4316                                      &rdev->mec.hpd_eop_obj);
4317                 if (r) {
4318                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4319                         return r;
4320                 }
4321         }
4322
4323         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4324         if (unlikely(r != 0)) {
4325                 cik_mec_fini(rdev);
4326                 return r;
4327         }
4328         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4329                           &rdev->mec.hpd_eop_gpu_addr);
4330         if (r) {
4331                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4332                 cik_mec_fini(rdev);
4333                 return r;
4334         }
4335         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4336         if (r) {
4337                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4338                 cik_mec_fini(rdev);
4339                 return r;
4340         }
4341
4342         /* clear memory.  Not sure if this is required or not */
4343         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4344
4345         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4346         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4347
4348         return 0;
4349 }
4350
/* Shadow copy of the CP_HQD_* / CP_MQD_* registers describing one hardware
 * queue.  Embedded in struct bonaire_mqd below; the layout mirrors the
 * register programming done in cik_cp_compute_resume(), so fields must not
 * be reordered or resized.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4389
/* Memory Queue Descriptor (MQD) for a CIK compute queue.  An instance is
 * allocated in a GTT BO and its GPU address is handed to the CP via
 * CP_MQD_BASE_ADDR (see cik_cp_compute_resume()), so the layout is
 * hardware-defined — do not reorder or resize the fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* per-queue register shadow */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4417
4418 /**
4419  * cik_cp_compute_resume - setup the compute queue registers
4420  *
4421  * @rdev: radeon_device pointer
4422  *
4423  * Program the compute queues and test them to make sure they
4424  * are working.
4425  * Returns 0 for success, error for failure.
4426  */
4427 static int cik_cp_compute_resume(struct radeon_device *rdev)
4428 {
4429         int r, i, j, idx;
4430         u32 tmp;
4431         bool use_doorbell = true;
4432         u64 hqd_gpu_addr;
4433         u64 mqd_gpu_addr;
4434         u64 eop_gpu_addr;
4435         u64 wb_gpu_addr;
4436         u32 *buf;
4437         struct bonaire_mqd *mqd;
4438
4439         r = cik_cp_compute_start(rdev);
4440         if (r)
4441                 return r;
4442
4443         /* fix up chicken bits */
4444         tmp = RREG32(CP_CPF_DEBUG);
4445         tmp |= (1 << 23);
4446         WREG32(CP_CPF_DEBUG, tmp);
4447
4448         /* init the pipes */
4449         mutex_lock(&rdev->srbm_mutex);
4450
4451         for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4452                 int me = (i < 4) ? 1 : 2;
4453                 int pipe = (i < 4) ? i : (i - 4);
4454
4455                 cik_srbm_select(rdev, me, pipe, 0, 0);
4456
4457                 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4458                 /* write the EOP addr */
4459                 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4460                 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4461
4462                 /* set the VMID assigned */
4463                 WREG32(CP_HPD_EOP_VMID, 0);
4464
4465                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4466                 tmp = RREG32(CP_HPD_EOP_CONTROL);
4467                 tmp &= ~EOP_SIZE_MASK;
4468                 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4469                 WREG32(CP_HPD_EOP_CONTROL, tmp);
4470
4471         }
4472         cik_srbm_select(rdev, 0, 0, 0, 0);
4473         mutex_unlock(&rdev->srbm_mutex);
4474
4475         /* init the queues.  Just two for now. */
4476         for (i = 0; i < 2; i++) {
4477                 if (i == 0)
4478                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4479                 else
4480                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4481
4482                 if (rdev->ring[idx].mqd_obj == NULL) {
4483                         r = radeon_bo_create(rdev,
4484                                              sizeof(struct bonaire_mqd),
4485                                              PAGE_SIZE, true,
4486                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4487                                              NULL, &rdev->ring[idx].mqd_obj);
4488                         if (r) {
4489                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4490                                 return r;
4491                         }
4492                 }
4493
4494                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4495                 if (unlikely(r != 0)) {
4496                         cik_cp_compute_fini(rdev);
4497                         return r;
4498                 }
4499                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4500                                   &mqd_gpu_addr);
4501                 if (r) {
4502                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4503                         cik_cp_compute_fini(rdev);
4504                         return r;
4505                 }
4506                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4507                 if (r) {
4508                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4509                         cik_cp_compute_fini(rdev);
4510                         return r;
4511                 }
4512
4513                 /* init the mqd struct */
4514                 memset(buf, 0, sizeof(struct bonaire_mqd));
4515
4516                 mqd = (struct bonaire_mqd *)buf;
4517                 mqd->header = 0xC0310800;
4518                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4519                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4520                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4521                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4522
4523                 mutex_lock(&rdev->srbm_mutex);
4524                 cik_srbm_select(rdev, rdev->ring[idx].me,
4525                                 rdev->ring[idx].pipe,
4526                                 rdev->ring[idx].queue, 0);
4527
4528                 /* disable wptr polling */
4529                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4530                 tmp &= ~WPTR_POLL_EN;
4531                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4532
4533                 /* enable doorbell? */
4534                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4535                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4536                 if (use_doorbell)
4537                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4538                 else
4539                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4540                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4541                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4542
4543                 /* disable the queue if it's active */
4544                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4545                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4546                 mqd->queue_state.cp_hqd_pq_wptr= 0;
4547                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4548                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4549                         for (j = 0; j < rdev->usec_timeout; j++) {
4550                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4551                                         break;
4552                                 udelay(1);
4553                         }
4554                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4555                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4556                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4557                 }
4558
4559                 /* set the pointer to the MQD */
4560                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4561                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4562                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4563                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4564                 /* set MQD vmid to 0 */
4565                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4566                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4567                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4568
4569                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4570                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4571                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4572                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4573                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4574                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4575
4576                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4577                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4578                 mqd->queue_state.cp_hqd_pq_control &=
4579                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4580
4581                 mqd->queue_state.cp_hqd_pq_control |=
4582                         order_base_2(rdev->ring[idx].ring_size / 8);
4583                 mqd->queue_state.cp_hqd_pq_control |=
4584                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4585 #ifdef __BIG_ENDIAN
4586                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4587 #endif
4588                 mqd->queue_state.cp_hqd_pq_control &=
4589                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4590                 mqd->queue_state.cp_hqd_pq_control |=
4591                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4592                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4593
4594                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4595                 if (i == 0)
4596                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4597                 else
4598                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4599                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4600                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4601                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4602                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4603                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4604
4605                 /* set the wb address wether it's enabled or not */
4606                 if (i == 0)
4607                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4608                 else
4609                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4610                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4611                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4612                         upper_32_bits(wb_gpu_addr) & 0xffff;
4613                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4614                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4615                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4616                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4617
4618                 /* enable the doorbell if requested */
4619                 if (use_doorbell) {
4620                         mqd->queue_state.cp_hqd_pq_doorbell_control =
4621                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4622                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4623                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
4624                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4625                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4626                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
4627                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4628
4629                 } else {
4630                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4631                 }
4632                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4633                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4634
4635                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4636                 rdev->ring[idx].wptr = 0;
4637                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4638                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4639                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4640
4641                 /* set the vmid for the queue */
4642                 mqd->queue_state.cp_hqd_vmid = 0;
4643                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4644
4645                 /* activate the queue */
4646                 mqd->queue_state.cp_hqd_active = 1;
4647                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4648
4649                 cik_srbm_select(rdev, 0, 0, 0, 0);
4650                 mutex_unlock(&rdev->srbm_mutex);
4651
4652                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4653                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4654
4655                 rdev->ring[idx].ready = true;
4656                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4657                 if (r)
4658                         rdev->ring[idx].ready = false;
4659         }
4660
4661         return 0;
4662 }
4663
/* cik_cp_enable - enable/disable both the gfx and compute command processors */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4669
/* cik_cp_load_microcode - load the gfx CP ucode, then the compute (MEC)
 * ucode.  Returns 0 on success, the first error code otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4683
/* cik_cp_fini - tear down both the gfx and compute command processors */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4689
4690 static int cik_cp_resume(struct radeon_device *rdev)
4691 {
4692         int r;
4693
4694         cik_enable_gui_idle_interrupt(rdev, false);
4695
4696         r = cik_cp_load_microcode(rdev);
4697         if (r)
4698                 return r;
4699
4700         r = cik_cp_gfx_resume(rdev);
4701         if (r)
4702                 return r;
4703         r = cik_cp_compute_resume(rdev);
4704         if (r)
4705                 return r;
4706
4707         cik_enable_gui_idle_interrupt(rdev, true);
4708
4709         return 0;
4710 }
4711
/* cik_print_gpu_status_regs - dump the GRBM/SRBM/SDMA/CP status registers
 * to the kernel log; used for GPU hang/reset diagnostics
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4751
4752 /**
4753  * cik_gpu_check_soft_reset - check which blocks are busy
4754  *
4755  * @rdev: radeon_device pointer
4756  *
4757  * Check which blocks are busy and return the relevant reset
4758  * mask to be used by cik_gpu_soft_reset().
4759  * Returns a mask of the blocks to be reset.
4760  */
4761 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4762 {
4763         u32 reset_mask = 0;
4764         u32 tmp;
4765
4766         /* GRBM_STATUS */
4767         tmp = RREG32(GRBM_STATUS);
4768         if (tmp & (PA_BUSY | SC_BUSY |
4769                    BCI_BUSY | SX_BUSY |
4770                    TA_BUSY | VGT_BUSY |
4771                    DB_BUSY | CB_BUSY |
4772                    GDS_BUSY | SPI_BUSY |
4773                    IA_BUSY | IA_BUSY_NO_DMA))
4774                 reset_mask |= RADEON_RESET_GFX;
4775
4776         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4777                 reset_mask |= RADEON_RESET_CP;
4778
4779         /* GRBM_STATUS2 */
4780         tmp = RREG32(GRBM_STATUS2);
4781         if (tmp & RLC_BUSY)
4782                 reset_mask |= RADEON_RESET_RLC;
4783
4784         /* SDMA0_STATUS_REG */
4785         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4786         if (!(tmp & SDMA_IDLE))
4787                 reset_mask |= RADEON_RESET_DMA;
4788
4789         /* SDMA1_STATUS_REG */
4790         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4791         if (!(tmp & SDMA_IDLE))
4792                 reset_mask |= RADEON_RESET_DMA1;
4793
4794         /* SRBM_STATUS2 */
4795         tmp = RREG32(SRBM_STATUS2);
4796         if (tmp & SDMA_BUSY)
4797                 reset_mask |= RADEON_RESET_DMA;
4798
4799         if (tmp & SDMA1_BUSY)
4800                 reset_mask |= RADEON_RESET_DMA1;
4801
4802         /* SRBM_STATUS */
4803         tmp = RREG32(SRBM_STATUS);
4804
4805         if (tmp & IH_BUSY)
4806                 reset_mask |= RADEON_RESET_IH;
4807
4808         if (tmp & SEM_BUSY)
4809                 reset_mask |= RADEON_RESET_SEM;
4810
4811         if (tmp & GRBM_RQ_PENDING)
4812                 reset_mask |= RADEON_RESET_GRBM;
4813
4814         if (tmp & VMC_BUSY)
4815                 reset_mask |= RADEON_RESET_VMC;
4816
4817         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4818                    MCC_BUSY | MCD_BUSY))
4819                 reset_mask |= RADEON_RESET_MC;
4820
4821         if (evergreen_is_display_hung(rdev))
4822                 reset_mask |= RADEON_RESET_DISPLAY;
4823
4824         /* Skip MC reset as it's mostly likely not hung, just busy */
4825         if (reset_mask & RADEON_RESET_MC) {
4826                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4827                 reset_mask &= ~RADEON_RESET_MC;
4828         }
4829
4830         return reset_mask;
4831 }
4832
4833 /**
4834  * cik_gpu_soft_reset - soft reset GPU
4835  *
4836  * @rdev: radeon_device pointer
4837  * @reset_mask: mask of which blocks to reset
4838  *
4839  * Soft reset the blocks specified in @reset_mask.
4840  */
4841 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4842 {
4843         struct evergreen_mc_save save;
4844         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4845         u32 tmp;
4846
4847         if (reset_mask == 0)
4848                 return;
4849
4850         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4851
4852         cik_print_gpu_status_regs(rdev);
4853         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4854                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4855         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4856                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4857
4858         /* disable CG/PG */
4859         cik_fini_pg(rdev);
4860         cik_fini_cg(rdev);
4861
4862         /* stop the rlc */
4863         cik_rlc_stop(rdev);
4864
4865         /* Disable GFX parsing/prefetching */
4866         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4867
4868         /* Disable MEC parsing/prefetching */
4869         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4870
4871         if (reset_mask & RADEON_RESET_DMA) {
4872                 /* sdma0 */
4873                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4874                 tmp |= SDMA_HALT;
4875                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4876         }
4877         if (reset_mask & RADEON_RESET_DMA1) {
4878                 /* sdma1 */
4879                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4880                 tmp |= SDMA_HALT;
4881                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4882         }
4883
4884         evergreen_mc_stop(rdev, &save);
4885         if (evergreen_mc_wait_for_idle(rdev)) {
4886                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4887         }
4888
4889         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4890                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4891
4892         if (reset_mask & RADEON_RESET_CP) {
4893                 grbm_soft_reset |= SOFT_RESET_CP;
4894
4895                 srbm_soft_reset |= SOFT_RESET_GRBM;
4896         }
4897
4898         if (reset_mask & RADEON_RESET_DMA)
4899                 srbm_soft_reset |= SOFT_RESET_SDMA;
4900
4901         if (reset_mask & RADEON_RESET_DMA1)
4902                 srbm_soft_reset |= SOFT_RESET_SDMA1;
4903
4904         if (reset_mask & RADEON_RESET_DISPLAY)
4905                 srbm_soft_reset |= SOFT_RESET_DC;
4906
4907         if (reset_mask & RADEON_RESET_RLC)
4908                 grbm_soft_reset |= SOFT_RESET_RLC;
4909
4910         if (reset_mask & RADEON_RESET_SEM)
4911                 srbm_soft_reset |= SOFT_RESET_SEM;
4912
4913         if (reset_mask & RADEON_RESET_IH)
4914                 srbm_soft_reset |= SOFT_RESET_IH;
4915
4916         if (reset_mask & RADEON_RESET_GRBM)
4917                 srbm_soft_reset |= SOFT_RESET_GRBM;
4918
4919         if (reset_mask & RADEON_RESET_VMC)
4920                 srbm_soft_reset |= SOFT_RESET_VMC;
4921
4922         if (!(rdev->flags & RADEON_IS_IGP)) {
4923                 if (reset_mask & RADEON_RESET_MC)
4924                         srbm_soft_reset |= SOFT_RESET_MC;
4925         }
4926
4927         if (grbm_soft_reset) {
4928                 tmp = RREG32(GRBM_SOFT_RESET);
4929                 tmp |= grbm_soft_reset;
4930                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4931                 WREG32(GRBM_SOFT_RESET, tmp);
4932                 tmp = RREG32(GRBM_SOFT_RESET);
4933
4934                 udelay(50);
4935
4936                 tmp &= ~grbm_soft_reset;
4937                 WREG32(GRBM_SOFT_RESET, tmp);
4938                 tmp = RREG32(GRBM_SOFT_RESET);
4939         }
4940
4941         if (srbm_soft_reset) {
4942                 tmp = RREG32(SRBM_SOFT_RESET);
4943                 tmp |= srbm_soft_reset;
4944                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4945                 WREG32(SRBM_SOFT_RESET, tmp);
4946                 tmp = RREG32(SRBM_SOFT_RESET);
4947
4948                 udelay(50);
4949
4950                 tmp &= ~srbm_soft_reset;
4951                 WREG32(SRBM_SOFT_RESET, tmp);
4952                 tmp = RREG32(SRBM_SOFT_RESET);
4953         }
4954
4955         /* Wait a little for things to settle down */
4956         udelay(50);
4957
4958         evergreen_mc_resume(rdev, &save);
4959         udelay(50);
4960
4961         cik_print_gpu_status_regs(rdev);
4962 }
4963
/* GMCON register state saved across a KV GPU reset, see
 * kv_save_regs_for_reset()/kv_restore_regs_for_reset()
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
4969
/* kv_save_regs_for_reset - save the GMCON registers touched by the reset
 * sequence, then disable the RENG execute-on-powerup/reg-update and
 * stutter features while the reset is in progress
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
4981
/* Restore GMCON state after a pci config reset on KV/KB IGPs.
 *
 * Replays a PGFSM init sequence (each stage: load a data word, kick the
 * config register, then pad with five zero writes) and finally restores
 * the registers captured by kv_save_regs_for_reset().
 * NOTE(review): the stage values are magic numbers, presumably from the
 * AMD hw init sequence -- do not reorder or change them.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
                                      struct kv_reset_save_regs *save)
{
        int i;

        WREG32(GMCON_PGFSM_WRITE, 0);
        WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0);
        WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x210000);
        WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x21003);
        WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x2b00);
        WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0);
        WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x420000);
        WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x120202);
        WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
        WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
        WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

        for (i = 0; i < 5; i++)
                WREG32(GMCON_PGFSM_WRITE, 0);

        WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
        WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

        /* bring back the state saved before the reset */
        WREG32(GMCON_MISC3, save->gmcon_misc3);
        WREG32(GMCON_MISC, save->gmcon_misc);
        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5054
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, MEC, SDMA and RLC engines, stops memory access,
 * then resets the whole chip through the pci config register.
 * On IGPs the GMCON registers are saved before and restored after
 * the reset.  A readable CONFIG_MEMSIZE is used as the signal that
 * the asic has come back out of reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        struct kv_reset_save_regs kv_save = { 0 };
        u32 tmp, i;

        dev_info(rdev->dev, "GPU pci config reset\n");

        /* disable dpm? */

        /* disable cg/pg */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        /* sdma0 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        /* sdma1 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* XXX other engines? */

        /* halt the rlc, disable cp internal ints */
        cik_rlc_stop(rdev);

        udelay(50);

        /* disable mem access */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
        }

        /* IGPs: preserve GMCON state across the reset */
        if (rdev->flags & RADEON_IS_IGP)
                kv_save_regs_for_reset(rdev, &kv_save);

        /* disable BM */
        pci_clear_master(rdev->pdev);
        /* reset */
        radeon_pci_config_reset(rdev);

        udelay(100);

        /* wait for asic to come out of reset */
        for (i = 0; i < rdev->usec_timeout; i++) {
                /* CONFIG_MEMSIZE reads as all-ones while the asic is in reset */
                if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
                        break;
                udelay(1);
        }

        /* does asic init need to be run first??? */
        if (rdev->flags & RADEON_IS_IGP)
                kv_restore_regs_for_reset(rdev, &kv_save);
}
5117
5118 /**
5119  * cik_asic_reset - soft reset GPU
5120  *
5121  * @rdev: radeon_device pointer
5122  * @hard: force hard reset
5123  *
5124  * Look up which blocks are hung and attempt
5125  * to reset them.
5126  * Returns 0 for success.
5127  */
5128 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5129 {
5130         u32 reset_mask;
5131
5132         if (hard) {
5133                 cik_gpu_pci_config_reset(rdev);
5134                 return 0;
5135         }
5136
5137         reset_mask = cik_gpu_check_soft_reset(rdev);
5138
5139         if (reset_mask)
5140                 r600_set_bios_scratch_engine_hung(rdev, true);
5141
5142         /* try soft reset */
5143         cik_gpu_soft_reset(rdev, reset_mask);
5144
5145         reset_mask = cik_gpu_check_soft_reset(rdev);
5146
5147         /* try pci config reset */
5148         if (reset_mask && radeon_hard_reset)
5149                 cik_gpu_pci_config_reset(rdev);
5150
5151         reset_mask = cik_gpu_check_soft_reset(rdev);
5152
5153         if (!reset_mask)
5154                 r600_set_bios_scratch_engine_hung(rdev, false);
5155
5156         return 0;
5157 }
5158
5159 /**
5160  * cik_gfx_is_lockup - check if the 3D engine is locked up
5161  *
5162  * @rdev: radeon_device pointer
5163  * @ring: radeon_ring structure holding ring information
5164  *
5165  * Check if the 3D engine is locked up (CIK).
5166  * Returns true if the engine is locked, false if not.
5167  */
5168 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5169 {
5170         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5171
5172         if (!(reset_mask & (RADEON_RESET_GFX |
5173                             RADEON_RESET_COMPUTE |
5174                             RADEON_RESET_CP))) {
5175                 radeon_ring_lockup_update(rdev, ring);
5176                 return false;
5177         }
5178         return radeon_ring_test_lockup(rdev, ring);
5179 }
5180
5181 /* MC */
5182 /**
5183  * cik_mc_program - program the GPU memory controller
5184  *
5185  * @rdev: radeon_device pointer
5186  *
5187  * Set the location of vram, gart, and AGP in the GPU's
5188  * physical address space (CIK).
5189  */
5190 static void cik_mc_program(struct radeon_device *rdev)
5191 {
5192         struct evergreen_mc_save save;
5193         u32 tmp;
5194         int i, j;
5195
5196         /* Initialize HDP */
5197         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5198                 WREG32((0x2c14 + j), 0x00000000);
5199                 WREG32((0x2c18 + j), 0x00000000);
5200                 WREG32((0x2c1c + j), 0x00000000);
5201                 WREG32((0x2c20 + j), 0x00000000);
5202                 WREG32((0x2c24 + j), 0x00000000);
5203         }
5204         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5205
5206         evergreen_mc_stop(rdev, &save);
5207         if (radeon_mc_wait_for_idle(rdev)) {
5208                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5209         }
5210         /* Lockout access through VGA aperture*/
5211         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5212         /* Update configuration */
5213         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5214                rdev->mc.vram_start >> 12);
5215         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5216                rdev->mc.vram_end >> 12);
5217         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5218                rdev->vram_scratch.gpu_addr >> 12);
5219         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5220         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5221         WREG32(MC_VM_FB_LOCATION, tmp);
5222         /* XXX double check these! */
5223         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5224         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5225         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5226         WREG32(MC_VM_AGP_BASE, 0);
5227         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5228         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5229         if (radeon_mc_wait_for_idle(rdev)) {
5230                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5231         }
5232         evergreen_mc_resume(rdev, &save);
5233         /* we need to own VRAM, so turn off the VGA renderer here
5234          * to stop it overwriting our objects */
5235         rv515_vga_render_disable(rdev);
5236 }
5237
5238 /**
5239  * cik_mc_init - initialize the memory controller driver params
5240  *
5241  * @rdev: radeon_device pointer
5242  *
5243  * Look up the amount of vram, vram width, and decide how to place
5244  * vram and gart within the GPU's physical address space (CIK).
5245  * Returns 0 for success.
5246  */
5247 static int cik_mc_init(struct radeon_device *rdev)
5248 {
5249         u32 tmp;
5250         int chansize, numchan;
5251
5252         /* Get VRAM informations */
5253         rdev->mc.vram_is_ddr = true;
5254         tmp = RREG32(MC_ARB_RAMCFG);
5255         if (tmp & CHANSIZE_MASK) {
5256                 chansize = 64;
5257         } else {
5258                 chansize = 32;
5259         }
5260         tmp = RREG32(MC_SHARED_CHMAP);
5261         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5262         case 0:
5263         default:
5264                 numchan = 1;
5265                 break;
5266         case 1:
5267                 numchan = 2;
5268                 break;
5269         case 2:
5270                 numchan = 4;
5271                 break;
5272         case 3:
5273                 numchan = 8;
5274                 break;
5275         case 4:
5276                 numchan = 3;
5277                 break;
5278         case 5:
5279                 numchan = 6;
5280                 break;
5281         case 6:
5282                 numchan = 10;
5283                 break;
5284         case 7:
5285                 numchan = 12;
5286                 break;
5287         case 8:
5288                 numchan = 16;
5289                 break;
5290         }
5291         rdev->mc.vram_width = numchan * chansize;
5292         /* Could aper size report 0 ? */
5293         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5294         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5295         /* size in MB on si */
5296         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5297         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5298         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5299         si_vram_gtt_location(rdev, &rdev->mc);
5300         radeon_update_bandwidth_info(rdev);
5301
5302         return 0;
5303 }
5304
5305 /*
5306  * GART
5307  * VMID 0 is the physical GPU addresses as used by the kernel.
5308  * VMIDs 1-15 are used for userspace clients and are handled
5309  * by the radeon vm/hsa code.
5310  */
5311 /**
5312  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5313  *
5314  * @rdev: radeon_device pointer
5315  *
5316  * Flush the TLB for the VMID 0 page table (CIK).
5317  */
5318 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5319 {
5320         /* flush hdp cache */
5321         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5322
5323         /* bits 0-15 are the VM contexts0-15 */
5324         WREG32(VM_INVALIDATE_REQUEST, 0x1);
5325 }
5326
5327 /**
5328  * cik_pcie_gart_enable - gart enable
5329  *
5330  * @rdev: radeon_device pointer
5331  *
5332  * This sets up the TLBs, programs the page tables for VMID0,
5333  * sets up the hw for VMIDs 1-15 which are allocated on
5334  * demand, and sets up the global locations for the LDS, GDS,
5335  * and GPUVM for FSA64 clients (CIK).
5336  * Returns 0 for success, errors for failure.
5337  */
5338 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5339 {
5340         int r, i;
5341
5342         if (rdev->gart.robj == NULL) {
5343                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5344                 return -EINVAL;
5345         }
5346         r = radeon_gart_table_vram_pin(rdev);
5347         if (r)
5348                 return r;
5349         /* Setup TLB control */
5350         WREG32(MC_VM_MX_L1_TLB_CNTL,
5351                (0xA << 7) |
5352                ENABLE_L1_TLB |
5353                ENABLE_L1_FRAGMENT_PROCESSING |
5354                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5355                ENABLE_ADVANCED_DRIVER_MODEL |
5356                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5357         /* Setup L2 cache */
5358         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5359                ENABLE_L2_FRAGMENT_PROCESSING |
5360                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5361                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5362                EFFECTIVE_L2_QUEUE_SIZE(7) |
5363                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5364         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5365         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5366                BANK_SELECT(4) |
5367                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5368         /* setup context0 */
5369         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5370         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5371         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5372         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5373                         (u32)(rdev->dummy_page.addr >> 12));
5374         WREG32(VM_CONTEXT0_CNTL2, 0);
5375         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5376                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5377
5378         WREG32(0x15D4, 0);
5379         WREG32(0x15D8, 0);
5380         WREG32(0x15DC, 0);
5381
5382         /* restore context1-15 */
5383         /* set vm size, must be a multiple of 4 */
5384         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5385         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5386         for (i = 1; i < 16; i++) {
5387                 if (i < 8)
5388                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5389                                rdev->vm_manager.saved_table_addr[i]);
5390                 else
5391                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5392                                rdev->vm_manager.saved_table_addr[i]);
5393         }
5394
5395         /* enable context1-15 */
5396         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5397                (u32)(rdev->dummy_page.addr >> 12));
5398         WREG32(VM_CONTEXT1_CNTL2, 4);
5399         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5400                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5401                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5402                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5403                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5404                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5405                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5406                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5407                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5408                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5409                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5410                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5411                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5412                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5413
5414         if (rdev->family == CHIP_KAVERI) {
5415                 u32 tmp = RREG32(CHUB_CONTROL);
5416                 tmp &= ~BYPASS_VM;
5417                 WREG32(CHUB_CONTROL, tmp);
5418         }
5419
5420         /* XXX SH_MEM regs */
5421         /* where to put LDS, scratch, GPUVM in FSA64 space */
5422         mutex_lock(&rdev->srbm_mutex);
5423         for (i = 0; i < 16; i++) {
5424                 cik_srbm_select(rdev, 0, 0, 0, i);
5425                 /* CP and shaders */
5426                 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5427                 WREG32(SH_MEM_APE1_BASE, 1);
5428                 WREG32(SH_MEM_APE1_LIMIT, 0);
5429                 WREG32(SH_MEM_BASES, 0);
5430                 /* SDMA GFX */
5431                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5432                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5433                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5434                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5435                 /* XXX SDMA RLC - todo */
5436         }
5437         cik_srbm_select(rdev, 0, 0, 0, 0);
5438         mutex_unlock(&rdev->srbm_mutex);
5439
5440         cik_pcie_gart_tlb_flush(rdev);
5441         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5442                  (unsigned)(rdev->mc.gtt_size >> 20),
5443                  (unsigned long long)rdev->gart.table_addr);
5444         rdev->gart.ready = true;
5445         return 0;
5446 }
5447
5448 /**
5449  * cik_pcie_gart_disable - gart disable
5450  *
5451  * @rdev: radeon_device pointer
5452  *
5453  * This disables all VM page table (CIK).
5454  */
5455 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5456 {
5457         unsigned i;
5458
5459         for (i = 1; i < 16; ++i) {
5460                 uint32_t reg;
5461                 if (i < 8)
5462                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5463                 else
5464                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5465                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5466         }
5467
5468         /* Disable all tables */
5469         WREG32(VM_CONTEXT0_CNTL, 0);
5470         WREG32(VM_CONTEXT1_CNTL, 0);
5471         /* Setup TLB control */
5472         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5473                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5474         /* Setup L2 cache */
5475         WREG32(VM_L2_CNTL,
5476                ENABLE_L2_FRAGMENT_PROCESSING |
5477                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5478                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5479                EFFECTIVE_L2_QUEUE_SIZE(7) |
5480                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5481         WREG32(VM_L2_CNTL2, 0);
5482         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5483                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5484         radeon_gart_table_vram_unpin(rdev);
5485 }
5486
5487 /**
5488  * cik_pcie_gart_fini - vm fini callback
5489  *
5490  * @rdev: radeon_device pointer
5491  *
5492  * Tears down the driver GART/VM setup (CIK).
5493  */
5494 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5495 {
5496         cik_pcie_gart_disable(rdev);
5497         radeon_gart_table_vram_free(rdev);
5498         radeon_gart_fini(rdev);
5499 }
5500
5501 /* vm parser */
5502 /**
5503  * cik_ib_parse - vm ib_parse callback
5504  *
5505  * @rdev: radeon_device pointer
5506  * @ib: indirect buffer pointer
5507  *
5508  * CIK uses hw IB checking so this is a nop (CIK).
5509  */
5510 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5511 {
5512         return 0;
5513 }
5514
5515 /*
5516  * vm
5517  * VMID 0 is the physical GPU addresses as used by the kernel.
5518  * VMIDs 1-15 are used for userspace clients and are handled
5519  * by the radeon vm/hsa code.
5520  */
5521 /**
5522  * cik_vm_init - cik vm init callback
5523  *
5524  * @rdev: radeon_device pointer
5525  *
5526  * Inits cik specific vm parameters (number of VMs, base of vram for
5527  * VMIDs 1-15) (CIK).
5528  * Returns 0 for success.
5529  */
5530 int cik_vm_init(struct radeon_device *rdev)
5531 {
5532         /*
5533          * number of VMs
5534          * VMID 0 is reserved for System
5535          * radeon graphics/compute will use VMIDs 1-15
5536          */
5537         rdev->vm_manager.nvm = 16;
5538         /* base offset of vram pages */
5539         if (rdev->flags & RADEON_IS_IGP) {
5540                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5541                 tmp <<= 22;
5542                 rdev->vm_manager.vram_base_offset = tmp;
5543         } else
5544                 rdev->vm_manager.vram_base_offset = 0;
5545
5546         return 0;
5547 }
5548
5549 /**
5550  * cik_vm_fini - cik vm fini callback
5551  *
5552  * @rdev: radeon_device pointer
5553  *
5554  * Tear down any asic specific VM setup (CIK).
5555  */
5556 void cik_vm_fini(struct radeon_device *rdev)
5557 {
5558 }
5559
5560 /**
5561  * cik_vm_decode_fault - print human readable fault info
5562  *
5563  * @rdev: radeon_device pointer
5564  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5565  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5566  * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5567  *
5568  * Print human readable fault information (CIK).
5569  */
5570 static void cik_vm_decode_fault(struct radeon_device *rdev,
5571                                 u32 status, u32 addr, u32 mc_client)
5572 {
5573         u32 mc_id;
5574         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5575         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5576         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5577                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5578
5579         if (rdev->family == CHIP_HAWAII)
5580                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5581         else
5582                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5583
5584         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5585                protections, vmid, addr,
5586                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5587                block, mc_client, mc_id);
5588 }
5589
5590 /*
5591  * cik_vm_flush - cik vm flush using the CP
5592  *
5593  * Update the page table base and flush the VM TLB
5594  * using the CP (CIK).
5595  */
5596 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5597                   unsigned vm_id, uint64_t pd_addr)
5598 {
5599         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5600
5601         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5602         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5603                                  WRITE_DATA_DST_SEL(0)));
5604         if (vm_id < 8) {
5605                 radeon_ring_write(ring,
5606                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5607         } else {
5608                 radeon_ring_write(ring,
5609                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5610         }
5611         radeon_ring_write(ring, 0);
5612         radeon_ring_write(ring, pd_addr >> 12);
5613
5614         /* update SH_MEM_* regs */
5615         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5616         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5617                                  WRITE_DATA_DST_SEL(0)));
5618         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5619         radeon_ring_write(ring, 0);
5620         radeon_ring_write(ring, VMID(vm_id));
5621
5622         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5623         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5624                                  WRITE_DATA_DST_SEL(0)));
5625         radeon_ring_write(ring, SH_MEM_BASES >> 2);
5626         radeon_ring_write(ring, 0);
5627
5628         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5629         radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5630         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5631         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5632
5633         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5634         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5635                                  WRITE_DATA_DST_SEL(0)));
5636         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5637         radeon_ring_write(ring, 0);
5638         radeon_ring_write(ring, VMID(0));
5639
5640         /* HDP flush */
5641         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5642
5643         /* bits 0-15 are the VM contexts0-15 */
5644         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5645         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5646                                  WRITE_DATA_DST_SEL(0)));
5647         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5648         radeon_ring_write(ring, 0);
5649         radeon_ring_write(ring, 1 << vm_id);
5650
5651         /* wait for the invalidate to complete */
5652         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5653         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5654                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5655                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5656         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5657         radeon_ring_write(ring, 0);
5658         radeon_ring_write(ring, 0); /* ref */
5659         radeon_ring_write(ring, 0); /* mask */
5660         radeon_ring_write(ring, 0x20); /* poll interval */
5661
5662         /* compute doesn't have PFP */
5663         if (usepfp) {
5664                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5665                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5666                 radeon_ring_write(ring, 0x0);
5667         }
5668 }
5669
5670 /*
5671  * RLC
5672  * The RLC is a multi-purpose microengine that handles a
5673  * variety of functions, the most important of which is
5674  * the interrupt controller.
5675  */
5676 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5677                                           bool enable)
5678 {
5679         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5680
5681         if (enable)
5682                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5683         else
5684                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5685         WREG32(CP_INT_CNTL_RING0, tmp);
5686 }
5687
5688 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5689 {
5690         u32 tmp;
5691
5692         tmp = RREG32(RLC_LB_CNTL);
5693         if (enable)
5694                 tmp |= LOAD_BALANCE_ENABLE;
5695         else
5696                 tmp &= ~LOAD_BALANCE_ENABLE;
5697         WREG32(RLC_LB_CNTL, tmp);
5698 }
5699
/* Wait for the RLC serdes units to go idle: poll the per-CU master
 * busy register for every SE/SH (each with a usec timeout), then the
 * non-CU masters (SE/GC/TC0/TC1).  Timeouts are silent.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
        u32 i, j, k;
        u32 mask;

        for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
                for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
                        cik_select_se_sh(rdev, i, j);
                        for (k = 0; k < rdev->usec_timeout; k++) {
                                if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                                udelay(1);
                        }
                }
        }
        /* restore broadcast mode */
        cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
        for (k = 0; k < rdev->usec_timeout; k++) {
                if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
                        break;
                udelay(1);
        }
}
5724
5725 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5726 {
5727         u32 tmp;
5728
5729         tmp = RREG32(RLC_CNTL);
5730         if (tmp != rlc)
5731                 WREG32(RLC_CNTL, rlc);
5732 }
5733
5734 static u32 cik_halt_rlc(struct radeon_device *rdev)
5735 {
5736         u32 data, orig;
5737
5738         orig = data = RREG32(RLC_CNTL);
5739
5740         if (data & RLC_ENABLE) {
5741                 u32 i;
5742
5743                 data &= ~RLC_ENABLE;
5744                 WREG32(RLC_CNTL, data);
5745
5746                 for (i = 0; i < rdev->usec_timeout; i++) {
5747                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5748                                 break;
5749                         udelay(1);
5750                 }
5751
5752                 cik_wait_for_rlc_serdes(rdev);
5753         }
5754
5755         return orig;
5756 }
5757
/* Request that the RLC enter safe mode and wait for the handshake:
 * first for the gfx power/clock status bits to be set, then for the
 * RLC to consume the request (REQ bit cleared).  Timeouts are silent.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
        u32 tmp, i, mask;

        tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
        WREG32(RLC_GPR_REG2, tmp);

        mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPM_STAT) & mask) == mask)
                        break;
                udelay(1);
        }

        /* poll until the RLC clears REQ, i.e. the message was taken */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
                        break;
                udelay(1);
        }
}
5778
5779 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5780 {
5781         u32 tmp;
5782
5783         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5784         WREG32(RLC_GPR_REG2, tmp);
5785 }
5786
5787 /**
5788  * cik_rlc_stop - stop the RLC ME
5789  *
5790  * @rdev: radeon_device pointer
5791  *
5792  * Halt the RLC ME (MicroEngine) (CIK).
5793  */
5794 static void cik_rlc_stop(struct radeon_device *rdev)
5795 {
5796         WREG32(RLC_CNTL, 0);
5797
5798         cik_enable_gui_idle_interrupt(rdev, false);
5799
5800         cik_wait_for_rlc_serdes(rdev);
5801 }
5802
5803 /**
5804  * cik_rlc_start - start the RLC ME
5805  *
5806  * @rdev: radeon_device pointer
5807  *
5808  * Unhalt the RLC ME (MicroEngine) (CIK).
5809  */
5810 static void cik_rlc_start(struct radeon_device *rdev)
5811 {
5812         WREG32(RLC_CNTL, RLC_ENABLE);
5813
5814         cik_enable_gui_idle_interrupt(rdev, true);
5815
5816         udelay(50);
5817 }
5818
5819 /**
5820  * cik_rlc_resume - setup the RLC hw
5821  *
5822  * @rdev: radeon_device pointer
5823  *
5824  * Initialize the RLC registers, load the ucode,
5825  * and start the RLC (CIK).
5826  * Returns 0 for success, -EINVAL if the ucode is not available.
5827  */
5828 static int cik_rlc_resume(struct radeon_device *rdev)
5829 {
5830         u32 i, size, tmp;
5831
5832         if (!rdev->rlc_fw)
5833                 return -EINVAL;
5834
5835         cik_rlc_stop(rdev);
5836
5837         /* disable CG */
5838         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5839         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5840
5841         si_rlc_reset(rdev);
5842
5843         cik_init_pg(rdev);
5844
5845         cik_init_cg(rdev);
5846
5847         WREG32(RLC_LB_CNTR_INIT, 0);
5848         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5849
5850         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5851         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5852         WREG32(RLC_LB_PARAMS, 0x00600408);
5853         WREG32(RLC_LB_CNTL, 0x80000004);
5854
5855         WREG32(RLC_MC_CNTL, 0);
5856         WREG32(RLC_UCODE_CNTL, 0);
5857
5858         if (rdev->new_fw) {
5859                 const struct rlc_firmware_header_v1_0 *hdr =
5860                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5861                 const __le32 *fw_data = (const __le32 *)
5862                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5863
5864                 radeon_ucode_print_rlc_hdr(&hdr->header);
5865
5866                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5867                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5868                 for (i = 0; i < size; i++)
5869                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5870                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5871         } else {
5872                 const __be32 *fw_data;
5873
5874                 switch (rdev->family) {
5875                 case CHIP_BONAIRE:
5876                 case CHIP_HAWAII:
5877                 default:
5878                         size = BONAIRE_RLC_UCODE_SIZE;
5879                         break;
5880                 case CHIP_KAVERI:
5881                         size = KV_RLC_UCODE_SIZE;
5882                         break;
5883                 case CHIP_KABINI:
5884                         size = KB_RLC_UCODE_SIZE;
5885                         break;
5886                 case CHIP_MULLINS:
5887                         size = ML_RLC_UCODE_SIZE;
5888                         break;
5889                 }
5890
5891                 fw_data = (const __be32 *)rdev->rlc_fw->data;
5892                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5893                 for (i = 0; i < size; i++)
5894                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5895                 WREG32(RLC_GPM_UCODE_ADDR, 0);
5896         }
5897
5898         /* XXX - find out what chips support lbpw */
5899         cik_enable_lbpw(rdev, false);
5900
5901         if (rdev->family == CHIP_BONAIRE)
5902                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5903
5904         cik_rlc_start(rdev);
5905
5906         return 0;
5907 }
5908
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block via RLC_CGCG_CGLS_CTRL. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                /* halt the RLC while programming the serdes bus, resume after */
                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);

                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): four back-to-back reads with discarded
                 * results - presumably posting reads to let the CB clock
                 * settle before gating is turned off; confirm before
                 * removing. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        /* avoid a redundant MMIO write if nothing changed */
        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5944
/* Enable or disable medium-grain clock gating (MGCG) for the GFX block,
 * together with the related CP/RLC memory light-sleep and CGTS controls. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0 and clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* halt the RLC while programming the serdes bus, resume after */
                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* CGTS (shader complex) gating setup */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force both MGCG override bits on */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* disable RLC memory light sleep */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* disable CP memory light sleep */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                /* force the CGTS overrides on */
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* halt the RLC while programming the serdes bus, resume after */
                tmp = cik_halt_rlc(rdev);

                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);

                cik_update_rlc(rdev, tmp);
        }
}
6024
/* MC/VM/ATC registers that carry the memory-controller clock-gating and
 * light-sleep enable bits; cik_enable_mc_ls() and cik_enable_mc_mgcg()
 * walk this list and toggle MC_LS_ENABLE / MC_CG_ENABLE in each. */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6037
6038 static void cik_enable_mc_ls(struct radeon_device *rdev,
6039                              bool enable)
6040 {
6041         int i;
6042         u32 orig, data;
6043
6044         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6045                 orig = data = RREG32(mc_cg_registers[i]);
6046                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6047                         data |= MC_LS_ENABLE;
6048                 else
6049                         data &= ~MC_LS_ENABLE;
6050                 if (data != orig)
6051                         WREG32(mc_cg_registers[i], data);
6052         }
6053 }
6054
6055 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6056                                bool enable)
6057 {
6058         int i;
6059         u32 orig, data;
6060
6061         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6062                 orig = data = RREG32(mc_cg_registers[i]);
6063                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6064                         data |= MC_CG_ENABLE;
6065                 else
6066                         data &= ~MC_CG_ENABLE;
6067                 if (data != orig)
6068                         WREG32(mc_cg_registers[i], data);
6069         }
6070 }
6071
6072 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6073                                  bool enable)
6074 {
6075         u32 orig, data;
6076
6077         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6078                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6079                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6080         } else {
6081                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6082                 data |= 0xff000000;
6083                 if (data != orig)
6084                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6085
6086                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6087                 data |= 0xff000000;
6088                 if (data != orig)
6089                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6090         }
6091 }
6092
6093 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6094                                  bool enable)
6095 {
6096         u32 orig, data;
6097
6098         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6099                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6100                 data |= 0x100;
6101                 if (orig != data)
6102                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6103
6104                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6105                 data |= 0x100;
6106                 if (orig != data)
6107                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6108         } else {
6109                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6110                 data &= ~0x100;
6111                 if (orig != data)
6112                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6113
6114                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6115                 data &= ~0x100;
6116                 if (orig != data)
6117                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6118         }
6119 }
6120
/* Enable or disable UVD medium-grain clock gating: memory gating bits in
 * the indirect UVD_CGC_MEM_CTRL register plus the DCM bit in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                /* NOTE(review): the value read above is discarded and the
                 * register is programmed to 0xfff outright.  Presumably the
                 * read only serves to latch the indirect UVD context access;
                 * confirm against the UVD programming docs before "fixing". */
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* clear the memory gating bits and dynamic clock mode */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6146
6147 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6148                                bool enable)
6149 {
6150         u32 orig, data;
6151
6152         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6153
6154         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6155                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6156                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6157         else
6158                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6159                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6160
6161         if (orig != data)
6162                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6163 }
6164
6165 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6166                                 bool enable)
6167 {
6168         u32 orig, data;
6169
6170         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6171
6172         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6173                 data &= ~CLOCK_GATING_DIS;
6174         else
6175                 data |= CLOCK_GATING_DIS;
6176
6177         if (orig != data)
6178                 WREG32(HDP_HOST_PATH_CNTL, data);
6179 }
6180
6181 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6182                               bool enable)
6183 {
6184         u32 orig, data;
6185
6186         orig = data = RREG32(HDP_MEM_POWER_LS);
6187
6188         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6189                 data |= HDP_LS_ENABLE;
6190         else
6191                 data &= ~HDP_LS_ENABLE;
6192
6193         if (orig != data)
6194                 WREG32(HDP_MEM_POWER_LS, data);
6195 }
6196
6197 void cik_update_cg(struct radeon_device *rdev,
6198                    u32 block, bool enable)
6199 {
6200
6201         if (block & RADEON_CG_BLOCK_GFX) {
6202                 cik_enable_gui_idle_interrupt(rdev, false);
6203                 /* order matters! */
6204                 if (enable) {
6205                         cik_enable_mgcg(rdev, true);
6206                         cik_enable_cgcg(rdev, true);
6207                 } else {
6208                         cik_enable_cgcg(rdev, false);
6209                         cik_enable_mgcg(rdev, false);
6210                 }
6211                 cik_enable_gui_idle_interrupt(rdev, true);
6212         }
6213
6214         if (block & RADEON_CG_BLOCK_MC) {
6215                 if (!(rdev->flags & RADEON_IS_IGP)) {
6216                         cik_enable_mc_mgcg(rdev, enable);
6217                         cik_enable_mc_ls(rdev, enable);
6218                 }
6219         }
6220
6221         if (block & RADEON_CG_BLOCK_SDMA) {
6222                 cik_enable_sdma_mgcg(rdev, enable);
6223                 cik_enable_sdma_mgls(rdev, enable);
6224         }
6225
6226         if (block & RADEON_CG_BLOCK_BIF) {
6227                 cik_enable_bif_mgls(rdev, enable);
6228         }
6229
6230         if (block & RADEON_CG_BLOCK_UVD) {
6231                 if (rdev->has_uvd)
6232                         cik_enable_uvd_mgcg(rdev, enable);
6233         }
6234
6235         if (block & RADEON_CG_BLOCK_HDP) {
6236                 cik_enable_hdp_mgcg(rdev, enable);
6237                 cik_enable_hdp_ls(rdev, enable);
6238         }
6239
6240         if (block & RADEON_CG_BLOCK_VCE) {
6241                 vce_v2_0_enable_mgcg(rdev, enable);
6242         }
6243 }
6244
6245 static void cik_init_cg(struct radeon_device *rdev)
6246 {
6247
6248         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6249
6250         if (rdev->has_uvd)
6251                 si_init_uvd_internal_cg(rdev);
6252
6253         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6254                              RADEON_CG_BLOCK_SDMA |
6255                              RADEON_CG_BLOCK_BIF |
6256                              RADEON_CG_BLOCK_UVD |
6257                              RADEON_CG_BLOCK_HDP), true);
6258 }
6259
6260 static void cik_fini_cg(struct radeon_device *rdev)
6261 {
6262         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6263                              RADEON_CG_BLOCK_SDMA |
6264                              RADEON_CG_BLOCK_BIF |
6265                              RADEON_CG_BLOCK_UVD |
6266                              RADEON_CG_BLOCK_HDP), false);
6267
6268         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6269 }
6270
6271 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6272                                           bool enable)
6273 {
6274         u32 data, orig;
6275
6276         orig = data = RREG32(RLC_PG_CNTL);
6277         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6278                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6279         else
6280                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6281         if (orig != data)
6282                 WREG32(RLC_PG_CNTL, data);
6283 }
6284
6285 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6286                                           bool enable)
6287 {
6288         u32 data, orig;
6289
6290         orig = data = RREG32(RLC_PG_CNTL);
6291         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6292                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6293         else
6294                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6295         if (orig != data)
6296                 WREG32(RLC_PG_CNTL, data);
6297 }
6298
6299 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6300 {
6301         u32 data, orig;
6302
6303         orig = data = RREG32(RLC_PG_CNTL);
6304         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6305                 data &= ~DISABLE_CP_PG;
6306         else
6307                 data |= DISABLE_CP_PG;
6308         if (orig != data)
6309                 WREG32(RLC_PG_CNTL, data);
6310 }
6311
6312 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6313 {
6314         u32 data, orig;
6315
6316         orig = data = RREG32(RLC_PG_CNTL);
6317         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6318                 data &= ~DISABLE_GDS_PG;
6319         else
6320                 data |= DISABLE_GDS_PG;
6321         if (orig != data)
6322                 WREG32(RLC_PG_CNTL, data);
6323 }
6324
6325 #define CP_ME_TABLE_SIZE    96
6326 #define CP_ME_TABLE_OFFSET  2048
6327 #define CP_MEC_TABLE_OFFSET 4096
6328
/* Copy the CP jump tables (CE, PFP, ME, MEC, and MEC2 on Kaveri) out of
 * the individual gfx firmware images into the RLC cp table BO, packed
 * back to back as little-endian dwords. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        /* Kaveri has a second MEC, so one more jump table to copy */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        /* new-style firmware: the jump table location comes
                         * from the per-ucode header (jt_offset/jt_size are
                         * in dwords), payload is little endian */
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy headerless firmware: fixed table size and
                         * offsets, big-endian payload */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6412
/* Enable or disable gfx coarse-grain power gating and the RLC automatic
 * power-gating state machine. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): the read result is discarded - presumably a
                 * posting read to make sure the gfx block has left the gated
                 * state before continuing; confirm before removing. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6442
6443 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6444 {
6445         u32 mask = 0, tmp, tmp1;
6446         int i;
6447
6448         cik_select_se_sh(rdev, se, sh);
6449         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6450         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6451         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6452
6453         tmp &= 0xffff0000;
6454
6455         tmp |= tmp1;
6456         tmp >>= 16;
6457
6458         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6459                 mask <<= 1;
6460                 mask |= 1;
6461         }
6462
6463         return (~tmp) & mask;
6464 }
6465
6466 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6467 {
6468         u32 i, j, k, active_cu_number = 0;
6469         u32 mask, counter, cu_bitmap;
6470         u32 tmp = 0;
6471
6472         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6473                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6474                         mask = 1;
6475                         cu_bitmap = 0;
6476                         counter = 0;
6477                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6478                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6479                                         if (counter < 2)
6480                                                 cu_bitmap |= mask;
6481                                         counter ++;
6482                                 }
6483                                 mask <<= 1;
6484                         }
6485
6486                         active_cu_number += counter;
6487                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6488                 }
6489         }
6490
6491         WREG32(RLC_PG_AO_CU_MASK, tmp);
6492
6493         tmp = RREG32(RLC_MAX_PG_CU);
6494         tmp &= ~MAX_PU_CU_MASK;
6495         tmp |= MAX_PU_CU(active_cu_number);
6496         WREG32(RLC_MAX_PG_CU, tmp);
6497 }
6498
6499 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6500                                        bool enable)
6501 {
6502         u32 data, orig;
6503
6504         orig = data = RREG32(RLC_PG_CNTL);
6505         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6506                 data |= STATIC_PER_CU_PG_ENABLE;
6507         else
6508                 data &= ~STATIC_PER_CU_PG_ENABLE;
6509         if (orig != data)
6510                 WREG32(RLC_PG_CNTL, data);
6511 }
6512
6513 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6514                                         bool enable)
6515 {
6516         u32 data, orig;
6517
6518         orig = data = RREG32(RLC_PG_CNTL);
6519         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6520                 data |= DYN_PER_CU_PG_ENABLE;
6521         else
6522                 data &= ~DYN_PER_CU_PG_ENABLE;
6523         if (orig != data)
6524                 WREG32(RLC_PG_CNTL, data);
6525 }
6526
6527 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6528 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6529
/* Program the RLC scratch state needed for gfx power gating: clear-state
 * descriptor, save/restore register list, PG source, poll counts and PG
 * delay values. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 data, orig;
        u32 i;

        if (rdev->rlc.cs_data) {
                /* point the RLC at the clear-state buffer: hi addr, lo addr,
                 * then size, written through the scratch data port */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
                WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
        } else {
                /* no clear-state data: zero the three descriptor words */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
                for (i = 0; i < 3; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, 0);
        }
        if (rdev->rlc.reg_list) {
                /* upload the save/restore register list */
                WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
                for (i = 0; i < rdev->rlc.reg_list_size; i++)
                        WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
        }

        orig = data = RREG32(RLC_PG_CNTL);
        data |= GFX_PG_SRC;
        if (orig != data)
                WREG32(RLC_PG_CNTL, data);

        /* base addresses are programmed in units of 256 bytes (>> 8) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
        WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

        data = RREG32(CP_RB_WPTR_POLL_CNTL);
        data &= ~IDLE_POLL_COUNT_MASK;
        data |= IDLE_POLL_COUNT(0x60);
        WREG32(CP_RB_WPTR_POLL_CNTL, data);

        data = 0x10101010;
        WREG32(RLC_PG_DELAY, data);

        data = RREG32(RLC_PG_DELAY_2);
        data &= ~0xff;
        data |= 0x3;
        WREG32(RLC_PG_DELAY_2, data);

        data = RREG32(RLC_AUTO_PG_CTRL);
        data &= ~GRBM_REG_SGIT_MASK;
        data |= GRBM_REG_SGIT(0x700);
        WREG32(RLC_AUTO_PG_CTRL, data);

}
6578
/* Toggle all gfx power-gating features: coarse-grain first, then the
 * static and dynamic per-CU medium-grain variants. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
        cik_enable_gfx_cgpg(rdev, enable);
        cik_enable_gfx_static_mgpg(rdev, enable);
        cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6585
6586 u32 cik_get_csb_size(struct radeon_device *rdev)
6587 {
6588         u32 count = 0;
6589         const struct cs_section_def *sect = NULL;
6590         const struct cs_extent_def *ext = NULL;
6591
6592         if (rdev->rlc.cs_data == NULL)
6593                 return 0;
6594
6595         /* begin clear state */
6596         count += 2;
6597         /* context control state */
6598         count += 3;
6599
6600         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6601                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6602                         if (sect->id == SECT_CONTEXT)
6603                                 count += 2 + ext->reg_count;
6604                         else
6605                                 return 0;
6606                 }
6607         }
6608         /* pa_sc_raster_config/pa_sc_raster_config1 */
6609         count += 4;
6610         /* end clear state */
6611         count += 2;
6612         /* clear state */
6613         count += 2;
6614
6615         return count;
6616 }
6617
/* Emit the clear-state indirect buffer into @buffer as little-endian
 * packets; the layout must stay in sync with cik_get_csb_size(). */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        /* begin clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        /* context control */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* one SET_CONTEXT_REG packet per extent; only SECT_CONTEXT
         * sections are supported (mirrors cik_get_csb_size()) */
        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* pa_sc_raster_config/pa_sc_raster_config1: per-ASIC values */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (rdev->family) {
        case CHIP_BONAIRE:
                buffer[count++] = cpu_to_le32(0x16000012);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KAVERI:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_KABINI:
        case CHIP_MULLINS:
                buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_HAWAII:
                buffer[count++] = cpu_to_le32(0x3a00161a);
                buffer[count++] = cpu_to_le32(0x0000002e);
                break;
        default:
                buffer[count++] = cpu_to_le32(0x00000000);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        }

        /* end clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
6682
/**
 * cik_init_pg - initialize and enable powergating
 *
 * @rdev: radeon_device pointer
 *
 * Enable the powergating features selected in rdev->pg_flags (CIK).
 * Does nothing when no PG flags are set.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		/* enable SCLK slowdown on both power-up and power-down */
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* GFX PG needs CGPG init plus CP and GDS powergating */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		/* set always-on CU mask, then turn on gfx powergating */
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6697
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable the powergating features that cik_init_pg() enabled (CIK),
 * in the reverse order: gfx PG first, then CP and GDS powergating.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6708
6709 /*
6710  * Interrupts
6711  * Starting with r6xx, interrupts are handled via a ring buffer.
6712  * Ring buffers are areas of GPU accessible memory that the GPU
6713  * writes interrupt vectors into and the host reads vectors out of.
6714  * There is a rptr (read pointer) that determines where the
6715  * host is currently reading, and a wptr (write pointer)
6716  * which determines where the GPU has written.  When the
6717  * pointers are equal, the ring is idle.  When the GPU
6718  * writes vectors to the ring buffer, it increments the
6719  * wptr.  When there is an interrupt, the host then starts
6720  * fetching commands and processing them until the pointers are
6721  * equal again at which point it updates the rptr.
6722  */
6723
6724 /**
6725  * cik_enable_interrupts - Enable the interrupt ring buffer
6726  *
6727  * @rdev: radeon_device pointer
6728  *
6729  * Enable the interrupt ring buffer (CIK).
6730  */
6731 static void cik_enable_interrupts(struct radeon_device *rdev)
6732 {
6733         u32 ih_cntl = RREG32(IH_CNTL);
6734         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6735
6736         ih_cntl |= ENABLE_INTR;
6737         ih_rb_cntl |= IH_RB_ENABLE;
6738         WREG32(IH_CNTL, ih_cntl);
6739         WREG32(IH_RB_CNTL, ih_rb_cntl);
6740         rdev->ih.enabled = true;
6741 }
6742
6743 /**
6744  * cik_disable_interrupts - Disable the interrupt ring buffer
6745  *
6746  * @rdev: radeon_device pointer
6747  *
6748  * Disable the interrupt ring buffer (CIK).
6749  */
6750 static void cik_disable_interrupts(struct radeon_device *rdev)
6751 {
6752         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6753         u32 ih_cntl = RREG32(IH_CNTL);
6754
6755         ih_rb_cntl &= ~IH_RB_ENABLE;
6756         ih_cntl &= ~ENABLE_INTR;
6757         WREG32(IH_RB_CNTL, ih_rb_cntl);
6758         WREG32(IH_CNTL, ih_cntl);
6759         /* set rptr, wptr to 0 */
6760         WREG32(IH_RB_RPTR, 0);
6761         WREG32(IH_RB_WPTR, 0);
6762         rdev->ih.enabled = false;
6763         rdev->ih.rptr = 0;
6764 }
6765
6766 /**
6767  * cik_disable_interrupt_state - Disable all interrupt sources
6768  *
6769  * @rdev: radeon_device pointer
6770  *
6771  * Clear all interrupt enable bits used by the driver (CIK).
6772  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: clear all interrupt enables on both MEs, all pipes */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — guarded by how many crtcs this asic has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve only the polarity bit, clear the enables */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6842
6843 /**
6844  * cik_irq_init - init and enable the interrupt ring
6845  *
6846  * @rdev: radeon_device pointer
6847  *
6848  * Allocate a ring buffer for the interrupt controller,
6849  * enable the RLC, disable interrupts, enable the IH
6850  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6852  * Returns 0 for success, errors for failure.
6853  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on RLC failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the dword count, at bit 1 */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* bus mastering must be on for interrupts to be delivered */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6924
6925 /**
6926  * cik_irq_set - enable/disable interrupt sources
6927  *
6928  * @rdev: radeon_device pointer
6929  *
6930  * Enable interrupt sources on the GPU (vblanks, hpd,
6931  * etc.) (CIK).
6932  * Returns 0 for success, errors for failure.
6933  */
6934 int cik_irq_set(struct radeon_device *rdev)
6935 {
6936         u32 cp_int_cntl;
6937         u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6938         u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6939         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6940         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6941         u32 grbm_int_cntl = 0;
6942         u32 dma_cntl, dma_cntl1;
6943
6944         if (!rdev->irq.installed) {
6945                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6946                 return -EINVAL;
6947         }
6948         /* don't enable anything if the ih is disabled */
6949         if (!rdev->ih.enabled) {
6950                 cik_disable_interrupts(rdev);
6951                 /* force the active interrupt state to all disabled */
6952                 cik_disable_interrupt_state(rdev);
6953                 return 0;
6954         }
6955
6956         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6957                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6958         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6959
6960         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6961         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6962         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6963         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6964         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6965         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6966
6967         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6968         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6969
6970         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6971         cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6972         cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6973         cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6974         cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6975         cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6976         cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6977         cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6978
6979         /* enable CP interrupts on all rings */
6980         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6981                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6982                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6983         }
6984         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6985                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6986                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6987                 if (ring->me == 1) {
6988                         switch (ring->pipe) {
6989                         case 0:
6990                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6991                                 break;
6992                         case 1:
6993                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6994                                 break;
6995                         case 2:
6996                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6997                                 break;
6998                         case 3:
6999                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7000                                 break;
7001                         default:
7002                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7003                                 break;
7004                         }
7005                 } else if (ring->me == 2) {
7006                         switch (ring->pipe) {
7007                         case 0:
7008                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7009                                 break;
7010                         case 1:
7011                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7012                                 break;
7013                         case 2:
7014                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7015                                 break;
7016                         case 3:
7017                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7018                                 break;
7019                         default:
7020                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7021                                 break;
7022                         }
7023                 } else {
7024                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7025                 }
7026         }
7027         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7028                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7029                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7030                 if (ring->me == 1) {
7031                         switch (ring->pipe) {
7032                         case 0:
7033                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7034                                 break;
7035                         case 1:
7036                                 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7037                                 break;
7038                         case 2:
7039                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7040                                 break;
7041                         case 3:
7042                                 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7043                                 break;
7044                         default:
7045                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7046                                 break;
7047                         }
7048                 } else if (ring->me == 2) {
7049                         switch (ring->pipe) {
7050                         case 0:
7051                                 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7052                                 break;
7053                         case 1:
7054                                 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7055                                 break;
7056                         case 2:
7057                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7058                                 break;
7059                         case 3:
7060                                 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7061                                 break;
7062                         default:
7063                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7064                                 break;
7065                         }
7066                 } else {
7067                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7068                 }
7069         }
7070
7071         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7072                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7073                 dma_cntl |= TRAP_ENABLE;
7074         }
7075
7076         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7077                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7078                 dma_cntl1 |= TRAP_ENABLE;
7079         }
7080
7081         if (rdev->irq.crtc_vblank_int[0] ||
7082             atomic_read(&rdev->irq.pflip[0])) {
7083                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7084                 crtc1 |= VBLANK_INTERRUPT_MASK;
7085         }
7086         if (rdev->irq.crtc_vblank_int[1] ||
7087             atomic_read(&rdev->irq.pflip[1])) {
7088                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7089                 crtc2 |= VBLANK_INTERRUPT_MASK;
7090         }
7091         if (rdev->irq.crtc_vblank_int[2] ||
7092             atomic_read(&rdev->irq.pflip[2])) {
7093                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7094                 crtc3 |= VBLANK_INTERRUPT_MASK;
7095         }
7096         if (rdev->irq.crtc_vblank_int[3] ||
7097             atomic_read(&rdev->irq.pflip[3])) {
7098                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7099                 crtc4 |= VBLANK_INTERRUPT_MASK;
7100         }
7101         if (rdev->irq.crtc_vblank_int[4] ||
7102             atomic_read(&rdev->irq.pflip[4])) {
7103                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7104                 crtc5 |= VBLANK_INTERRUPT_MASK;
7105         }
7106         if (rdev->irq.crtc_vblank_int[5] ||
7107             atomic_read(&rdev->irq.pflip[5])) {
7108                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7109                 crtc6 |= VBLANK_INTERRUPT_MASK;
7110         }
7111         if (rdev->irq.hpd[0]) {
7112                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7113                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7114         }
7115         if (rdev->irq.hpd[1]) {
7116                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7117                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7118         }
7119         if (rdev->irq.hpd[2]) {
7120                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7121                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7122         }
7123         if (rdev->irq.hpd[3]) {
7124                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7125                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7126         }
7127         if (rdev->irq.hpd[4]) {
7128                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7129                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7130         }
7131         if (rdev->irq.hpd[5]) {
7132                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7133                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7134         }
7135
7136         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7137
7138         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7139         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7140
7141         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7142         WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7143         WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7144         WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7145         WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7146         WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7147         WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7148         WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7149
7150         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7151
7152         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7153         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7154         if (rdev->num_crtc >= 4) {
7155                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7156                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7157         }
7158         if (rdev->num_crtc >= 6) {
7159                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7160                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7161         }
7162
7163         if (rdev->num_crtc >= 2) {
7164                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7165                        GRPH_PFLIP_INT_MASK);
7166                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7167                        GRPH_PFLIP_INT_MASK);
7168         }
7169         if (rdev->num_crtc >= 4) {
7170                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7171                        GRPH_PFLIP_INT_MASK);
7172                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7173                        GRPH_PFLIP_INT_MASK);
7174         }
7175         if (rdev->num_crtc >= 6) {
7176                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7177                        GRPH_PFLIP_INT_MASK);
7178                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7179                        GRPH_PFLIP_INT_MASK);
7180         }
7181
7182         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7183         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7184         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7185         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7186         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7187         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7188
7189         /* posting read */
7190         RREG32(SRBM_STATUS);
7191
7192         return 0;
7193 }
7194
7195 /**
7196  * cik_irq_ack - ack interrupt sources
7197  *
7198  * @rdev: radeon_device pointer
7199  *
7200  * Ack interrupt sources on the GPU (vblanks, hpd,
7201  * etc.) (CIK).  Certain interrupts sources are sw
7202  * generated and do not require an explicit ack.
7203  */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for cik_irq_process() */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* latch the pflip status for each crtc the asic has */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip and vblank/vline interrupts on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack pflip and vblank/vline interrupts on crtc 2/3 */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack pflip and vblank/vline interrupts on crtc 4/5 */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack HPD connect/disconnect interrupts (read-modify-write to
	 * preserve the other control bits in each register) */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack HPD RX (DP short pulse) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7343
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).  Turns interrupt delivery off,
 * acks anything still pending, then masks all interrupt sources.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq: give any in-flight interrupt time to
	 * latch before acking, so no stale source bits remain set.
	 */
	mdelay(1);
	cik_irq_ack(rdev);
	/* mask the individual interrupt sources last */
	cik_disable_interrupt_state(rdev);
}
7359
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.  Interrupts must be quiesced before the RLC is
 * stopped, hence the ordering below.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7373
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.  The ring must only be freed after the
 * hardware can no longer write to it, so suspend comes first.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7388
7389 /**
7390  * cik_get_ih_wptr - get the IH ring buffer wptr
7391  *
7392  * @rdev: radeon_device pointer
7393  *
7394  * Get the IH ring buffer wptr from either the register
7395  * or the writeback memory buffer (CIK).  Also check for
7396  * ring buffer overflow and deal with it.
7397  * Used by cik_irq_process().
7398  * Returns the value of the wptr.
7399  */
7400 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7401 {
7402         u32 wptr, tmp;
7403
7404         if (rdev->wb.enabled)
7405                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7406         else
7407                 wptr = RREG32(IH_RB_WPTR);
7408
7409         if (wptr & RB_OVERFLOW) {
7410                 wptr &= ~RB_OVERFLOW;
7411                 /* When a ring buffer overflow happen start parsing interrupt
7412                  * from the last not overwritten vector (wptr + 16). Hopefully
7413                  * this should allow us to catchup.
7414                  */
7415                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7416                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7417                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7418                 tmp = RREG32(IH_RB_CNTL);
7419                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7420                 WREG32(IH_RB_CNTL, tmp);
7421         }
7422         return (wptr & rdev->ih.ptr_mask);
7423 }
7424
7425 /*        CIK IV Ring
7426  * Each IV ring entry is 128 bits:
7427  * [7:0]    - interrupt source id
7428  * [31:8]   - reserved
7429  * [59:32]  - interrupt source data
7430  * [63:60]  - reserved
7431  * [71:64]  - RINGID
7432  *            CP:
7433  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7434  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7435  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7436  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7437  *            PIPE_ID - ME0 0=3D
7438  *                    - ME1&2 compute dispatcher (4 pipes each)
7439  *            SDMA:
7440  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7441  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7442  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7443  * [79:72]  - VMID
7444  * [95:80]  - PASID
7445  * [127:96] - reserved
7446  */
7447 /**
7448  * cik_irq_process - interrupt handler
7449  *
7450  * @rdev: radeon_device pointer
7451  *
 * Interrupt handler (CIK).  Walk the IH ring,
7453  * ack interrupts and schedule work to handle
7454  * interrupt events.
7455  * Returns irq process return code.
7456  */
7457 int cik_irq_process(struct radeon_device *rdev)
7458 {
7459         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7460         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7461         u32 wptr;
7462         u32 rptr;
7463         u32 src_id, src_data, ring_id;
7464         u8 me_id, pipe_id, queue_id;
7465         u32 ring_index;
7466         bool queue_hotplug = false;
7467         bool queue_dp = false;
7468         bool queue_reset = false;
7469         u32 addr, status, mc_client;
7470         bool queue_thermal = false;
7471
7472         if (!rdev->ih.enabled || rdev->shutdown)
7473                 return IRQ_NONE;
7474
7475         wptr = cik_get_ih_wptr(rdev);
7476
7477 restart_ih:
7478         /* is somebody else already processing irqs? */
7479         if (atomic_xchg(&rdev->ih.lock, 1))
7480                 return IRQ_NONE;
7481
7482         rptr = rdev->ih.rptr;
7483         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7484
7485         /* Order reading of wptr vs. reading of IH ring data */
7486         rmb();
7487
7488         /* display interrupts */
7489         cik_irq_ack(rdev);
7490
7491         while (rptr != wptr) {
7492                 /* wptr/rptr are in bytes! */
7493                 ring_index = rptr / 4;
7494
7495                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7496                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7497                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7498
7499                 switch (src_id) {
7500                 case 1: /* D1 vblank/vline */
7501                         switch (src_data) {
7502                         case 0: /* D1 vblank */
7503                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7504                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7505
7506                                 if (rdev->irq.crtc_vblank_int[0]) {
7507                                         drm_handle_vblank(rdev->ddev, 0);
7508                                         rdev->pm.vblank_sync = true;
7509                                         wake_up(&rdev->irq.vblank_queue);
7510                                 }
7511                                 if (atomic_read(&rdev->irq.pflip[0]))
7512                                         radeon_crtc_handle_vblank(rdev, 0);
7513                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7514                                 DRM_DEBUG("IH: D1 vblank\n");
7515
7516                                 break;
7517                         case 1: /* D1 vline */
7518                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7519                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7520
7521                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7522                                 DRM_DEBUG("IH: D1 vline\n");
7523
7524                                 break;
7525                         default:
7526                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7527                                 break;
7528                         }
7529                         break;
7530                 case 2: /* D2 vblank/vline */
7531                         switch (src_data) {
7532                         case 0: /* D2 vblank */
7533                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7534                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7535
7536                                 if (rdev->irq.crtc_vblank_int[1]) {
7537                                         drm_handle_vblank(rdev->ddev, 1);
7538                                         rdev->pm.vblank_sync = true;
7539                                         wake_up(&rdev->irq.vblank_queue);
7540                                 }
7541                                 if (atomic_read(&rdev->irq.pflip[1]))
7542                                         radeon_crtc_handle_vblank(rdev, 1);
7543                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7544                                 DRM_DEBUG("IH: D2 vblank\n");
7545
7546                                 break;
7547                         case 1: /* D2 vline */
7548                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7549                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7550
7551                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7552                                 DRM_DEBUG("IH: D2 vline\n");
7553
7554                                 break;
7555                         default:
7556                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7557                                 break;
7558                         }
7559                         break;
7560                 case 3: /* D3 vblank/vline */
7561                         switch (src_data) {
7562                         case 0: /* D3 vblank */
7563                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7564                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7565
7566                                 if (rdev->irq.crtc_vblank_int[2]) {
7567                                         drm_handle_vblank(rdev->ddev, 2);
7568                                         rdev->pm.vblank_sync = true;
7569                                         wake_up(&rdev->irq.vblank_queue);
7570                                 }
7571                                 if (atomic_read(&rdev->irq.pflip[2]))
7572                                         radeon_crtc_handle_vblank(rdev, 2);
7573                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7574                                 DRM_DEBUG("IH: D3 vblank\n");
7575
7576                                 break;
7577                         case 1: /* D3 vline */
7578                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7579                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7580
7581                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7582                                 DRM_DEBUG("IH: D3 vline\n");
7583
7584                                 break;
7585                         default:
7586                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7587                                 break;
7588                         }
7589                         break;
7590                 case 4: /* D4 vblank/vline */
7591                         switch (src_data) {
7592                         case 0: /* D4 vblank */
7593                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7594                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7595
7596                                 if (rdev->irq.crtc_vblank_int[3]) {
7597                                         drm_handle_vblank(rdev->ddev, 3);
7598                                         rdev->pm.vblank_sync = true;
7599                                         wake_up(&rdev->irq.vblank_queue);
7600                                 }
7601                                 if (atomic_read(&rdev->irq.pflip[3]))
7602                                         radeon_crtc_handle_vblank(rdev, 3);
7603                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7604                                 DRM_DEBUG("IH: D4 vblank\n");
7605
7606                                 break;
7607                         case 1: /* D4 vline */
7608                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7609                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7610
7611                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7612                                 DRM_DEBUG("IH: D4 vline\n");
7613
7614                                 break;
7615                         default:
7616                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7617                                 break;
7618                         }
7619                         break;
7620                 case 5: /* D5 vblank/vline */
7621                         switch (src_data) {
7622                         case 0: /* D5 vblank */
7623                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7624                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7625
7626                                 if (rdev->irq.crtc_vblank_int[4]) {
7627                                         drm_handle_vblank(rdev->ddev, 4);
7628                                         rdev->pm.vblank_sync = true;
7629                                         wake_up(&rdev->irq.vblank_queue);
7630                                 }
7631                                 if (atomic_read(&rdev->irq.pflip[4]))
7632                                         radeon_crtc_handle_vblank(rdev, 4);
7633                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7634                                 DRM_DEBUG("IH: D5 vblank\n");
7635
7636                                 break;
7637                         case 1: /* D5 vline */
7638                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7639                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7640
7641                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7642                                 DRM_DEBUG("IH: D5 vline\n");
7643
7644                                 break;
7645                         default:
7646                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7647                                 break;
7648                         }
7649                         break;
7650                 case 6: /* D6 vblank/vline */
7651                         switch (src_data) {
7652                         case 0: /* D6 vblank */
7653                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7654                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7655
7656                                 if (rdev->irq.crtc_vblank_int[5]) {
7657                                         drm_handle_vblank(rdev->ddev, 5);
7658                                         rdev->pm.vblank_sync = true;
7659                                         wake_up(&rdev->irq.vblank_queue);
7660                                 }
7661                                 if (atomic_read(&rdev->irq.pflip[5]))
7662                                         radeon_crtc_handle_vblank(rdev, 5);
7663                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7664                                 DRM_DEBUG("IH: D6 vblank\n");
7665
7666                                 break;
7667                         case 1: /* D6 vline */
7668                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7669                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7670
7671                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7672                                 DRM_DEBUG("IH: D6 vline\n");
7673
7674                                 break;
7675                         default:
7676                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7677                                 break;
7678                         }
7679                         break;
7680                 case 8: /* D1 page flip */
7681                 case 10: /* D2 page flip */
7682                 case 12: /* D3 page flip */
7683                 case 14: /* D4 page flip */
7684                 case 16: /* D5 page flip */
7685                 case 18: /* D6 page flip */
7686                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7687                         if (radeon_use_pflipirq > 0)
7688                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7689                         break;
7690                 case 42: /* HPD hotplug */
7691                         switch (src_data) {
7692                         case 0:
7693                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7694                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7695
7696                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7697                                 queue_hotplug = true;
7698                                 DRM_DEBUG("IH: HPD1\n");
7699
7700                                 break;
7701                         case 1:
7702                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7703                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7704
7705                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7706                                 queue_hotplug = true;
7707                                 DRM_DEBUG("IH: HPD2\n");
7708
7709                                 break;
7710                         case 2:
7711                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7712                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7713
7714                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7715                                 queue_hotplug = true;
7716                                 DRM_DEBUG("IH: HPD3\n");
7717
7718                                 break;
7719                         case 3:
7720                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7721                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7722
7723                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7724                                 queue_hotplug = true;
7725                                 DRM_DEBUG("IH: HPD4\n");
7726
7727                                 break;
7728                         case 4:
7729                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7730                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7731
7732                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7733                                 queue_hotplug = true;
7734                                 DRM_DEBUG("IH: HPD5\n");
7735
7736                                 break;
7737                         case 5:
7738                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7739                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7740
7741                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7742                                 queue_hotplug = true;
7743                                 DRM_DEBUG("IH: HPD6\n");
7744
7745                                 break;
7746                         case 6:
7747                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7748                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7749
7750                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7751                                 queue_dp = true;
7752                                 DRM_DEBUG("IH: HPD_RX 1\n");
7753
7754                                 break;
7755                         case 7:
7756                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7757                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7758
7759                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7760                                 queue_dp = true;
7761                                 DRM_DEBUG("IH: HPD_RX 2\n");
7762
7763                                 break;
7764                         case 8:
7765                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7766                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7767
7768                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7769                                 queue_dp = true;
7770                                 DRM_DEBUG("IH: HPD_RX 3\n");
7771
7772                                 break;
7773                         case 9:
7774                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7775                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7778                                 queue_dp = true;
7779                                 DRM_DEBUG("IH: HPD_RX 4\n");
7780
7781                                 break;
7782                         case 10:
7783                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7784                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7787                                 queue_dp = true;
7788                                 DRM_DEBUG("IH: HPD_RX 5\n");
7789
7790                                 break;
7791                         case 11:
7792                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7793                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7796                                 queue_dp = true;
7797                                 DRM_DEBUG("IH: HPD_RX 6\n");
7798
7799                                 break;
7800                         default:
7801                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7802                                 break;
7803                         }
7804                         break;
7805                 case 96:
7806                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7807                         WREG32(SRBM_INT_ACK, 0x1);
7808                         break;
7809                 case 124: /* UVD */
7810                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7811                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7812                         break;
7813                 case 146:
7814                 case 147:
7815                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7816                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7817                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7818                         /* reset addr and status */
7819                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7820                         if (addr == 0x0 && status == 0x0)
7821                                 break;
7822                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7823                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7824                                 addr);
7825                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7826                                 status);
7827                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7828                         break;
7829                 case 167: /* VCE */
7830                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7831                         switch (src_data) {
7832                         case 0:
7833                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7834                                 break;
7835                         case 1:
7836                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7837                                 break;
7838                         default:
7839                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7840                                 break;
7841                         }
7842                         break;
7843                 case 176: /* GFX RB CP_INT */
7844                 case 177: /* GFX IB CP_INT */
7845                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7846                         break;
7847                 case 181: /* CP EOP event */
7848                         DRM_DEBUG("IH: CP EOP\n");
7849                         /* XXX check the bitfield order! */
7850                         me_id = (ring_id & 0x60) >> 5;
7851                         pipe_id = (ring_id & 0x18) >> 3;
7852                         queue_id = (ring_id & 0x7) >> 0;
7853                         switch (me_id) {
7854                         case 0:
7855                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7856                                 break;
7857                         case 1:
7858                         case 2:
7859                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7860                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7861                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7862                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7863                                 break;
7864                         }
7865                         break;
7866                 case 184: /* CP Privileged reg access */
7867                         DRM_ERROR("Illegal register access in command stream\n");
7868                         /* XXX check the bitfield order! */
7869                         me_id = (ring_id & 0x60) >> 5;
7870                         switch (me_id) {
7871                         case 0:
7872                                 /* This results in a full GPU reset, but all we need to do is soft
7873                                  * reset the CP for gfx
7874                                  */
7875                                 queue_reset = true;
7876                                 break;
7877                         case 1:
7878                                 /* XXX compute */
7879                                 queue_reset = true;
7880                                 break;
7881                         case 2:
7882                                 /* XXX compute */
7883                                 queue_reset = true;
7884                                 break;
7885                         }
7886                         break;
7887                 case 185: /* CP Privileged inst */
7888                         DRM_ERROR("Illegal instruction in command stream\n");
7889                         /* XXX check the bitfield order! */
7890                         me_id = (ring_id & 0x60) >> 5;
7891                         switch (me_id) {
7892                         case 0:
7893                                 /* This results in a full GPU reset, but all we need to do is soft
7894                                  * reset the CP for gfx
7895                                  */
7896                                 queue_reset = true;
7897                                 break;
7898                         case 1:
7899                                 /* XXX compute */
7900                                 queue_reset = true;
7901                                 break;
7902                         case 2:
7903                                 /* XXX compute */
7904                                 queue_reset = true;
7905                                 break;
7906                         }
7907                         break;
7908                 case 224: /* SDMA trap event */
7909                         /* XXX check the bitfield order! */
7910                         me_id = (ring_id & 0x3) >> 0;
7911                         queue_id = (ring_id & 0xc) >> 2;
7912                         DRM_DEBUG("IH: SDMA trap\n");
7913                         switch (me_id) {
7914                         case 0:
7915                                 switch (queue_id) {
7916                                 case 0:
7917                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7918                                         break;
7919                                 case 1:
7920                                         /* XXX compute */
7921                                         break;
7922                                 case 2:
7923                                         /* XXX compute */
7924                                         break;
7925                                 }
7926                                 break;
7927                         case 1:
7928                                 switch (queue_id) {
7929                                 case 0:
7930                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7931                                         break;
7932                                 case 1:
7933                                         /* XXX compute */
7934                                         break;
7935                                 case 2:
7936                                         /* XXX compute */
7937                                         break;
7938                                 }
7939                                 break;
7940                         }
7941                         break;
7942                 case 230: /* thermal low to high */
7943                         DRM_DEBUG("IH: thermal low to high\n");
7944                         rdev->pm.dpm.thermal.high_to_low = false;
7945                         queue_thermal = true;
7946                         break;
7947                 case 231: /* thermal high to low */
7948                         DRM_DEBUG("IH: thermal high to low\n");
7949                         rdev->pm.dpm.thermal.high_to_low = true;
7950                         queue_thermal = true;
7951                         break;
7952                 case 233: /* GUI IDLE */
7953                         DRM_DEBUG("IH: GUI idle\n");
7954                         break;
7955                 case 241: /* SDMA Privileged inst */
7956                 case 247: /* SDMA Privileged inst */
7957                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
7958                         /* XXX check the bitfield order! */
7959                         me_id = (ring_id & 0x3) >> 0;
7960                         queue_id = (ring_id & 0xc) >> 2;
7961                         switch (me_id) {
7962                         case 0:
7963                                 switch (queue_id) {
7964                                 case 0:
7965                                         queue_reset = true;
7966                                         break;
7967                                 case 1:
7968                                         /* XXX compute */
7969                                         queue_reset = true;
7970                                         break;
7971                                 case 2:
7972                                         /* XXX compute */
7973                                         queue_reset = true;
7974                                         break;
7975                                 }
7976                                 break;
7977                         case 1:
7978                                 switch (queue_id) {
7979                                 case 0:
7980                                         queue_reset = true;
7981                                         break;
7982                                 case 1:
7983                                         /* XXX compute */
7984                                         queue_reset = true;
7985                                         break;
7986                                 case 2:
7987                                         /* XXX compute */
7988                                         queue_reset = true;
7989                                         break;
7990                                 }
7991                                 break;
7992                         }
7993                         break;
7994                 default:
7995                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7996                         break;
7997                 }
7998
7999                 /* wptr/rptr are in bytes! */
8000                 rptr += 16;
8001                 rptr &= rdev->ih.ptr_mask;
8002                 WREG32(IH_RB_RPTR, rptr);
8003         }
8004         if (queue_dp)
8005                 schedule_work(&rdev->dp_work);
8006         if (queue_hotplug)
8007                 schedule_delayed_work(&rdev->hotplug_work, 0);
8008         if (queue_reset) {
8009                 rdev->needs_reset = true;
8010                 wake_up_all(&rdev->fence_queue);
8011         }
8012         if (queue_thermal)
8013                 schedule_work(&rdev->pm.dpm.thermal.work);
8014         rdev->ih.rptr = rptr;
8015         atomic_set(&rdev->ih.lock, 0);
8016
8017         /* make sure wptr hasn't changed while processing */
8018         wptr = cik_get_ih_wptr(rdev);
8019         if (wptr != rptr)
8020                 goto restart_ih;
8021
8022         return IRQ_HANDLED;
8023 }
8024
8025 /*
8026  * startup/shutdown callbacks
8027  */
8028 static void cik_uvd_init(struct radeon_device *rdev)
8029 {
8030         int r;
8031
8032         if (!rdev->has_uvd)
8033                 return;
8034
8035         r = radeon_uvd_init(rdev);
8036         if (r) {
8037                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8038                 /*
8039                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8040                  * to early fails cik_uvd_start() and thus nothing happens
8041                  * there. So it is pointless to try to go through that code
8042                  * hence why we disable uvd here.
8043                  */
8044                 rdev->has_uvd = false;
8045                 return;
8046         }
8047         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8048         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8049 }
8050
8051 static void cik_uvd_start(struct radeon_device *rdev)
8052 {
8053         int r;
8054
8055         if (!rdev->has_uvd)
8056                 return;
8057
8058         r = radeon_uvd_resume(rdev);
8059         if (r) {
8060                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8061                 goto error;
8062         }
8063         r = uvd_v4_2_resume(rdev);
8064         if (r) {
8065                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8066                 goto error;
8067         }
8068         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8069         if (r) {
8070                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8071                 goto error;
8072         }
8073         return;
8074
8075 error:
8076         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8077 }
8078
8079 static void cik_uvd_resume(struct radeon_device *rdev)
8080 {
8081         struct radeon_ring *ring;
8082         int r;
8083
8084         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8085                 return;
8086
8087         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8088         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8089         if (r) {
8090                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8091                 return;
8092         }
8093         r = uvd_v1_0_init(rdev);
8094         if (r) {
8095                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8096                 return;
8097         }
8098 }
8099
8100 static void cik_vce_init(struct radeon_device *rdev)
8101 {
8102         int r;
8103
8104         if (!rdev->has_vce)
8105                 return;
8106
8107         r = radeon_vce_init(rdev);
8108         if (r) {
8109                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8110                 /*
8111                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8112                  * to early fails cik_vce_start() and thus nothing happens
8113                  * there. So it is pointless to try to go through that code
8114                  * hence why we disable vce here.
8115                  */
8116                 rdev->has_vce = false;
8117                 return;
8118         }
8119         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8120         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8121         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8122         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8123 }
8124
8125 static void cik_vce_start(struct radeon_device *rdev)
8126 {
8127         int r;
8128
8129         if (!rdev->has_vce)
8130                 return;
8131
8132         r = radeon_vce_resume(rdev);
8133         if (r) {
8134                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8135                 goto error;
8136         }
8137         r = vce_v2_0_resume(rdev);
8138         if (r) {
8139                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8140                 goto error;
8141         }
8142         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8143         if (r) {
8144                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8145                 goto error;
8146         }
8147         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8148         if (r) {
8149                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8150                 goto error;
8151         }
8152         return;
8153
8154 error:
8155         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8156         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8157 }
8158
8159 static void cik_vce_resume(struct radeon_device *rdev)
8160 {
8161         struct radeon_ring *ring;
8162         int r;
8163
8164         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8165                 return;
8166
8167         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8168         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8169         if (r) {
8170                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8171                 return;
8172         }
8173         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8174         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8175         if (r) {
8176                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8177                 return;
8178         }
8179         r = vce_v1_0_init(rdev);
8180         if (r) {
8181                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8182                 return;
8183         }
8184 }
8185
8186 /**
8187  * cik_startup - program the asic to a functional state
8188  *
8189  * @rdev: radeon_device pointer
8190  *
8191  * Programs the asic to a functional state (CIK).
8192  * Called by cik_init() and cik_resume().
8193  * Returns 0 for success, error for failure.
8194  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* only dGPUs need the MC ucode loaded here; with DPM enabled the
	 * SMC has already taken care of it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the RLC save/restore list matching the APU family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we use: GFX, both compute
	 * queues, and both SDMA engines */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal: they only zero the ring sizes */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware pads rings with type-2 packets; all
	 * other cases use the type-3 NOP */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal: these log their own errors and return void */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8387
8388 /**
8389  * cik_resume - resume the asic to a functional state
8390  *
8391  * @rdev: radeon_device pointer
8392  *
8393  * Programs the asic to a functional state (CIK).
8394  * Called at resume.
8395  * Returns 0 for success, error for failure.
8396  */
8397 int cik_resume(struct radeon_device *rdev)
8398 {
8399         int r;
8400
8401         /* post card */
8402         atom_asic_init(rdev->mode_info.atom_context);
8403
8404         /* init golden registers */
8405         cik_init_golden_registers(rdev);
8406
8407         if (rdev->pm.pm_method == PM_METHOD_DPM)
8408                 radeon_pm_resume(rdev);
8409
8410         rdev->accel_working = true;
8411         r = cik_startup(rdev);
8412         if (r) {
8413                 DRM_ERROR("cik startup failed on resume\n");
8414                 rdev->accel_working = false;
8415                 return r;
8416         }
8417
8418         return r;
8419
8420 }
8421
8422 /**
8423  * cik_suspend - suspend the asic
8424  *
8425  * @rdev: radeon_device pointer
8426  *
8427  * Bring the chip into a state suitable for suspend (CIK).
8428  * Called at suspend.
8429  * Returns 0 for success.
8430  */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the CP and SDMA engines before tearing anything else down */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		radeon_uvd_suspend(rdev);
		uvd_v1_0_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating/clockgating before the IRQs go away */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	/* GART last: nothing above may touch GPU VM after this */
	cik_pcie_gart_disable(rdev);
	return 0;
}
8451
8452 /* Plan is to move initialization in that function and use
8453  * helper function so that radeon_device_init pretty much
8454  * do nothing more than calling asic specific function. This
8455  * should also allow to remove a bunch of callback function
8456  * like vram_info.
8457  */
8458 /**
8459  * cik_init - asic specific driver and hw init
8460  *
8461  * @rdev: radeon_device pointer
8462  *
8463  * Setup asic specific driver variables and program the hw
8464  * to a functional state (CIK).
8465  * Called at driver startup.
8466  * Returns 0 for success, errors for failure.
8467  */
8468 int cik_init(struct radeon_device *rdev)
8469 {
8470         struct radeon_ring *ring;
8471         int r;
8472
8473         /* Read BIOS */
8474         if (!radeon_get_bios(rdev)) {
8475                 if (ASIC_IS_AVIVO(rdev))
8476                         return -EINVAL;
8477         }
8478         /* Must be an ATOMBIOS */
8479         if (!rdev->is_atom_bios) {
8480                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8481                 return -EINVAL;
8482         }
8483         r = radeon_atombios_init(rdev);
8484         if (r)
8485                 return r;
8486
8487         /* Post card if necessary */
8488         if (!radeon_card_posted(rdev)) {
8489                 if (!rdev->bios) {
8490                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8491                         return -EINVAL;
8492                 }
8493                 DRM_INFO("GPU not posted. posting now...\n");
8494                 atom_asic_init(rdev->mode_info.atom_context);
8495         }
8496         /* init golden registers */
8497         cik_init_golden_registers(rdev);
8498         /* Initialize scratch registers */
8499         cik_scratch_init(rdev);
8500         /* Initialize surface registers */
8501         radeon_surface_init(rdev);
8502         /* Initialize clocks */
8503         radeon_get_clock_info(rdev->ddev);
8504
8505         /* Fence driver */
8506         radeon_fence_driver_init(rdev);
8507
8508         /* initialize memory controller */
8509         r = cik_mc_init(rdev);
8510         if (r)
8511                 return r;
8512         /* Memory manager */
8513         r = radeon_bo_init(rdev);
8514         if (r)
8515                 return r;
8516
8517         if (rdev->flags & RADEON_IS_IGP) {
8518                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8519                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8520                         r = cik_init_microcode(rdev);
8521                         if (r) {
8522                                 DRM_ERROR("Failed to load firmware!\n");
8523                                 return r;
8524                         }
8525                 }
8526         } else {
8527                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8528                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8529                     !rdev->mc_fw) {
8530                         r = cik_init_microcode(rdev);
8531                         if (r) {
8532                                 DRM_ERROR("Failed to load firmware!\n");
8533                                 return r;
8534                         }
8535                 }
8536         }
8537
8538         /* Initialize power management */
8539         radeon_pm_init(rdev);
8540
8541         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8542         ring->ring_obj = NULL;
8543         r600_ring_init(rdev, ring, 1024 * 1024);
8544
8545         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8546         ring->ring_obj = NULL;
8547         r600_ring_init(rdev, ring, 1024 * 1024);
8548         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8549         if (r)
8550                 return r;
8551
8552         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8553         ring->ring_obj = NULL;
8554         r600_ring_init(rdev, ring, 1024 * 1024);
8555         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8556         if (r)
8557                 return r;
8558
8559         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8560         ring->ring_obj = NULL;
8561         r600_ring_init(rdev, ring, 256 * 1024);
8562
8563         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8564         ring->ring_obj = NULL;
8565         r600_ring_init(rdev, ring, 256 * 1024);
8566
8567         cik_uvd_init(rdev);
8568         cik_vce_init(rdev);
8569
8570         rdev->ih.ring_obj = NULL;
8571         r600_ih_ring_init(rdev, 64 * 1024);
8572
8573         r = r600_pcie_gart_init(rdev);
8574         if (r)
8575                 return r;
8576
8577         rdev->accel_working = true;
8578         r = cik_startup(rdev);
8579         if (r) {
8580                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8581                 cik_cp_fini(rdev);
8582                 cik_sdma_fini(rdev);
8583                 cik_irq_fini(rdev);
8584                 sumo_rlc_fini(rdev);
8585                 cik_mec_fini(rdev);
8586                 radeon_wb_fini(rdev);
8587                 radeon_ib_pool_fini(rdev);
8588                 radeon_vm_manager_fini(rdev);
8589                 radeon_irq_kms_fini(rdev);
8590                 cik_pcie_gart_fini(rdev);
8591                 rdev->accel_working = false;
8592         }
8593
8594         /* Don't start up if the MC ucode is missing.
8595          * The default clocks and voltages before the MC ucode
8596          * is loaded are not suffient for advanced operations.
8597          */
8598         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8599                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8600                 return -EINVAL;
8601         }
8602
8603         return 0;
8604 }
8605
8606 /**
8607  * cik_fini - asic specific driver and hw fini
8608  *
8609  * @rdev: radeon_device pointer
8610  *
8611  * Tear down the asic specific driver variables and program the hw
8612  * to an idle state (CIK).
8613  * Called at driver unload.
8614  */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop the engines first */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	/* GART and VRAM scratch go after everything that uses GPU VM */
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8641
8642 void dce8_program_fmt(struct drm_encoder *encoder)
8643 {
8644         struct drm_device *dev = encoder->dev;
8645         struct radeon_device *rdev = dev->dev_private;
8646         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8647         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8648         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8649         int bpc = 0;
8650         u32 tmp = 0;
8651         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8652
8653         if (connector) {
8654                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8655                 bpc = radeon_get_monitor_bpc(connector);
8656                 dither = radeon_connector->dither;
8657         }
8658
8659         /* LVDS/eDP FMT is set up by atom */
8660         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8661                 return;
8662
8663         /* not needed for analog */
8664         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8665             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8666                 return;
8667
8668         if (bpc == 0)
8669                 return;
8670
8671         switch (bpc) {
8672         case 6:
8673                 if (dither == RADEON_FMT_DITHER_ENABLE)
8674                         /* XXX sort out optimal dither settings */
8675                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8676                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8677                 else
8678                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8679                 break;
8680         case 8:
8681                 if (dither == RADEON_FMT_DITHER_ENABLE)
8682                         /* XXX sort out optimal dither settings */
8683                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8684                                 FMT_RGB_RANDOM_ENABLE |
8685                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8686                 else
8687                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8688                 break;
8689         case 10:
8690                 if (dither == RADEON_FMT_DITHER_ENABLE)
8691                         /* XXX sort out optimal dither settings */
8692                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8693                                 FMT_RGB_RANDOM_ENABLE |
8694                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8695                 else
8696                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8697                 break;
8698         default:
8699                 /* not needed */
8700                 break;
8701         }
8702
8703         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8704 }
8705
8706 /* display watermark setup */
8707 /**
8708  * dce8_line_buffer_adjust - Set up the line buffer
8709  *
8710  * @rdev: radeon_device pointer
8711  * @radeon_crtc: the selected display controller
8712  * @mode: the current display mode on the selected display
8713  * controller
8714  *
8715  * Setup up the line buffer allocation for
8716  * the selected display controller (CIK).
8717  * Returns the line buffer size in pixels.
8718  */
8719 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8720                                    struct radeon_crtc *radeon_crtc,
8721                                    struct drm_display_mode *mode)
8722 {
8723         u32 tmp, buffer_alloc, i;
8724         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8725         /*
8726          * Line Buffer Setup
8727          * There are 6 line buffers, one for each display controllers.
8728          * There are 3 partitions per LB. Select the number of partitions
8729          * to enable based on the display width.  For display widths larger
8730          * than 4096, you need use to use 2 display controllers and combine
8731          * them using the stereo blender.
8732          */
8733         if (radeon_crtc->base.enabled && mode) {
8734                 if (mode->crtc_hdisplay < 1920) {
8735                         tmp = 1;
8736                         buffer_alloc = 2;
8737                 } else if (mode->crtc_hdisplay < 2560) {
8738                         tmp = 2;
8739                         buffer_alloc = 2;
8740                 } else if (mode->crtc_hdisplay < 4096) {
8741                         tmp = 0;
8742                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8743                 } else {
8744                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8745                         tmp = 0;
8746                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8747                 }
8748         } else {
8749                 tmp = 1;
8750                 buffer_alloc = 0;
8751         }
8752
8753         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8754                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8755
8756         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8757                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8758         for (i = 0; i < rdev->usec_timeout; i++) {
8759                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8760                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8761                         break;
8762                 udelay(1);
8763         }
8764
8765         if (radeon_crtc->base.enabled && mode) {
8766                 switch (tmp) {
8767                 case 0:
8768                 default:
8769                         return 4096 * 2;
8770                 case 1:
8771                         return 1920 * 2;
8772                 case 2:
8773                         return 2560 * 2;
8774                 }
8775         }
8776
8777         /* controller not enabled, so no lb used */
8778         return 0;
8779 }
8780
8781 /**
8782  * cik_get_number_of_dram_channels - get the number of dram channels
8783  *
8784  * @rdev: radeon_device pointer
8785  *
8786  * Look up the number of video ram channels (CIK).
8787  * Used for display watermark bandwidth calculations
8788  * Returns the number of dram channels
8789  */
8790 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8791 {
8792         u32 tmp = RREG32(MC_SHARED_CHMAP);
8793
8794         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8795         case 0:
8796         default:
8797                 return 1;
8798         case 1:
8799                 return 2;
8800         case 2:
8801                 return 4;
8802         case 3:
8803                 return 8;
8804         case 4:
8805                 return 3;
8806         case 5:
8807                 return 6;
8808         case 6:
8809                 return 10;
8810         case 7:
8811                 return 12;
8812         case 8:
8813                 return 16;
8814         }
8815 }
8816
/* input parameters for the dce8 display watermark calculations */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8832
8833 /**
8834  * dce8_dram_bandwidth - get the dram bandwidth
8835  *
8836  * @wm: watermark calculation data
8837  *
8838  * Calculate the raw dram bandwidth (CIK).
8839  * Used for display watermark bandwidth calculations
8840  * Returns the dram bandwidth in MBytes/s
8841  */
8842 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8843 {
8844         /* Calculate raw DRAM Bandwidth */
8845         fixed20_12 dram_efficiency; /* 0.7 */
8846         fixed20_12 yclk, dram_channels, bandwidth;
8847         fixed20_12 a;
8848
8849         a.full = dfixed_const(1000);
8850         yclk.full = dfixed_const(wm->yclk);
8851         yclk.full = dfixed_div(yclk, a);
8852         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8853         a.full = dfixed_const(10);
8854         dram_efficiency.full = dfixed_const(7);
8855         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8856         bandwidth.full = dfixed_mul(dram_channels, yclk);
8857         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8858
8859         return dfixed_trunc(bandwidth);
8860 }
8861
8862 /**
8863  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8864  *
8865  * @wm: watermark calculation data
8866  *
8867  * Calculate the dram bandwidth used for display (CIK).
8868  * Used for display watermark bandwidth calculations
8869  * Returns the dram bandwidth for display in MBytes/s
8870  */
8871 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8872 {
8873         /* Calculate DRAM Bandwidth and the part allocated to display. */
8874         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8875         fixed20_12 yclk, dram_channels, bandwidth;
8876         fixed20_12 a;
8877
8878         a.full = dfixed_const(1000);
8879         yclk.full = dfixed_const(wm->yclk);
8880         yclk.full = dfixed_div(yclk, a);
8881         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8882         a.full = dfixed_const(10);
8883         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8884         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8885         bandwidth.full = dfixed_mul(dram_channels, yclk);
8886         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8887
8888         return dfixed_trunc(bandwidth);
8889 }
8890
8891 /**
8892  * dce8_data_return_bandwidth - get the data return bandwidth
8893  *
8894  * @wm: watermark calculation data
8895  *
8896  * Calculate the data return bandwidth used for display (CIK).
8897  * Used for display watermark bandwidth calculations
8898  * Returns the data return bandwidth in MBytes/s
8899  */
8900 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8901 {
8902         /* Calculate the display Data return Bandwidth */
8903         fixed20_12 return_efficiency; /* 0.8 */
8904         fixed20_12 sclk, bandwidth;
8905         fixed20_12 a;
8906
8907         a.full = dfixed_const(1000);
8908         sclk.full = dfixed_const(wm->sclk);
8909         sclk.full = dfixed_div(sclk, a);
8910         a.full = dfixed_const(10);
8911         return_efficiency.full = dfixed_const(8);
8912         return_efficiency.full = dfixed_div(return_efficiency, a);
8913         a.full = dfixed_const(32);
8914         bandwidth.full = dfixed_mul(a, sclk);
8915         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8916
8917         return dfixed_trunc(bandwidth);
8918 }
8919
8920 /**
8921  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8922  *
8923  * @wm: watermark calculation data
8924  *
8925  * Calculate the dmif bandwidth used for display (CIK).
8926  * Used for display watermark bandwidth calculations
8927  * Returns the dmif bandwidth in MBytes/s
8928  */
8929 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8930 {
8931         /* Calculate the DMIF Request Bandwidth */
8932         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8933         fixed20_12 disp_clk, bandwidth;
8934         fixed20_12 a, b;
8935
8936         a.full = dfixed_const(1000);
8937         disp_clk.full = dfixed_const(wm->disp_clk);
8938         disp_clk.full = dfixed_div(disp_clk, a);
8939         a.full = dfixed_const(32);
8940         b.full = dfixed_mul(a, disp_clk);
8941
8942         a.full = dfixed_const(10);
8943         disp_clk_request_efficiency.full = dfixed_const(8);
8944         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8945
8946         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8947
8948         return dfixed_trunc(bandwidth);
8949 }
8950
8951 /**
8952  * dce8_available_bandwidth - get the min available bandwidth
8953  *
8954  * @wm: watermark calculation data
8955  *
8956  * Calculate the min available bandwidth used for display (CIK).
8957  * Used for display watermark bandwidth calculations
8958  * Returns the min available bandwidth in MBytes/s
8959  */
8960 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8961 {
8962         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8963         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8964         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8965         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8966
8967         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8968 }
8969
8970 /**
8971  * dce8_average_bandwidth - get the average available bandwidth
8972  *
8973  * @wm: watermark calculation data
8974  *
8975  * Calculate the average available bandwidth used for display (CIK).
8976  * Used for display watermark bandwidth calculations
8977  * Returns the average available bandwidth in MBytes/s
8978  */
8979 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8980 {
8981         /* Calculate the display mode Average Bandwidth
8982          * DisplayMode should contain the source and destination dimensions,
8983          * timing, etc.
8984          */
8985         fixed20_12 bpp;
8986         fixed20_12 line_time;
8987         fixed20_12 src_width;
8988         fixed20_12 bandwidth;
8989         fixed20_12 a;
8990
8991         a.full = dfixed_const(1000);
8992         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8993         line_time.full = dfixed_div(line_time, a);
8994         bpp.full = dfixed_const(wm->bytes_per_pixel);
8995         src_width.full = dfixed_const(wm->src_width);
8996         bandwidth.full = dfixed_mul(src_width, bpp);
8997         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8998         bandwidth.full = dfixed_div(bandwidth, line_time);
8999
9000         return dfixed_trunc(bandwidth);
9001 }
9002
9003 /**
9004  * dce8_latency_watermark - get the latency watermark
9005  *
9006  * @wm: watermark calculation data
9007  *
9008  * Calculate the latency watermark (CIK).
9009  * Used for display watermark bandwidth calculations
9010  * Returns the latency watermark in ns
9011  */
9012 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9013 {
9014         /* First calculate the latency in ns */
9015         u32 mc_latency = 2000; /* 2000 ns. */
9016         u32 available_bandwidth = dce8_available_bandwidth(wm);
9017         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9018         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9019         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9020         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9021                 (wm->num_heads * cursor_line_pair_return_time);
9022         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9023         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9024         u32 tmp, dmif_size = 12288;
9025         fixed20_12 a, b, c;
9026
9027         if (wm->num_heads == 0)
9028                 return 0;
9029
9030         a.full = dfixed_const(2);
9031         b.full = dfixed_const(1);
9032         if ((wm->vsc.full > a.full) ||
9033             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9034             (wm->vtaps >= 5) ||
9035             ((wm->vsc.full >= a.full) && wm->interlaced))
9036                 max_src_lines_per_dst_line = 4;
9037         else
9038                 max_src_lines_per_dst_line = 2;
9039
9040         a.full = dfixed_const(available_bandwidth);
9041         b.full = dfixed_const(wm->num_heads);
9042         a.full = dfixed_div(a, b);
9043         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9044         tmp = min(dfixed_trunc(a), tmp);
9045
9046         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9047
9048         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9049         b.full = dfixed_const(1000);
9050         c.full = dfixed_const(lb_fill_bw);
9051         b.full = dfixed_div(c, b);
9052         a.full = dfixed_div(a, b);
9053         line_fill_time = dfixed_trunc(a);
9054
9055         if (line_fill_time < wm->active_time)
9056                 return latency;
9057         else
9058                 return latency + (line_fill_time - wm->active_time);
9059
9060 }
9061
9062 /**
9063  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9064  * average and available dram bandwidth
9065  *
9066  * @wm: watermark calculation data
9067  *
9068  * Check if the display average bandwidth fits in the display
9069  * dram bandwidth (CIK).
9070  * Used for display watermark bandwidth calculations
9071  * Returns true if the display fits, false if not.
9072  */
9073 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9074 {
9075         if (dce8_average_bandwidth(wm) <=
9076             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9077                 return true;
9078         else
9079                 return false;
9080 }
9081
9082 /**
9083  * dce8_average_bandwidth_vs_available_bandwidth - check
9084  * average and available bandwidth
9085  *
9086  * @wm: watermark calculation data
9087  *
9088  * Check if the display average bandwidth fits in the display
9089  * available bandwidth (CIK).
9090  * Used for display watermark bandwidth calculations
9091  * Returns true if the display fits, false if not.
9092  */
9093 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9094 {
9095         if (dce8_average_bandwidth(wm) <=
9096             (dce8_available_bandwidth(wm) / wm->num_heads))
9097                 return true;
9098         else
9099                 return false;
9100 }
9101
9102 /**
9103  * dce8_check_latency_hiding - check latency hiding
9104  *
9105  * @wm: watermark calculation data
9106  *
9107  * Check latency hiding (CIK).
9108  * Used for display watermark bandwidth calculations
9109  * Returns true if the display fits, false if not.
9110  */
9111 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9112 {
9113         u32 lb_partitions = wm->lb_size / wm->src_width;
9114         u32 line_time = wm->active_time + wm->blank_time;
9115         u32 latency_tolerant_lines;
9116         u32 latency_hiding;
9117         fixed20_12 a;
9118
9119         a.full = dfixed_const(1);
9120         if (wm->vsc.full > a.full)
9121                 latency_tolerant_lines = 1;
9122         else {
9123                 if (lb_partitions <= (wm->vtaps + 1))
9124                         latency_tolerant_lines = 1;
9125                 else
9126                         latency_tolerant_lines = 2;
9127         }
9128
9129         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9130
9131         if (dce8_latency_watermark(wm) <= latency_hiding)
9132                 return true;
9133         else
9134                 return false;
9135 }
9136
9137 /**
9138  * dce8_program_watermarks - program display watermarks
9139  *
9140  * @rdev: radeon_device pointer
9141  * @radeon_crtc: the selected display controller
9142  * @lb_size: line buffer size
9143  * @num_heads: number of display controllers in use
9144  *
9145  * Calculate and program the display watermarks for the
9146  * selected display controller (CIK).
9147  */
9148 static void dce8_program_watermarks(struct radeon_device *rdev,
9149                                     struct radeon_crtc *radeon_crtc,
9150                                     u32 lb_size, u32 num_heads)
9151 {
9152         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9153         struct dce8_wm_params wm_low, wm_high;
9154         u32 active_time;
9155         u32 line_time = 0;
9156         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9157         u32 tmp, wm_mask;
9158
9159         if (radeon_crtc->base.enabled && num_heads && mode) {
9160                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9161                                             (u32)mode->clock);
9162                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9163                                           (u32)mode->clock);
9164                 line_time = min(line_time, (u32)65535);
9165
9166                 /* watermark for high clocks */
9167                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9168                     rdev->pm.dpm_enabled) {
9169                         wm_high.yclk =
9170                                 radeon_dpm_get_mclk(rdev, false) * 10;
9171                         wm_high.sclk =
9172                                 radeon_dpm_get_sclk(rdev, false) * 10;
9173                 } else {
9174                         wm_high.yclk = rdev->pm.current_mclk * 10;
9175                         wm_high.sclk = rdev->pm.current_sclk * 10;
9176                 }
9177
9178                 wm_high.disp_clk = mode->clock;
9179                 wm_high.src_width = mode->crtc_hdisplay;
9180                 wm_high.active_time = active_time;
9181                 wm_high.blank_time = line_time - wm_high.active_time;
9182                 wm_high.interlaced = false;
9183                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9184                         wm_high.interlaced = true;
9185                 wm_high.vsc = radeon_crtc->vsc;
9186                 wm_high.vtaps = 1;
9187                 if (radeon_crtc->rmx_type != RMX_OFF)
9188                         wm_high.vtaps = 2;
9189                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9190                 wm_high.lb_size = lb_size;
9191                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9192                 wm_high.num_heads = num_heads;
9193
9194                 /* set for high clocks */
9195                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9196
9197                 /* possibly force display priority to high */
9198                 /* should really do this at mode validation time... */
9199                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9200                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9201                     !dce8_check_latency_hiding(&wm_high) ||
9202                     (rdev->disp_priority == 2)) {
9203                         DRM_DEBUG_KMS("force priority to high\n");
9204                 }
9205
9206                 /* watermark for low clocks */
9207                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9208                     rdev->pm.dpm_enabled) {
9209                         wm_low.yclk =
9210                                 radeon_dpm_get_mclk(rdev, true) * 10;
9211                         wm_low.sclk =
9212                                 radeon_dpm_get_sclk(rdev, true) * 10;
9213                 } else {
9214                         wm_low.yclk = rdev->pm.current_mclk * 10;
9215                         wm_low.sclk = rdev->pm.current_sclk * 10;
9216                 }
9217
9218                 wm_low.disp_clk = mode->clock;
9219                 wm_low.src_width = mode->crtc_hdisplay;
9220                 wm_low.active_time = active_time;
9221                 wm_low.blank_time = line_time - wm_low.active_time;
9222                 wm_low.interlaced = false;
9223                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9224                         wm_low.interlaced = true;
9225                 wm_low.vsc = radeon_crtc->vsc;
9226                 wm_low.vtaps = 1;
9227                 if (radeon_crtc->rmx_type != RMX_OFF)
9228                         wm_low.vtaps = 2;
9229                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9230                 wm_low.lb_size = lb_size;
9231                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9232                 wm_low.num_heads = num_heads;
9233
9234                 /* set for low clocks */
9235                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9236
9237                 /* possibly force display priority to high */
9238                 /* should really do this at mode validation time... */
9239                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9240                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9241                     !dce8_check_latency_hiding(&wm_low) ||
9242                     (rdev->disp_priority == 2)) {
9243                         DRM_DEBUG_KMS("force priority to high\n");
9244                 }
9245
9246                 /* Save number of lines the linebuffer leads before the scanout */
9247                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9248         }
9249
9250         /* select wm A */
9251         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9252         tmp = wm_mask;
9253         tmp &= ~LATENCY_WATERMARK_MASK(3);
9254         tmp |= LATENCY_WATERMARK_MASK(1);
9255         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9256         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9257                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9258                 LATENCY_HIGH_WATERMARK(line_time)));
9259         /* select wm B */
9260         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9261         tmp &= ~LATENCY_WATERMARK_MASK(3);
9262         tmp |= LATENCY_WATERMARK_MASK(2);
9263         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9264         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9265                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9266                 LATENCY_HIGH_WATERMARK(line_time)));
9267         /* restore original selection */
9268         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9269
9270         /* save values for DPM */
9271         radeon_crtc->line_time = line_time;
9272         radeon_crtc->wm_high = latency_watermark_a;
9273         radeon_crtc->wm_low = latency_watermark_b;
9274 }
9275
9276 /**
9277  * dce8_bandwidth_update - program display watermarks
9278  *
9279  * @rdev: radeon_device pointer
9280  *
9281  * Calculate and program the display watermarks and line
9282  * buffer allocation (CIK).
9283  */
9284 void dce8_bandwidth_update(struct radeon_device *rdev)
9285 {
9286         struct drm_display_mode *mode = NULL;
9287         u32 num_heads = 0, lb_size;
9288         int i;
9289
9290         if (!rdev->mode_info.mode_config_initialized)
9291                 return;
9292
9293         radeon_update_display_priority(rdev);
9294
9295         for (i = 0; i < rdev->num_crtc; i++) {
9296                 if (rdev->mode_info.crtcs[i]->base.enabled)
9297                         num_heads++;
9298         }
9299         for (i = 0; i < rdev->num_crtc; i++) {
9300                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9301                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9302                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9303         }
9304 }
9305
9306 /**
9307  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9308  *
9309  * @rdev: radeon_device pointer
9310  *
9311  * Fetches a GPU clock counter snapshot (SI).
9312  * Returns the 64 bit clock counter snapshot.
9313  */
9314 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9315 {
9316         uint64_t clock;
9317
9318         mutex_lock(&rdev->gpu_clock_mutex);
9319         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9320         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9321                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9322         mutex_unlock(&rdev->gpu_clock_mutex);
9323         return clock;
9324 }
9325
9326 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9327                              u32 cntl_reg, u32 status_reg)
9328 {
9329         int r, i;
9330         struct atom_clock_dividers dividers;
9331         uint32_t tmp;
9332
9333         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9334                                            clock, false, &dividers);
9335         if (r)
9336                 return r;
9337
9338         tmp = RREG32_SMC(cntl_reg);
9339         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9340         tmp |= dividers.post_divider;
9341         WREG32_SMC(cntl_reg, tmp);
9342
9343         for (i = 0; i < 100; i++) {
9344                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9345                         break;
9346                 mdelay(10);
9347         }
9348         if (i == 100)
9349                 return -ETIMEDOUT;
9350
9351         return 0;
9352 }
9353
9354 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9355 {
9356         int r = 0;
9357
9358         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9359         if (r)
9360                 return r;
9361
9362         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9363         return r;
9364 }
9365
9366 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9367 {
9368         int r, i;
9369         struct atom_clock_dividers dividers;
9370         u32 tmp;
9371
9372         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9373                                            ecclk, false, &dividers);
9374         if (r)
9375                 return r;
9376
9377         for (i = 0; i < 100; i++) {
9378                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9379                         break;
9380                 mdelay(10);
9381         }
9382         if (i == 100)
9383                 return -ETIMEDOUT;
9384
9385         tmp = RREG32_SMC(CG_ECLK_CNTL);
9386         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9387         tmp |= dividers.post_divider;
9388         WREG32_SMC(CG_ECLK_CNTL, tmp);
9389
9390         for (i = 0; i < 100; i++) {
9391                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9392                         break;
9393                 mdelay(10);
9394         }
9395         if (i == 100)
9396                 return -ETIMEDOUT;
9397
9398         return 0;
9399 }
9400
9401 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9402 {
9403         struct pci_dev *root = rdev->pdev->bus->self;
9404         enum pci_bus_speed speed_cap;
9405         u32 speed_cntl, current_data_rate;
9406         int i;
9407         u16 tmp16;
9408
9409         if (pci_is_root_bus(rdev->pdev->bus))
9410                 return;
9411
9412         if (radeon_pcie_gen2 == 0)
9413                 return;
9414
9415         if (rdev->flags & RADEON_IS_IGP)
9416                 return;
9417
9418         if (!(rdev->flags & RADEON_IS_PCIE))
9419                 return;
9420
9421         speed_cap = pcie_get_speed_cap(root);
9422         if (speed_cap == PCI_SPEED_UNKNOWN)
9423                 return;
9424
9425         if ((speed_cap != PCIE_SPEED_8_0GT) &&
9426             (speed_cap != PCIE_SPEED_5_0GT))
9427                 return;
9428
9429         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9430         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9431                 LC_CURRENT_DATA_RATE_SHIFT;
9432         if (speed_cap == PCIE_SPEED_8_0GT) {
9433                 if (current_data_rate == 2) {
9434                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9435                         return;
9436                 }
9437                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9438         } else if (speed_cap == PCIE_SPEED_5_0GT) {
9439                 if (current_data_rate == 1) {
9440                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9441                         return;
9442                 }
9443                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9444         }
9445
9446         if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9447                 return;
9448
9449         if (speed_cap == PCIE_SPEED_8_0GT) {
9450                 /* re-try equalization if gen3 is not already enabled */
9451                 if (current_data_rate != 2) {
9452                         u16 bridge_cfg, gpu_cfg;
9453                         u16 bridge_cfg2, gpu_cfg2;
9454                         u32 max_lw, current_lw, tmp;
9455
9456                         pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9457                                                   &bridge_cfg);
9458                         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9459                                                   &gpu_cfg);
9460
9461                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9462                         pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9463
9464                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9465                         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9466                                                    tmp16);
9467
9468                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9469                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9470                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9471
9472                         if (current_lw < max_lw) {
9473                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9474                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9475                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9476                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9477                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9478                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9479                                 }
9480                         }
9481
9482                         for (i = 0; i < 10; i++) {
9483                                 /* check status */
9484                                 pcie_capability_read_word(rdev->pdev,
9485                                                           PCI_EXP_DEVSTA,
9486                                                           &tmp16);
9487                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9488                                         break;
9489
9490                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9491                                                           &bridge_cfg);
9492                                 pcie_capability_read_word(rdev->pdev,
9493                                                           PCI_EXP_LNKCTL,
9494                                                           &gpu_cfg);
9495
9496                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9497                                                           &bridge_cfg2);
9498                                 pcie_capability_read_word(rdev->pdev,
9499                                                           PCI_EXP_LNKCTL2,
9500                                                           &gpu_cfg2);
9501
9502                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9503                                 tmp |= LC_SET_QUIESCE;
9504                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9505
9506                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9507                                 tmp |= LC_REDO_EQ;
9508                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9509
9510                                 msleep(100);
9511
9512                                 /* linkctl */
9513                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9514                                                           &tmp16);
9515                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9516                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9517                                 pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9518                                                            tmp16);
9519
9520                                 pcie_capability_read_word(rdev->pdev,
9521                                                           PCI_EXP_LNKCTL,
9522                                                           &tmp16);
9523                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9524                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9525                                 pcie_capability_write_word(rdev->pdev,
9526                                                            PCI_EXP_LNKCTL,
9527                                                            tmp16);
9528
9529                                 /* linkctl2 */
9530                                 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9531                                                           &tmp16);
9532                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9533                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9534                                 tmp16 |= (bridge_cfg2 &
9535                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9536                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9537                                 pcie_capability_write_word(root,
9538                                                            PCI_EXP_LNKCTL2,
9539                                                            tmp16);
9540
9541                                 pcie_capability_read_word(rdev->pdev,
9542                                                           PCI_EXP_LNKCTL2,
9543                                                           &tmp16);
9544                                 tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9545                                            PCI_EXP_LNKCTL2_TX_MARGIN);
9546                                 tmp16 |= (gpu_cfg2 &
9547                                           (PCI_EXP_LNKCTL2_ENTER_COMP |
9548                                            PCI_EXP_LNKCTL2_TX_MARGIN));
9549                                 pcie_capability_write_word(rdev->pdev,
9550                                                            PCI_EXP_LNKCTL2,
9551                                                            tmp16);
9552
9553                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9554                                 tmp &= ~LC_SET_QUIESCE;
9555                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9556                         }
9557                 }
9558         }
9559
9560         /* set the link speed */
9561         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9562         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9563         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9564
9565         pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9566         tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9567         if (speed_cap == PCIE_SPEED_8_0GT)
9568                 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9569         else if (speed_cap == PCIE_SPEED_5_0GT)
9570                 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9571         else
9572                 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9573         pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9574
9575         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9576         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9577         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9578
9579         for (i = 0; i < rdev->usec_timeout; i++) {
9580                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9581                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9582                         break;
9583                 udelay(1);
9584         }
9585 }
9586
/**
 * cik_program_aspm - configure PCIe ASPM for CIK dGPUs
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link power-management related registers (L0s/L1
 * inactivity timers, PLL power-down in L1, dynamic lane power states,
 * CLKREQ#-dependent clock selection).  Does nothing when ASPM is disabled
 * via the radeon.aspm module parameter, on IGPs, or on non-PCIE parts.
 * All register writes are read-modify-write and only performed when the
 * value actually changes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* Compile-time knobs; currently all hard-coded off, i.e. L0s, L1,
	 * PLL-off-in-L1 and CLKREQ handling are all enabled below.
	 */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 module parameter disables all ASPM programming */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the number of fast training sequences (N_FTS) we advertise */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the new LC_CNTL value: clear both inactivity timers, then
	 * re-enable them below depending on the disable_* knobs.  Note the
	 * register is only written inside the !disable_l1 / else branches.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set the L1 inactivity timer and allow PMI->L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow the PIF PLLs to power down in the OFF and
			 * TXS2 states (both PB0 and PB1 pads).
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* Enable dynamic lane power-down */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only usable if the upstream bridge
			 * advertises clock power management in its link
			 * capabilities; a root-bus device has no bridge.
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* Allow power-down in L1/L2-L3 and switch
				 * thermal/misc clocks off the reference clock
				 * so they keep running when REFCLK is gated.
				 */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: still commit the L0s/PMI bits computed above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable memory light-sleep for the BIF slave/master/replay memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If the received N_FTS field is saturated and the link is
		 * lane-reversed in both directions, back out the L0s
		 * inactivity timer set earlier (leaving L0s effectively off).
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}