GNU Linux-libre 5.19-rc6-gnu
[releases.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "evergreen.h"
36 #include "r600.h"
37 #include "radeon.h"
38 #include "radeon_asic.h"
39 #include "radeon_audio.h"
40 #include "radeon_ucode.h"
41 #include "si_blit_shaders.h"
42 #include "si.h"
43 #include "sid.h"
44
45
46 /*(DEBLOBBED)*/
47
/* Forward declarations for SI helpers defined later in this file. */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
/* RLC buffer helpers shared with other ASIC families; defined elsewhere. */
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
/* Power-gating (pg) and clock-gating (cg) init/teardown, plus RLC stop. */
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
60
/*
 * MMIO register-block offsets of the six display controllers; indexed by
 * CRTC number (0-5) to address per-CRTC registers.
 */
static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};
70
/*
 * Display interrupt status registers, one slot per display controller
 * (parallel to crtc_offsets above).
 */
static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};
80
/*
 * Per-pad hot-plug-detect (HPD) register addressing: the DC_HPDx register
 * sets are laid out at a fixed 0xc-byte stride starting from the HPD1
 * registers; 'x' is the zero-based HPD pad index.
 * The argument is parenthesized so that passing a non-trivial expression
 * (e.g. 'i + 1') cannot be broken by operator precedence.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
84
/*
 * RLC save/restore register list for Verde.  Entries are pairs of
 * ((marker << 16) | (byte_offset >> 2)) followed by a data word; the list is
 * terminated by a final 0x00000000.  The exact marker semantics are defined
 * by the RLC microcode interface — NOTE(review): consumed by the RLC setup
 * code elsewhere in this driver; confirm format against sumo_rlc_init().
 */
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0X00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000	/* list terminator */
};
306
/*
 * "Golden" RLC register settings for Tahiti.  Rows of three words —
 * NOTE(review): layout suggests (reg, mask, value) triples applied as
 * read-modify-write by the golden-register programming helper; confirm
 * against the consumer (not visible in this chunk).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
316
/*
 * "Golden" register settings for Tahiti, as (reg, mask, value) style triples
 * — NOTE(review): triple format inferred from layout; applied elsewhere.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
351
/* Second golden-register pass for Tahiti (same triple layout as above). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
356
/* Golden RLC register settings for Pitcairn (same triple layout as Tahiti's). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
365
/* Golden register settings for Pitcairn (same triple layout as Tahiti's). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
396
/* Golden RLC register settings for Verde (same triple layout as Tahiti's). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
405
/*
 * Golden register settings for Verde (same triple layout as Tahiti's).
 * Several rows are repeated verbatim in the upstream table; kept as-is —
 * NOTE(review): the duplicates are redundant but harmless for RMW writes.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
461
/* Golden RLC register settings for Oland (same triple layout as Tahiti's). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
470
/* Golden register settings for Oland (same triple layout as Tahiti's). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
501
/* Golden register settings for Hainan (same triple layout as Tahiti's). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
530
/* Second golden-register pass for Hainan (same triple layout as above). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
535
/*
 * Clock-gating init sequence for Tahiti — "mgcg"/"cgcg" presumably
 * medium-grain / coarse-grain clock gating (NOTE(review): expansion inferred
 * from the names; confirm against the clock-gating setup code).  Same
 * (reg, mask, value) triple layout as the golden-register tables.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
665
/*
 * Clock-gating init sequence for Pitcairn (same triple layout and presumed
 * mgcg/cgcg meaning as tahiti_mgcg_cgcg_init; fewer per-SE rows).
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
763
/*
 * Clock-gating init sequence for Verde (same triple layout and presumed
 * mgcg/cgcg meaning as tahiti_mgcg_cgcg_init).
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
863
/*
 * Oland medium-grain / coarse-grain clock gating (MGCG/CGCG) init:
 * {register offset, AND mask, value} triples, applied in order by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 oland_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x102c, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x55e4, 0xff000fff, 0x00000100,
        0x55e8, 0x00000001, 0x00000001,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
943
/*
 * Hainan medium-grain / coarse-grain clock gating (MGCG/CGCG) init:
 * {register offset, AND mask, value} triples, applied in order by
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
        0xc400, 0xffffffff, 0xfffffffc,
        0x802c, 0xffffffff, 0xe0000000,
        0x9a60, 0xffffffff, 0x00000100,
        0x92a4, 0xffffffff, 0x00000100,
        0xc164, 0xffffffff, 0x00000100,
        0x9774, 0xffffffff, 0x00000100,
        0x8984, 0xffffffff, 0x06000100,
        0x8a18, 0xffffffff, 0x00000100,
        0x92a0, 0xffffffff, 0x00000100,
        0xc380, 0xffffffff, 0x00000100,
        0x8b28, 0xffffffff, 0x00000100,
        0x9144, 0xffffffff, 0x00000100,
        0x8d88, 0xffffffff, 0x00000100,
        0x8d8c, 0xffffffff, 0x00000100,
        0x9030, 0xffffffff, 0x00000100,
        0x9034, 0xffffffff, 0x00000100,
        0x9038, 0xffffffff, 0x00000100,
        0x903c, 0xffffffff, 0x00000100,
        0xad80, 0xffffffff, 0x00000100,
        0xac54, 0xffffffff, 0x00000100,
        0x897c, 0xffffffff, 0x06000100,
        0x9868, 0xffffffff, 0x00000100,
        0x9510, 0xffffffff, 0x00000100,
        0xaf04, 0xffffffff, 0x00000100,
        0xae04, 0xffffffff, 0x00000100,
        0x949c, 0xffffffff, 0x00000100,
        0x802c, 0xffffffff, 0xe0000000,
        0x9160, 0xffffffff, 0x00010000,
        0x9164, 0xffffffff, 0x00030002,
        0x9168, 0xffffffff, 0x00040007,
        0x916c, 0xffffffff, 0x00060005,
        0x9170, 0xffffffff, 0x00090008,
        0x9174, 0xffffffff, 0x00020001,
        0x9178, 0xffffffff, 0x00040003,
        0x917c, 0xffffffff, 0x00000007,
        0x9180, 0xffffffff, 0x00060005,
        0x9184, 0xffffffff, 0x00090008,
        0x9188, 0xffffffff, 0x00030002,
        0x918c, 0xffffffff, 0x00050004,
        0x9190, 0xffffffff, 0x00000008,
        0x9194, 0xffffffff, 0x00070006,
        0x9198, 0xffffffff, 0x000a0009,
        0x919c, 0xffffffff, 0x00040003,
        0x91a0, 0xffffffff, 0x00060005,
        0x91a4, 0xffffffff, 0x00000009,
        0x91a8, 0xffffffff, 0x00080007,
        0x91ac, 0xffffffff, 0x000b000a,
        0x91b0, 0xffffffff, 0x00050004,
        0x91b4, 0xffffffff, 0x00070006,
        0x91b8, 0xffffffff, 0x0008000b,
        0x91bc, 0xffffffff, 0x000a0009,
        0x91c0, 0xffffffff, 0x000d000c,
        0x91c4, 0xffffffff, 0x00060005,
        0x91c8, 0xffffffff, 0x00080007,
        0x91cc, 0xffffffff, 0x0000000b,
        0x91d0, 0xffffffff, 0x000a0009,
        0x91d4, 0xffffffff, 0x000d000c,
        0x9150, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc478, 0xffffffff, 0x00000080,
        0xc404, 0xffffffff, 0x0020003f,
        0x30, 0xffffffff, 0x0000001c,
        0x34, 0x000f0000, 0x000f0000,
        0x160c, 0xffffffff, 0x00000100,
        0x1024, 0xffffffff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x264c, 0x000c0000, 0x000c0000,
        0x2648, 0x000c0000, 0x000c0000,
        0x2f50, 0x00000001, 0x00000001,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd0c0, 0xfffffff0, 0x00000100,
        0xd8c0, 0xfffffff0, 0x00000100
};
1020
1021 static u32 verde_pg_init[] =
1022 {
1023         0x353c, 0xffffffff, 0x40000,
1024         0x3538, 0xffffffff, 0x200010ff,
1025         0x353c, 0xffffffff, 0x0,
1026         0x353c, 0xffffffff, 0x0,
1027         0x353c, 0xffffffff, 0x0,
1028         0x353c, 0xffffffff, 0x0,
1029         0x353c, 0xffffffff, 0x0,
1030         0x353c, 0xffffffff, 0x7007,
1031         0x3538, 0xffffffff, 0x300010ff,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x0,
1036         0x353c, 0xffffffff, 0x0,
1037         0x353c, 0xffffffff, 0x400000,
1038         0x3538, 0xffffffff, 0x100010ff,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x0,
1043         0x353c, 0xffffffff, 0x0,
1044         0x353c, 0xffffffff, 0x120200,
1045         0x3538, 0xffffffff, 0x500010ff,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x0,
1050         0x353c, 0xffffffff, 0x0,
1051         0x353c, 0xffffffff, 0x1e1e16,
1052         0x3538, 0xffffffff, 0x600010ff,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x0,
1057         0x353c, 0xffffffff, 0x0,
1058         0x353c, 0xffffffff, 0x171f1e,
1059         0x3538, 0xffffffff, 0x700010ff,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x0,
1064         0x353c, 0xffffffff, 0x0,
1065         0x353c, 0xffffffff, 0x0,
1066         0x3538, 0xffffffff, 0x9ff,
1067         0x3500, 0xffffffff, 0x0,
1068         0x3504, 0xffffffff, 0x10000800,
1069         0x3504, 0xffffffff, 0xf,
1070         0x3504, 0xffffffff, 0xf,
1071         0x3500, 0xffffffff, 0x4,
1072         0x3504, 0xffffffff, 0x1000051e,
1073         0x3504, 0xffffffff, 0xffff,
1074         0x3504, 0xffffffff, 0xffff,
1075         0x3500, 0xffffffff, 0x8,
1076         0x3504, 0xffffffff, 0x80500,
1077         0x3500, 0xffffffff, 0x12,
1078         0x3504, 0xffffffff, 0x9050c,
1079         0x3500, 0xffffffff, 0x1d,
1080         0x3504, 0xffffffff, 0xb052c,
1081         0x3500, 0xffffffff, 0x2a,
1082         0x3504, 0xffffffff, 0x1053e,
1083         0x3500, 0xffffffff, 0x2d,
1084         0x3504, 0xffffffff, 0x10546,
1085         0x3500, 0xffffffff, 0x30,
1086         0x3504, 0xffffffff, 0xa054e,
1087         0x3500, 0xffffffff, 0x3c,
1088         0x3504, 0xffffffff, 0x1055f,
1089         0x3500, 0xffffffff, 0x3f,
1090         0x3504, 0xffffffff, 0x10567,
1091         0x3500, 0xffffffff, 0x42,
1092         0x3504, 0xffffffff, 0x1056f,
1093         0x3500, 0xffffffff, 0x45,
1094         0x3504, 0xffffffff, 0x10572,
1095         0x3500, 0xffffffff, 0x48,
1096         0x3504, 0xffffffff, 0x20575,
1097         0x3500, 0xffffffff, 0x4c,
1098         0x3504, 0xffffffff, 0x190801,
1099         0x3500, 0xffffffff, 0x67,
1100         0x3504, 0xffffffff, 0x1082a,
1101         0x3500, 0xffffffff, 0x6a,
1102         0x3504, 0xffffffff, 0x1b082d,
1103         0x3500, 0xffffffff, 0x87,
1104         0x3504, 0xffffffff, 0x310851,
1105         0x3500, 0xffffffff, 0xba,
1106         0x3504, 0xffffffff, 0x891,
1107         0x3500, 0xffffffff, 0xbc,
1108         0x3504, 0xffffffff, 0x893,
1109         0x3500, 0xffffffff, 0xbe,
1110         0x3504, 0xffffffff, 0x20895,
1111         0x3500, 0xffffffff, 0xc2,
1112         0x3504, 0xffffffff, 0x20899,
1113         0x3500, 0xffffffff, 0xc6,
1114         0x3504, 0xffffffff, 0x2089d,
1115         0x3500, 0xffffffff, 0xca,
1116         0x3504, 0xffffffff, 0x8a1,
1117         0x3500, 0xffffffff, 0xcc,
1118         0x3504, 0xffffffff, 0x8a3,
1119         0x3500, 0xffffffff, 0xce,
1120         0x3504, 0xffffffff, 0x308a5,
1121         0x3500, 0xffffffff, 0xd3,
1122         0x3504, 0xffffffff, 0x6d08cd,
1123         0x3500, 0xffffffff, 0x142,
1124         0x3504, 0xffffffff, 0x2000095a,
1125         0x3504, 0xffffffff, 0x1,
1126         0x3500, 0xffffffff, 0x144,
1127         0x3504, 0xffffffff, 0x301f095b,
1128         0x3500, 0xffffffff, 0x165,
1129         0x3504, 0xffffffff, 0xc094d,
1130         0x3500, 0xffffffff, 0x173,
1131         0x3504, 0xffffffff, 0xf096d,
1132         0x3500, 0xffffffff, 0x184,
1133         0x3504, 0xffffffff, 0x15097f,
1134         0x3500, 0xffffffff, 0x19b,
1135         0x3504, 0xffffffff, 0xc0998,
1136         0x3500, 0xffffffff, 0x1a9,
1137         0x3504, 0xffffffff, 0x409a7,
1138         0x3500, 0xffffffff, 0x1af,
1139         0x3504, 0xffffffff, 0xcdc,
1140         0x3500, 0xffffffff, 0x1b1,
1141         0x3504, 0xffffffff, 0x800,
1142         0x3508, 0xffffffff, 0x6c9b2000,
1143         0x3510, 0xfc00, 0x2000,
1144         0x3544, 0xffffffff, 0xfc0,
1145         0x28d4, 0x00000100, 0x100
1146 };
1147
1148 static void si_init_golden_registers(struct radeon_device *rdev)
1149 {
1150         switch (rdev->family) {
1151         case CHIP_TAHITI:
1152                 radeon_program_register_sequence(rdev,
1153                                                  tahiti_golden_registers,
1154                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1155                 radeon_program_register_sequence(rdev,
1156                                                  tahiti_golden_rlc_registers,
1157                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1158                 radeon_program_register_sequence(rdev,
1159                                                  tahiti_mgcg_cgcg_init,
1160                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1161                 radeon_program_register_sequence(rdev,
1162                                                  tahiti_golden_registers2,
1163                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1164                 break;
1165         case CHIP_PITCAIRN:
1166                 radeon_program_register_sequence(rdev,
1167                                                  pitcairn_golden_registers,
1168                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1169                 radeon_program_register_sequence(rdev,
1170                                                  pitcairn_golden_rlc_registers,
1171                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1172                 radeon_program_register_sequence(rdev,
1173                                                  pitcairn_mgcg_cgcg_init,
1174                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1175                 break;
1176         case CHIP_VERDE:
1177                 radeon_program_register_sequence(rdev,
1178                                                  verde_golden_registers,
1179                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1180                 radeon_program_register_sequence(rdev,
1181                                                  verde_golden_rlc_registers,
1182                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1183                 radeon_program_register_sequence(rdev,
1184                                                  verde_mgcg_cgcg_init,
1185                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1186                 radeon_program_register_sequence(rdev,
1187                                                  verde_pg_init,
1188                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1189                 break;
1190         case CHIP_OLAND:
1191                 radeon_program_register_sequence(rdev,
1192                                                  oland_golden_registers,
1193                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1194                 radeon_program_register_sequence(rdev,
1195                                                  oland_golden_rlc_registers,
1196                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1197                 radeon_program_register_sequence(rdev,
1198                                                  oland_mgcg_cgcg_init,
1199                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1200                 break;
1201         case CHIP_HAINAN:
1202                 radeon_program_register_sequence(rdev,
1203                                                  hainan_golden_registers,
1204                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1205                 radeon_program_register_sequence(rdev,
1206                                                  hainan_golden_registers2,
1207                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1208                 radeon_program_register_sequence(rdev,
1209                                                  hainan_mgcg_cgcg_init,
1210                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1211                 break;
1212         default:
1213                 break;
1214         }
1215 }
1216
1217 /**
1218  * si_get_allowed_info_register - fetch the register for the info ioctl
1219  *
1220  * @rdev: radeon_device pointer
1221  * @reg: register offset in bytes
1222  * @val: register value
1223  *
1224  * Returns 0 for success or -EINVAL for an invalid register
1225  *
1226  */
1227 int si_get_allowed_info_register(struct radeon_device *rdev,
1228                                  u32 reg, u32 *val)
1229 {
1230         switch (reg) {
1231         case GRBM_STATUS:
1232         case GRBM_STATUS2:
1233         case GRBM_STATUS_SE0:
1234         case GRBM_STATUS_SE1:
1235         case SRBM_STATUS:
1236         case SRBM_STATUS2:
1237         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1238         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1239         case UVD_STATUS:
1240                 *val = RREG32(reg);
1241                 return 0;
1242         default:
1243                 return -EINVAL;
1244         }
1245 }
1246
/*
 * Clock constants used by si_get_xclk() when the TCLK-to-XCLK mux is
 * selected; presumably expressed in 10 kHz units like other radeon
 * clock values -- TODO confirm against the driver's clock conventions.
 */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1249
1250 /**
1251  * si_get_xclk - get the xclk
1252  *
1253  * @rdev: radeon_device pointer
1254  *
1255  * Returns the reference clock used by the gfx engine
1256  * (SI).
1257  */
1258 u32 si_get_xclk(struct radeon_device *rdev)
1259 {
1260         u32 reference_clock = rdev->clock.spll.reference_freq;
1261         u32 tmp;
1262
1263         tmp = RREG32(CG_CLKPIN_CNTL_2);
1264         if (tmp & MUX_TCLK_TO_XCLK)
1265                 return TCLK;
1266
1267         tmp = RREG32(CG_CLKPIN_CNTL);
1268         if (tmp & XTALIN_DIVIDE)
1269                 return reference_clock / 4;
1270
1271         return reference_clock;
1272 }
1273
1274 /* get temperature in millidegrees */
1275 int si_get_temp(struct radeon_device *rdev)
1276 {
1277         u32 temp;
1278         int actual_temp = 0;
1279
1280         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1281                 CTF_TEMP_SHIFT;
1282
1283         if (temp & 0x200)
1284                 actual_temp = 255;
1285         else
1286                 actual_temp = temp & 0x1ff;
1287
1288         actual_temp = (actual_temp * 1000);
1289
1290         return actual_temp;
1291 }
1292
/* Number of {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs in each
 * of the *_io_mc_regs tables below; written out pairwise by
 * si_mc_load_microcode() on the legacy (non-new_fw) firmware path.
 */
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC IO debug {index, data} register pairs. */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a77400}
};
1333
/* Pitcairn MC IO debug {index, data} register pairs; consumed by
 * si_mc_load_microcode() on the legacy (non-new_fw) firmware path. */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a47400}
};
1372
/* Verde MC IO debug {index, data} register pairs; consumed by
 * si_mc_load_microcode() on the legacy (non-new_fw) firmware path. */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a37400}
};
1411
/* Oland MC IO debug {index, data} register pairs; consumed by
 * si_mc_load_microcode() on the legacy (non-new_fw) firmware path. */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a17730}
};
1450
/* Hainan MC IO debug {index, data} register pairs; consumed by
 * si_mc_load_microcode() on the legacy (non-new_fw) firmware path. */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
        {0x0000006f, 0x03044000},
        {0x00000070, 0x0480c018},
        {0x00000071, 0x00000040},
        {0x00000072, 0x01000000},
        {0x00000074, 0x000000ff},
        {0x00000075, 0x00143400},
        {0x00000076, 0x08ec0800},
        {0x00000077, 0x040000cc},
        {0x00000079, 0x00000000},
        {0x0000007a, 0x21000409},
        {0x0000007c, 0x00000000},
        {0x0000007d, 0xe8000000},
        {0x0000007e, 0x044408a8},
        {0x0000007f, 0x00000003},
        {0x00000080, 0x00000000},
        {0x00000081, 0x01000000},
        {0x00000082, 0x02000000},
        {0x00000083, 0x00000000},
        {0x00000084, 0xe3f3e4f4},
        {0x00000085, 0x00052024},
        {0x00000087, 0x00000000},
        {0x00000088, 0x66036603},
        {0x00000089, 0x01000000},
        {0x0000008b, 0x1c0a0000},
        {0x0000008c, 0xff010000},
        {0x0000008e, 0xffffefff},
        {0x0000008f, 0xfff3efff},
        {0x00000090, 0xfff3efbf},
        {0x00000094, 0x00101101},
        {0x00000095, 0x00000fff},
        {0x00000096, 0x00116fff},
        {0x00000097, 0x60010000},
        {0x00000098, 0x10010000},
        {0x00000099, 0x00006000},
        {0x0000009a, 0x00001000},
        {0x0000009f, 0x00a07730}
};
1489
1490 /* ucode loading */
1491 int si_mc_load_microcode(struct radeon_device *rdev)
1492 {
1493         const __be32 *fw_data = NULL;
1494         const __le32 *new_fw_data = NULL;
1495         u32 running;
1496         u32 *io_mc_regs = NULL;
1497         const __le32 *new_io_mc_regs = NULL;
1498         int i, regs_size, ucode_size;
1499
1500         if (!rdev->mc_fw)
1501                 return -EINVAL;
1502
1503         if (rdev->new_fw) {
1504                 const struct mc_firmware_header_v1_0 *hdr =
1505                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1506
1507                 radeon_ucode_print_mc_hdr(&hdr->header);
1508                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1509                 new_io_mc_regs = (const __le32 *)
1510                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1511                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1512                 new_fw_data = (const __le32 *)
1513                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1514         } else {
1515                 ucode_size = rdev->mc_fw->size / 4;
1516
1517                 switch (rdev->family) {
1518                 case CHIP_TAHITI:
1519                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1520                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1521                         break;
1522                 case CHIP_PITCAIRN:
1523                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1524                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1525                         break;
1526                 case CHIP_VERDE:
1527                 default:
1528                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1529                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1530                         break;
1531                 case CHIP_OLAND:
1532                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1533                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1534                         break;
1535                 case CHIP_HAINAN:
1536                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1537                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1538                         break;
1539                 }
1540                 fw_data = (const __be32 *)rdev->mc_fw->data;
1541         }
1542
1543         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1544
1545         if (running == 0) {
1546                 /* reset the engine and set to writable */
1547                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1548                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1549
1550                 /* load mc io regs */
1551                 for (i = 0; i < regs_size; i++) {
1552                         if (rdev->new_fw) {
1553                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1554                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1555                         } else {
1556                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1557                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1558                         }
1559                 }
1560                 /* load the MC ucode */
1561                 for (i = 0; i < ucode_size; i++) {
1562                         if (rdev->new_fw)
1563                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1564                         else
1565                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1566                 }
1567
1568                 /* put the engine back into the active state */
1569                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1570                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1571                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1572
1573                 /* wait for training to complete */
1574                 for (i = 0; i < rdev->usec_timeout; i++) {
1575                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1576                                 break;
1577                         udelay(1);
1578                 }
1579                 for (i = 0; i < rdev->usec_timeout; i++) {
1580                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1581                                 break;
1582                         udelay(1);
1583                 }
1584         }
1585
1586         return 0;
1587 }
1588
1589 static int si_init_microcode(struct radeon_device *rdev)
1590 {
1591         const char *chip_name;
1592         const char *new_chip_name;
1593         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1594         size_t smc_req_size, mc2_req_size;
1595         char fw_name[30];
1596         int err;
1597         int new_fw = 0;
1598         bool new_smc = false;
1599         bool si58_fw = false;
1600         bool banks2_fw = false;
1601
1602         DRM_DEBUG("\n");
1603
1604         switch (rdev->family) {
1605         case CHIP_TAHITI:
1606                 chip_name = "TAHITI";
1607                 new_chip_name = "tahiti";
1608                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1609                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1610                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1611                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1612                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1613                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1614                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1615                 break;
1616         case CHIP_PITCAIRN:
1617                 chip_name = "PITCAIRN";
1618                 if ((rdev->pdev->revision == 0x81) &&
1619                     ((rdev->pdev->device == 0x6810) ||
1620                      (rdev->pdev->device == 0x6811)))
1621                         new_smc = true;
1622                 new_chip_name = "pitcairn";
1623                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1624                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1625                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1626                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1627                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1628                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1629                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1630                 break;
1631         case CHIP_VERDE:
1632                 chip_name = "VERDE";
1633                 if (((rdev->pdev->device == 0x6820) &&
1634                      ((rdev->pdev->revision == 0x81) ||
1635                       (rdev->pdev->revision == 0x83))) ||
1636                     ((rdev->pdev->device == 0x6821) &&
1637                      ((rdev->pdev->revision == 0x83) ||
1638                       (rdev->pdev->revision == 0x87))) ||
1639                     ((rdev->pdev->revision == 0x87) &&
1640                      ((rdev->pdev->device == 0x6823) ||
1641                       (rdev->pdev->device == 0x682b))))
1642                         new_smc = true;
1643                 new_chip_name = "verde";
1644                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1645                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1646                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1647                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1648                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1649                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1650                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1651                 break;
1652         case CHIP_OLAND:
1653                 chip_name = "OLAND";
1654                 if (((rdev->pdev->revision == 0x81) &&
1655                      ((rdev->pdev->device == 0x6600) ||
1656                       (rdev->pdev->device == 0x6604) ||
1657                       (rdev->pdev->device == 0x6605) ||
1658                       (rdev->pdev->device == 0x6610))) ||
1659                     ((rdev->pdev->revision == 0x83) &&
1660                      (rdev->pdev->device == 0x6610)))
1661                         new_smc = true;
1662                 new_chip_name = "oland";
1663                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1664                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1665                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1666                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1667                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1668                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1669                 break;
1670         case CHIP_HAINAN:
1671                 chip_name = "HAINAN";
1672                 if (((rdev->pdev->revision == 0x81) &&
1673                      (rdev->pdev->device == 0x6660)) ||
1674                     ((rdev->pdev->revision == 0x83) &&
1675                      ((rdev->pdev->device == 0x6660) ||
1676                       (rdev->pdev->device == 0x6663) ||
1677                       (rdev->pdev->device == 0x6665) ||
1678                       (rdev->pdev->device == 0x6667))))
1679                         new_smc = true;
1680                 else if ((rdev->pdev->revision == 0xc3) &&
1681                          (rdev->pdev->device == 0x6665))
1682                         banks2_fw = true;
1683                 new_chip_name = "hainan";
1684                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1685                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1686                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1687                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1688                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1689                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1690                 break;
1691         default: BUG();
1692         }
1693
1694         /* this memory configuration requires special firmware */
1695         if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1696                 si58_fw = true;
1697
1698         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1699
1700         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1701         err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1702         if (err) {
1703                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1704                 err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1705                 if (err)
1706                         goto out;
1707                 if (rdev->pfp_fw->size != pfp_req_size) {
1708                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1709                                rdev->pfp_fw->size, fw_name);
1710                         err = -EINVAL;
1711                         goto out;
1712                 }
1713         } else {
1714                 err = radeon_ucode_validate(rdev->pfp_fw);
1715                 if (err) {
1716                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1717                                fw_name);
1718                         goto out;
1719                 } else {
1720                         new_fw++;
1721                 }
1722         }
1723
1724         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1725         err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
1726         if (err) {
1727                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1728                 err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
1729                 if (err)
1730                         goto out;
1731                 if (rdev->me_fw->size != me_req_size) {
1732                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1733                                rdev->me_fw->size, fw_name);
1734                         err = -EINVAL;
1735                 }
1736         } else {
1737                 err = radeon_ucode_validate(rdev->me_fw);
1738                 if (err) {
1739                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1740                                fw_name);
1741                         goto out;
1742                 } else {
1743                         new_fw++;
1744                 }
1745         }
1746
1747         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1748         err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1749         if (err) {
1750                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1751                 err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1752                 if (err)
1753                         goto out;
1754                 if (rdev->ce_fw->size != ce_req_size) {
1755                         pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1756                                rdev->ce_fw->size, fw_name);
1757                         err = -EINVAL;
1758                 }
1759         } else {
1760                 err = radeon_ucode_validate(rdev->ce_fw);
1761                 if (err) {
1762                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1763                                fw_name);
1764                         goto out;
1765                 } else {
1766                         new_fw++;
1767                 }
1768         }
1769
1770         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1771         err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1772         if (err) {
1773                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1774                 err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1775                 if (err)
1776                         goto out;
1777                 if (rdev->rlc_fw->size != rlc_req_size) {
1778                         pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1779                                rdev->rlc_fw->size, fw_name);
1780                         err = -EINVAL;
1781                 }
1782         } else {
1783                 err = radeon_ucode_validate(rdev->rlc_fw);
1784                 if (err) {
1785                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1786                                fw_name);
1787                         goto out;
1788                 } else {
1789                         new_fw++;
1790                 }
1791         }
1792
1793         if (si58_fw)
1794                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
1795         else
1796                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1797         err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1798         if (err) {
1799                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1800                 err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1801                 if (err) {
1802                         snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1803                         err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1804                         if (err)
1805                                 goto out;
1806                 }
1807                 if ((rdev->mc_fw->size != mc_req_size) &&
1808                     (rdev->mc_fw->size != mc2_req_size)) {
1809                         pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1810                                rdev->mc_fw->size, fw_name);
1811                         err = -EINVAL;
1812                 }
1813                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1814         } else {
1815                 err = radeon_ucode_validate(rdev->mc_fw);
1816                 if (err) {
1817                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1818                                fw_name);
1819                         goto out;
1820                 } else {
1821                         new_fw++;
1822                 }
1823         }
1824
1825         if (banks2_fw)
1826                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
1827         else if (new_smc)
1828                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1829         else
1830                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
1831         err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1832         if (err) {
1833                 snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
1834                 err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1835                 if (err) {
1836                         pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1837                         release_firmware(rdev->smc_fw);
1838                         rdev->smc_fw = NULL;
1839                         err = 0;
1840                 } else if (rdev->smc_fw->size != smc_req_size) {
1841                         pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1842                                rdev->smc_fw->size, fw_name);
1843                         err = -EINVAL;
1844                 }
1845         } else {
1846                 err = radeon_ucode_validate(rdev->smc_fw);
1847                 if (err) {
1848                         pr_err("si_cp: validation failed for firmware \"%s\"\n",
1849                                fw_name);
1850                         goto out;
1851                 } else {
1852                         new_fw++;
1853                 }
1854         }
1855
1856         if (new_fw == 0) {
1857                 rdev->new_fw = false;
1858         } else if (new_fw < 6) {
1859                 pr_err("si_fw: mixing new and old firmware!\n");
1860                 err = -EINVAL;
1861         } else {
1862                 rdev->new_fw = true;
1863         }
1864 out:
1865         if (err) {
1866                 if (err != -EINVAL)
1867                         pr_err("si_cp: Failed to load firmware \"%s\"\n",
1868                                fw_name);
1869                 release_firmware(rdev->pfp_fw);
1870                 rdev->pfp_fw = NULL;
1871                 release_firmware(rdev->me_fw);
1872                 rdev->me_fw = NULL;
1873                 release_firmware(rdev->ce_fw);
1874                 rdev->ce_fw = NULL;
1875                 release_firmware(rdev->rlc_fw);
1876                 rdev->rlc_fw = NULL;
1877                 release_firmware(rdev->mc_fw);
1878                 rdev->mc_fw = NULL;
1879                 release_firmware(rdev->smc_fw);
1880                 rdev->smc_fw = NULL;
1881         }
1882         return err;
1883 }
1884
1885 /* watermark setup */
1886 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1887                                    struct radeon_crtc *radeon_crtc,
1888                                    struct drm_display_mode *mode,
1889                                    struct drm_display_mode *other_mode)
1890 {
1891         u32 tmp, buffer_alloc, i;
1892         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1893         /*
1894          * Line Buffer Setup
1895          * There are 3 line buffers, each one shared by 2 display controllers.
1896          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1897          * the display controllers.  The paritioning is done via one of four
1898          * preset allocations specified in bits 21:20:
1899          *  0 - half lb
1900          *  2 - whole lb, other crtc must be disabled
1901          */
1902         /* this can get tricky if we have two large displays on a paired group
1903          * of crtcs.  Ideally for multiple large displays we'd assign them to
1904          * non-linked crtcs for maximum line buffer allocation.
1905          */
1906         if (radeon_crtc->base.enabled && mode) {
1907                 if (other_mode) {
1908                         tmp = 0; /* 1/2 */
1909                         buffer_alloc = 1;
1910                 } else {
1911                         tmp = 2; /* whole */
1912                         buffer_alloc = 2;
1913                 }
1914         } else {
1915                 tmp = 0;
1916                 buffer_alloc = 0;
1917         }
1918
1919         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1920                DC_LB_MEMORY_CONFIG(tmp));
1921
1922         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1923                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1924         for (i = 0; i < rdev->usec_timeout; i++) {
1925                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1926                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1927                         break;
1928                 udelay(1);
1929         }
1930
1931         if (radeon_crtc->base.enabled && mode) {
1932                 switch (tmp) {
1933                 case 0:
1934                 default:
1935                         return 4096 * 2;
1936                 case 2:
1937                         return 8192 * 2;
1938                 }
1939         }
1940
1941         /* controller not enabled, so no lb used */
1942         return 0;
1943 }
1944
1945 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1946 {
1947         u32 tmp = RREG32(MC_SHARED_CHMAP);
1948
1949         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1950         case 0:
1951         default:
1952                 return 1;
1953         case 1:
1954                 return 2;
1955         case 2:
1956                 return 4;
1957         case 3:
1958                 return 8;
1959         case 4:
1960                 return 3;
1961         case 5:
1962                 return 6;
1963         case 6:
1964                 return 10;
1965         case 7:
1966                 return 12;
1967         case 8:
1968                 return 16;
1969         }
1970 }
1971
/* parameters for the DCE6 display watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1987
1988 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1989 {
1990         /* Calculate raw DRAM Bandwidth */
1991         fixed20_12 dram_efficiency; /* 0.7 */
1992         fixed20_12 yclk, dram_channels, bandwidth;
1993         fixed20_12 a;
1994
1995         a.full = dfixed_const(1000);
1996         yclk.full = dfixed_const(wm->yclk);
1997         yclk.full = dfixed_div(yclk, a);
1998         dram_channels.full = dfixed_const(wm->dram_channels * 4);
1999         a.full = dfixed_const(10);
2000         dram_efficiency.full = dfixed_const(7);
2001         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2002         bandwidth.full = dfixed_mul(dram_channels, yclk);
2003         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2004
2005         return dfixed_trunc(bandwidth);
2006 }
2007
2008 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2009 {
2010         /* Calculate DRAM Bandwidth and the part allocated to display. */
2011         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2012         fixed20_12 yclk, dram_channels, bandwidth;
2013         fixed20_12 a;
2014
2015         a.full = dfixed_const(1000);
2016         yclk.full = dfixed_const(wm->yclk);
2017         yclk.full = dfixed_div(yclk, a);
2018         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2019         a.full = dfixed_const(10);
2020         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2021         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2022         bandwidth.full = dfixed_mul(dram_channels, yclk);
2023         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2024
2025         return dfixed_trunc(bandwidth);
2026 }
2027
2028 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2029 {
2030         /* Calculate the display Data return Bandwidth */
2031         fixed20_12 return_efficiency; /* 0.8 */
2032         fixed20_12 sclk, bandwidth;
2033         fixed20_12 a;
2034
2035         a.full = dfixed_const(1000);
2036         sclk.full = dfixed_const(wm->sclk);
2037         sclk.full = dfixed_div(sclk, a);
2038         a.full = dfixed_const(10);
2039         return_efficiency.full = dfixed_const(8);
2040         return_efficiency.full = dfixed_div(return_efficiency, a);
2041         a.full = dfixed_const(32);
2042         bandwidth.full = dfixed_mul(a, sclk);
2043         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2044
2045         return dfixed_trunc(bandwidth);
2046 }
2047
2048 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2049 {
2050         return 32;
2051 }
2052
2053 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2054 {
2055         /* Calculate the DMIF Request Bandwidth */
2056         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2057         fixed20_12 disp_clk, sclk, bandwidth;
2058         fixed20_12 a, b1, b2;
2059         u32 min_bandwidth;
2060
2061         a.full = dfixed_const(1000);
2062         disp_clk.full = dfixed_const(wm->disp_clk);
2063         disp_clk.full = dfixed_div(disp_clk, a);
2064         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2065         b1.full = dfixed_mul(a, disp_clk);
2066
2067         a.full = dfixed_const(1000);
2068         sclk.full = dfixed_const(wm->sclk);
2069         sclk.full = dfixed_div(sclk, a);
2070         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2071         b2.full = dfixed_mul(a, sclk);
2072
2073         a.full = dfixed_const(10);
2074         disp_clk_request_efficiency.full = dfixed_const(8);
2075         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2076
2077         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2078
2079         a.full = dfixed_const(min_bandwidth);
2080         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2081
2082         return dfixed_trunc(bandwidth);
2083 }
2084
2085 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2086 {
2087         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2088         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2089         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2090         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2091
2092         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2093 }
2094
2095 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2096 {
2097         /* Calculate the display mode Average Bandwidth
2098          * DisplayMode should contain the source and destination dimensions,
2099          * timing, etc.
2100          */
2101         fixed20_12 bpp;
2102         fixed20_12 line_time;
2103         fixed20_12 src_width;
2104         fixed20_12 bandwidth;
2105         fixed20_12 a;
2106
2107         a.full = dfixed_const(1000);
2108         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2109         line_time.full = dfixed_div(line_time, a);
2110         bpp.full = dfixed_const(wm->bytes_per_pixel);
2111         src_width.full = dfixed_const(wm->src_width);
2112         bandwidth.full = dfixed_mul(src_width, bpp);
2113         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2114         bandwidth.full = dfixed_div(bandwidth, line_time);
2115
2116         return dfixed_trunc(bandwidth);
2117 }
2118
2119 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2120 {
2121         /* First calcualte the latency in ns */
2122         u32 mc_latency = 2000; /* 2000 ns. */
2123         u32 available_bandwidth = dce6_available_bandwidth(wm);
2124         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2125         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2126         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2127         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2128                 (wm->num_heads * cursor_line_pair_return_time);
2129         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2130         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2131         u32 tmp, dmif_size = 12288;
2132         fixed20_12 a, b, c;
2133
2134         if (wm->num_heads == 0)
2135                 return 0;
2136
2137         a.full = dfixed_const(2);
2138         b.full = dfixed_const(1);
2139         if ((wm->vsc.full > a.full) ||
2140             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2141             (wm->vtaps >= 5) ||
2142             ((wm->vsc.full >= a.full) && wm->interlaced))
2143                 max_src_lines_per_dst_line = 4;
2144         else
2145                 max_src_lines_per_dst_line = 2;
2146
2147         a.full = dfixed_const(available_bandwidth);
2148         b.full = dfixed_const(wm->num_heads);
2149         a.full = dfixed_div(a, b);
2150         tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2151         tmp = min(dfixed_trunc(a), tmp);
2152
2153         lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2154
2155         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2156         b.full = dfixed_const(1000);
2157         c.full = dfixed_const(lb_fill_bw);
2158         b.full = dfixed_div(c, b);
2159         a.full = dfixed_div(a, b);
2160         line_fill_time = dfixed_trunc(a);
2161
2162         if (line_fill_time < wm->active_time)
2163                 return latency;
2164         else
2165                 return latency + (line_fill_time - wm->active_time);
2166
2167 }
2168
2169 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2170 {
2171         if (dce6_average_bandwidth(wm) <=
2172             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2173                 return true;
2174         else
2175                 return false;
2176 };
2177
2178 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2179 {
2180         if (dce6_average_bandwidth(wm) <=
2181             (dce6_available_bandwidth(wm) / wm->num_heads))
2182                 return true;
2183         else
2184                 return false;
2185 };
2186
2187 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2188 {
2189         u32 lb_partitions = wm->lb_size / wm->src_width;
2190         u32 line_time = wm->active_time + wm->blank_time;
2191         u32 latency_tolerant_lines;
2192         u32 latency_hiding;
2193         fixed20_12 a;
2194
2195         a.full = dfixed_const(1);
2196         if (wm->vsc.full > a.full)
2197                 latency_tolerant_lines = 1;
2198         else {
2199                 if (lb_partitions <= (wm->vtaps + 1))
2200                         latency_tolerant_lines = 1;
2201                 else
2202                         latency_tolerant_lines = 2;
2203         }
2204
2205         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2206
2207         if (dce6_latency_watermark(wm) <= latency_hiding)
2208                 return true;
2209         else
2210                 return false;
2211 }
2212
2213 static void dce6_program_watermarks(struct radeon_device *rdev,
2214                                          struct radeon_crtc *radeon_crtc,
2215                                          u32 lb_size, u32 num_heads)
2216 {
2217         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2218         struct dce6_wm_params wm_low, wm_high;
2219         u32 dram_channels;
2220         u32 active_time;
2221         u32 line_time = 0;
2222         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2223         u32 priority_a_mark = 0, priority_b_mark = 0;
2224         u32 priority_a_cnt = PRIORITY_OFF;
2225         u32 priority_b_cnt = PRIORITY_OFF;
2226         u32 tmp, arb_control3;
2227         fixed20_12 a, b, c;
2228
2229         if (radeon_crtc->base.enabled && num_heads && mode) {
2230                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2231                                             (u32)mode->clock);
2232                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2233                                           (u32)mode->clock);
2234                 line_time = min(line_time, (u32)65535);
2235                 priority_a_cnt = 0;
2236                 priority_b_cnt = 0;
2237
2238                 if (rdev->family == CHIP_ARUBA)
2239                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2240                 else
2241                         dram_channels = si_get_number_of_dram_channels(rdev);
2242
2243                 /* watermark for high clocks */
2244                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2245                         wm_high.yclk =
2246                                 radeon_dpm_get_mclk(rdev, false) * 10;
2247                         wm_high.sclk =
2248                                 radeon_dpm_get_sclk(rdev, false) * 10;
2249                 } else {
2250                         wm_high.yclk = rdev->pm.current_mclk * 10;
2251                         wm_high.sclk = rdev->pm.current_sclk * 10;
2252                 }
2253
2254                 wm_high.disp_clk = mode->clock;
2255                 wm_high.src_width = mode->crtc_hdisplay;
2256                 wm_high.active_time = active_time;
2257                 wm_high.blank_time = line_time - wm_high.active_time;
2258                 wm_high.interlaced = false;
2259                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2260                         wm_high.interlaced = true;
2261                 wm_high.vsc = radeon_crtc->vsc;
2262                 wm_high.vtaps = 1;
2263                 if (radeon_crtc->rmx_type != RMX_OFF)
2264                         wm_high.vtaps = 2;
2265                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2266                 wm_high.lb_size = lb_size;
2267                 wm_high.dram_channels = dram_channels;
2268                 wm_high.num_heads = num_heads;
2269
2270                 /* watermark for low clocks */
2271                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2272                         wm_low.yclk =
2273                                 radeon_dpm_get_mclk(rdev, true) * 10;
2274                         wm_low.sclk =
2275                                 radeon_dpm_get_sclk(rdev, true) * 10;
2276                 } else {
2277                         wm_low.yclk = rdev->pm.current_mclk * 10;
2278                         wm_low.sclk = rdev->pm.current_sclk * 10;
2279                 }
2280
2281                 wm_low.disp_clk = mode->clock;
2282                 wm_low.src_width = mode->crtc_hdisplay;
2283                 wm_low.active_time = active_time;
2284                 wm_low.blank_time = line_time - wm_low.active_time;
2285                 wm_low.interlaced = false;
2286                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2287                         wm_low.interlaced = true;
2288                 wm_low.vsc = radeon_crtc->vsc;
2289                 wm_low.vtaps = 1;
2290                 if (radeon_crtc->rmx_type != RMX_OFF)
2291                         wm_low.vtaps = 2;
2292                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2293                 wm_low.lb_size = lb_size;
2294                 wm_low.dram_channels = dram_channels;
2295                 wm_low.num_heads = num_heads;
2296
2297                 /* set for high clocks */
2298                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2299                 /* set for low clocks */
2300                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2301
2302                 /* possibly force display priority to high */
2303                 /* should really do this at mode validation time... */
2304                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2305                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2306                     !dce6_check_latency_hiding(&wm_high) ||
2307                     (rdev->disp_priority == 2)) {
2308                         DRM_DEBUG_KMS("force priority to high\n");
2309                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2310                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2311                 }
2312                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2313                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2314                     !dce6_check_latency_hiding(&wm_low) ||
2315                     (rdev->disp_priority == 2)) {
2316                         DRM_DEBUG_KMS("force priority to high\n");
2317                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2318                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2319                 }
2320
2321                 a.full = dfixed_const(1000);
2322                 b.full = dfixed_const(mode->clock);
2323                 b.full = dfixed_div(b, a);
2324                 c.full = dfixed_const(latency_watermark_a);
2325                 c.full = dfixed_mul(c, b);
2326                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2327                 c.full = dfixed_div(c, a);
2328                 a.full = dfixed_const(16);
2329                 c.full = dfixed_div(c, a);
2330                 priority_a_mark = dfixed_trunc(c);
2331                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2332
2333                 a.full = dfixed_const(1000);
2334                 b.full = dfixed_const(mode->clock);
2335                 b.full = dfixed_div(b, a);
2336                 c.full = dfixed_const(latency_watermark_b);
2337                 c.full = dfixed_mul(c, b);
2338                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2339                 c.full = dfixed_div(c, a);
2340                 a.full = dfixed_const(16);
2341                 c.full = dfixed_div(c, a);
2342                 priority_b_mark = dfixed_trunc(c);
2343                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2344
2345                 /* Save number of lines the linebuffer leads before the scanout */
2346                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2347         }
2348
2349         /* select wm A */
2350         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2351         tmp = arb_control3;
2352         tmp &= ~LATENCY_WATERMARK_MASK(3);
2353         tmp |= LATENCY_WATERMARK_MASK(1);
2354         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2355         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2356                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2357                 LATENCY_HIGH_WATERMARK(line_time)));
2358         /* select wm B */
2359         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2360         tmp &= ~LATENCY_WATERMARK_MASK(3);
2361         tmp |= LATENCY_WATERMARK_MASK(2);
2362         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2363         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2364                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2365                 LATENCY_HIGH_WATERMARK(line_time)));
2366         /* restore original selection */
2367         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2368
2369         /* write the priority marks */
2370         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2371         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2372
2373         /* save values for DPM */
2374         radeon_crtc->line_time = line_time;
2375         radeon_crtc->wm_high = latency_watermark_a;
2376         radeon_crtc->wm_low = latency_watermark_b;
2377 }
2378
2379 void dce6_bandwidth_update(struct radeon_device *rdev)
2380 {
2381         struct drm_display_mode *mode0 = NULL;
2382         struct drm_display_mode *mode1 = NULL;
2383         u32 num_heads = 0, lb_size;
2384         int i;
2385
2386         if (!rdev->mode_info.mode_config_initialized)
2387                 return;
2388
2389         radeon_update_display_priority(rdev);
2390
2391         for (i = 0; i < rdev->num_crtc; i++) {
2392                 if (rdev->mode_info.crtcs[i]->base.enabled)
2393                         num_heads++;
2394         }
2395         for (i = 0; i < rdev->num_crtc; i += 2) {
2396                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2397                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2398                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2399                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2400                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2401                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2402         }
2403 }
2404
2405 /*
2406  * Core functions
2407  */
2408 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2409 {
2410         u32 *tile = rdev->config.si.tile_mode_array;
2411         const u32 num_tile_mode_states =
2412                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2413         u32 reg_offset, split_equal_to_row_size;
2414
2415         switch (rdev->config.si.mem_row_size_in_kb) {
2416         case 1:
2417                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2418                 break;
2419         case 2:
2420         default:
2421                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2422                 break;
2423         case 4:
2424                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2425                 break;
2426         }
2427
2428         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2429                 tile[reg_offset] = 0;
2430
2431         switch(rdev->family) {
2432         case CHIP_TAHITI:
2433         case CHIP_PITCAIRN:
2434                 /* non-AA compressed depth or any compressed stencil */
2435                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2436                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2437                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2438                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2439                            NUM_BANKS(ADDR_SURF_16_BANK) |
2440                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2443                 /* 2xAA/4xAA compressed depth only */
2444                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2446                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2447                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2448                            NUM_BANKS(ADDR_SURF_16_BANK) |
2449                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2452                 /* 8xAA compressed depth only */
2453                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2455                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2456                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2457                            NUM_BANKS(ADDR_SURF_16_BANK) |
2458                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2461                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2462                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2464                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2465                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2466                            NUM_BANKS(ADDR_SURF_16_BANK) |
2467                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2469                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2470                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2471                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2473                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2474                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2475                            NUM_BANKS(ADDR_SURF_16_BANK) |
2476                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2478                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2479                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2480                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2482                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2483                            TILE_SPLIT(split_equal_to_row_size) |
2484                            NUM_BANKS(ADDR_SURF_16_BANK) |
2485                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2488                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2489                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2491                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492                            TILE_SPLIT(split_equal_to_row_size) |
2493                            NUM_BANKS(ADDR_SURF_16_BANK) |
2494                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2497                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2498                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2500                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2501                            TILE_SPLIT(split_equal_to_row_size) |
2502                            NUM_BANKS(ADDR_SURF_16_BANK) |
2503                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2506                 /* 1D and 1D Array Surfaces */
2507                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2508                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2509                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2510                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2511                            NUM_BANKS(ADDR_SURF_16_BANK) |
2512                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2514                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2515                 /* Displayable maps. */
2516                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2517                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2518                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2519                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2520                            NUM_BANKS(ADDR_SURF_16_BANK) |
2521                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2523                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2524                 /* Display 8bpp. */
2525                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2529                            NUM_BANKS(ADDR_SURF_16_BANK) |
2530                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533                 /* Display 16bpp. */
2534                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2537                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2538                            NUM_BANKS(ADDR_SURF_16_BANK) |
2539                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2541                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542                 /* Display 32bpp. */
2543                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2547                            NUM_BANKS(ADDR_SURF_16_BANK) |
2548                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2551                 /* Thin. */
2552                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2553                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2554                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2556                            NUM_BANKS(ADDR_SURF_16_BANK) |
2557                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2559                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560                 /* Thin 8 bpp. */
2561                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2563                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2564                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2565                            NUM_BANKS(ADDR_SURF_16_BANK) |
2566                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2568                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2569                 /* Thin 16 bpp. */
2570                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2572                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2574                            NUM_BANKS(ADDR_SURF_16_BANK) |
2575                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2578                 /* Thin 32 bpp. */
2579                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2581                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2583                            NUM_BANKS(ADDR_SURF_16_BANK) |
2584                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2586                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2587                 /* Thin 64 bpp. */
2588                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2590                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2591                            TILE_SPLIT(split_equal_to_row_size) |
2592                            NUM_BANKS(ADDR_SURF_16_BANK) |
2593                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2596                 /* 8 bpp PRT. */
2597                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2599                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2600                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2601                            NUM_BANKS(ADDR_SURF_16_BANK) |
2602                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2603                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2605                 /* 16 bpp PRT */
2606                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2608                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2610                            NUM_BANKS(ADDR_SURF_16_BANK) |
2611                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2614                 /* 32 bpp PRT */
2615                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619                            NUM_BANKS(ADDR_SURF_16_BANK) |
2620                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623                 /* 64 bpp PRT */
2624                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2628                            NUM_BANKS(ADDR_SURF_16_BANK) |
2629                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2632                 /* 128 bpp PRT */
2633                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2637                            NUM_BANKS(ADDR_SURF_8_BANK) |
2638                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2641
2642                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2643                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2644                 break;
2645
2646         case CHIP_VERDE:
2647         case CHIP_OLAND:
2648         case CHIP_HAINAN:
2649                 /* non-AA compressed depth or any compressed stencil */
2650                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2652                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2653                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2654                            NUM_BANKS(ADDR_SURF_16_BANK) |
2655                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2657                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2658                 /* 2xAA/4xAA compressed depth only */
2659                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2661                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2663                            NUM_BANKS(ADDR_SURF_16_BANK) |
2664                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2667                 /* 8xAA compressed depth only */
2668                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2670                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672                            NUM_BANKS(ADDR_SURF_16_BANK) |
2673                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2676                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2677                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2679                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2680                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2681                            NUM_BANKS(ADDR_SURF_16_BANK) |
2682                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2685                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2686                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2688                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2689                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2690                            NUM_BANKS(ADDR_SURF_16_BANK) |
2691                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2693                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2694                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2695                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698                            TILE_SPLIT(split_equal_to_row_size) |
2699                            NUM_BANKS(ADDR_SURF_16_BANK) |
2700                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2702                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2704                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2706                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707                            TILE_SPLIT(split_equal_to_row_size) |
2708                            NUM_BANKS(ADDR_SURF_16_BANK) |
2709                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2712                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2713                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2715                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2716                            TILE_SPLIT(split_equal_to_row_size) |
2717                            NUM_BANKS(ADDR_SURF_16_BANK) |
2718                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2721                 /* 1D and 1D Array Surfaces */
2722                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2723                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2726                            NUM_BANKS(ADDR_SURF_16_BANK) |
2727                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2730                 /* Displayable maps. */
2731                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2732                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2735                            NUM_BANKS(ADDR_SURF_16_BANK) |
2736                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2738                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2739                 /* Display 8bpp. */
2740                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2744                            NUM_BANKS(ADDR_SURF_16_BANK) |
2745                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2748                 /* Display 16bpp. */
2749                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2753                            NUM_BANKS(ADDR_SURF_16_BANK) |
2754                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2756                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2757                 /* Display 32bpp. */
2758                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2760                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2762                            NUM_BANKS(ADDR_SURF_16_BANK) |
2763                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2766                 /* Thin. */
2767                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2769                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771                            NUM_BANKS(ADDR_SURF_16_BANK) |
2772                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2775                 /* Thin 8 bpp. */
2776                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2778                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2780                            NUM_BANKS(ADDR_SURF_16_BANK) |
2781                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2783                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2784                 /* Thin 16 bpp. */
2785                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2787                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2789                            NUM_BANKS(ADDR_SURF_16_BANK) |
2790                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793                 /* Thin 32 bpp. */
2794                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2796                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2798                            NUM_BANKS(ADDR_SURF_16_BANK) |
2799                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2801                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2802                 /* Thin 64 bpp. */
2803                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2805                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                            TILE_SPLIT(split_equal_to_row_size) |
2807                            NUM_BANKS(ADDR_SURF_16_BANK) |
2808                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2810                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2811                 /* 8 bpp PRT. */
2812                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2814                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2815                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2816                            NUM_BANKS(ADDR_SURF_16_BANK) |
2817                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2818                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2820                 /* 16 bpp PRT */
2821                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2823                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2824                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2825                            NUM_BANKS(ADDR_SURF_16_BANK) |
2826                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2829                 /* 32 bpp PRT */
2830                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2832                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2833                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2834                            NUM_BANKS(ADDR_SURF_16_BANK) |
2835                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2837                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2838                 /* 64 bpp PRT */
2839                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2841                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2842                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2843                            NUM_BANKS(ADDR_SURF_16_BANK) |
2844                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2847                 /* 128 bpp PRT */
2848                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2849                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2850                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2851                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2852                            NUM_BANKS(ADDR_SURF_8_BANK) |
2853                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2856
2857                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2858                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2859                 break;
2860
2861         default:
2862                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2863         }
2864 }
2865
2866 static void si_select_se_sh(struct radeon_device *rdev,
2867                             u32 se_num, u32 sh_num)
2868 {
2869         u32 data = INSTANCE_BROADCAST_WRITES;
2870
2871         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2872                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2873         else if (se_num == 0xffffffff)
2874                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2875         else if (sh_num == 0xffffffff)
2876                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2877         else
2878                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2879         WREG32(GRBM_GFX_INDEX, data);
2880 }
2881
2882 static u32 si_create_bitmask(u32 bit_width)
2883 {
2884         u32 i, mask = 0;
2885
2886         for (i = 0; i < bit_width; i++) {
2887                 mask <<= 1;
2888                 mask |= 1;
2889         }
2890         return mask;
2891 }
2892
2893 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2894 {
2895         u32 data, mask;
2896
2897         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2898         if (data & 1)
2899                 data &= INACTIVE_CUS_MASK;
2900         else
2901                 data = 0;
2902         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2903
2904         data >>= INACTIVE_CUS_SHIFT;
2905
2906         mask = si_create_bitmask(cu_per_sh);
2907
2908         return ~data & mask;
2909 }
2910
2911 static void si_setup_spi(struct radeon_device *rdev,
2912                          u32 se_num, u32 sh_per_se,
2913                          u32 cu_per_sh)
2914 {
2915         int i, j, k;
2916         u32 data, mask, active_cu;
2917
2918         for (i = 0; i < se_num; i++) {
2919                 for (j = 0; j < sh_per_se; j++) {
2920                         si_select_se_sh(rdev, i, j);
2921                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2922                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2923
2924                         mask = 1;
2925                         for (k = 0; k < 16; k++) {
2926                                 mask <<= k;
2927                                 if (active_cu & mask) {
2928                                         data &= ~mask;
2929                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2930                                         break;
2931                                 }
2932                         }
2933                 }
2934         }
2935         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2936 }
2937
2938 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2939                               u32 max_rb_num_per_se,
2940                               u32 sh_per_se)
2941 {
2942         u32 data, mask;
2943
2944         data = RREG32(CC_RB_BACKEND_DISABLE);
2945         if (data & 1)
2946                 data &= BACKEND_DISABLE_MASK;
2947         else
2948                 data = 0;
2949         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2950
2951         data >>= BACKEND_DISABLE_SHIFT;
2952
2953         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2954
2955         return data & mask;
2956 }
2957
/*
 * si_setup_rb - program the render-backend (RB) raster configuration
 *
 * Collects the per-SE/SH disabled-RB fuse bits into a single bitmap,
 * inverts it into rdev->config.si.backend_enable_mask, and programs
 * PA_SC_RASTER_CONFIG for every shader engine based on which RB pairs
 * survived harvesting.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather disabled-RB bits from each SE/SH, packed
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* back to broadcast addressing */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert into an enabled-RB bitmap covering all SEs */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program the raster config per SE, consuming the enabled-RB
	 * bitmap two bits (one RB pair) at a time
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs enabled (also the fallback) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3007
/*
 * si_gpu_init - one-time GFX core initialization for SI parts
 *
 * Fills in the per-family topology limits, derives and programs the global
 * address/tiling configuration (GB_ADDR_CONFIG and friends), initializes the
 * tiling mode table, RB raster config and SPI thread management, counts the
 * active CUs, and programs the 3D-engine hardware defaults.  The register
 * write order below is kept as-is; do not reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader/pipe/cache limits, SC FIFO sizes and the
	 * "golden" GB_ADDR_CONFIG starting value
	 */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	/* enable the SRBM interrupt and ack any stale one */
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* result unused; read presumably kept for its side effect — NOTE(review) confirm */
	RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (KB) from the column-bit count, capped at 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address config to every block that consumes it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the compute units that survived harvesting */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification; kept as-is from the original */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* scan-converter FIFO sizes chosen per family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero all CB performance-counter selects */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* make HDP flushes also invalidate the HDP cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the programmed state settle */
	udelay(50);
}
3273
/*
 * GPU scratch register helper functions.
 */
3277 static void si_scratch_init(struct radeon_device *rdev)
3278 {
3279         int i;
3280
3281         rdev->scratch.num_reg = 7;
3282         rdev->scratch.reg_base = SCRATCH_REG0;
3283         for (i = 0; i < rdev->scratch.num_reg; i++) {
3284                 rdev->scratch.free[i] = true;
3285                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3286         }
3287 }
3288
/**
 * si_fence_ring_emit - emit a fence on the gfx/compute ring
 * @rdev: radeon device structure
 * @fence: fence to emit
 *
 * Flushes the GPU read caches over GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number to the fence GPU address
 * and raises an interrupt when prior work has completed.  The packet
 * dword order is fixed by the hardware; do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* high address bits plus data/interrupt select fields */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3315
3316 /*
3317  * IB stuff
3318  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a ring
 * @rdev: radeon device structure
 * @ib: indirect buffer to schedule
 *
 * Emits the INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST) packet for @ib.
 * For a normal IB it first records where the read pointer will end up
 * (via the rptr save register or the writeback slot) and afterwards
 * flushes the read caches for the IB's VM id.  The dword counts added to
 * ring->wptr below must match the number of dwords emitted before the
 * read-pointer value takes effect; keep them in sync with the packets.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 (this packet) + 4 (IB packet) + 8 (cache flush) dwords */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 (this packet) + 4 (IB packet) + 8 (cache flush) dwords */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* the IB packet itself: base address, then length plus VM id */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3375
3376 /*
3377  * CP.
3378  */
/**
 * si_cp_enable - start or halt the command processor
 * @rdev: radeon device structure
 * @enable: true to release the CP, false to halt it
 *
 * On halt the PFP, ME and CE microengines are stopped, scratch register
 * writeback is disabled, and the three CP rings are marked not ready.
 */
static void si_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		/* with the GFX copy ring going down, let TTM use all of VRAM again */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		WREG32(SCRATCH_UMSK, 0);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
3394
3395 static int si_cp_load_microcode(struct radeon_device *rdev)
3396 {
3397         int i;
3398
3399         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3400                 return -EINVAL;
3401
3402         si_cp_enable(rdev, false);
3403
3404         if (rdev->new_fw) {
3405                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3406                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3407                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3408                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3409                 const struct gfx_firmware_header_v1_0 *me_hdr =
3410                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3411                 const __le32 *fw_data;
3412                 u32 fw_size;
3413
3414                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3415                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3416                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3417
3418                 /* PFP */
3419                 fw_data = (const __le32 *)
3420                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3421                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3422                 WREG32(CP_PFP_UCODE_ADDR, 0);
3423                 for (i = 0; i < fw_size; i++)
3424                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3425                 WREG32(CP_PFP_UCODE_ADDR, 0);
3426
3427                 /* CE */
3428                 fw_data = (const __le32 *)
3429                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3430                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3431                 WREG32(CP_CE_UCODE_ADDR, 0);
3432                 for (i = 0; i < fw_size; i++)
3433                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3434                 WREG32(CP_CE_UCODE_ADDR, 0);
3435
3436                 /* ME */
3437                 fw_data = (const __be32 *)
3438                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3439                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3440                 WREG32(CP_ME_RAM_WADDR, 0);
3441                 for (i = 0; i < fw_size; i++)
3442                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3443                 WREG32(CP_ME_RAM_WADDR, 0);
3444         } else {
3445                 const __be32 *fw_data;
3446
3447                 /* PFP */
3448                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3449                 WREG32(CP_PFP_UCODE_ADDR, 0);
3450                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3451                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3452                 WREG32(CP_PFP_UCODE_ADDR, 0);
3453
3454                 /* CE */
3455                 fw_data = (const __be32 *)rdev->ce_fw->data;
3456                 WREG32(CP_CE_UCODE_ADDR, 0);
3457                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3458                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3459                 WREG32(CP_CE_UCODE_ADDR, 0);
3460
3461                 /* ME */
3462                 fw_data = (const __be32 *)rdev->me_fw->data;
3463                 WREG32(CP_ME_RAM_WADDR, 0);
3464                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3465                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3466                 WREG32(CP_ME_RAM_WADDR, 0);
3467         }
3468
3469         WREG32(CP_PFP_UCODE_ADDR, 0);
3470         WREG32(CP_CE_UCODE_ADDR, 0);
3471         WREG32(CP_ME_RAM_WADDR, 0);
3472         WREG32(CP_ME_RAM_RADDR, 0);
3473         return 0;
3474 }
3475
3476 static int si_cp_start(struct radeon_device *rdev)
3477 {
3478         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3479         int r, i;
3480
3481         r = radeon_ring_lock(rdev, ring, 7 + 4);
3482         if (r) {
3483                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3484                 return r;
3485         }
3486         /* init the CP */
3487         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3488         radeon_ring_write(ring, 0x1);
3489         radeon_ring_write(ring, 0x0);
3490         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3491         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3492         radeon_ring_write(ring, 0);
3493         radeon_ring_write(ring, 0);
3494
3495         /* init the CE partitions */
3496         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3497         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3498         radeon_ring_write(ring, 0xc000);
3499         radeon_ring_write(ring, 0xe000);
3500         radeon_ring_unlock_commit(rdev, ring, false);
3501
3502         si_cp_enable(rdev, true);
3503
3504         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3505         if (r) {
3506                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3507                 return r;
3508         }
3509
3510         /* setup clear context state */
3511         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3512         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3513
3514         for (i = 0; i < si_default_size; i++)
3515                 radeon_ring_write(ring, si_default_state[i]);
3516
3517         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3518         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3519
3520         /* set clear context state */
3521         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3522         radeon_ring_write(ring, 0);
3523
3524         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3525         radeon_ring_write(ring, 0x00000316);
3526         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3527         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3528
3529         radeon_ring_unlock_commit(rdev, ring, false);
3530
3531         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3532                 ring = &rdev->ring[i];
3533                 r = radeon_ring_lock(rdev, ring, 2);
3534
3535                 /* clear the compute context state */
3536                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3537                 radeon_ring_write(ring, 0);
3538
3539                 radeon_ring_unlock_commit(rdev, ring, false);
3540         }
3541
3542         return 0;
3543 }
3544
3545 static void si_cp_fini(struct radeon_device *rdev)
3546 {
3547         struct radeon_ring *ring;
3548         si_cp_enable(rdev, false);
3549
3550         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3551         radeon_ring_fini(rdev, ring);
3552         radeon_scratch_free(rdev, ring->rptr_save_reg);
3553
3554         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3555         radeon_ring_fini(rdev, ring);
3556         radeon_scratch_free(rdev, ring->rptr_save_reg);
3557
3558         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3559         radeon_ring_fini(rdev, ring);
3560         radeon_scratch_free(rdev, ring->rptr_save_reg);
3561 }
3562
/**
 * si_cp_resume - program and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs size, read/write pointers, writeback addresses and base
 * address for the GFX ring (RB0) and the two compute rings (RB1/RB2),
 * starts the CP via si_cp_start() and ring-tests all three rings.
 * Returns 0 on success; a GFX ring test failure marks all rings not
 * ready and returns the error, while compute ring test failures only
 * mark the respective ring not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* keep GUI idle interrupts quiet while reprogramming the CP */
	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of qwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* NOTE(review): RB_NO_UPDATE is only ORed into ring 0's
		 * CNTL; rings 1/2 below recompute tmp and never set it -
		 * confirm this asymmetry is intended.
		 */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* a dead GFX ring is fatal - mark everything not ready */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal; just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3688
/**
 * si_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and returns a
 * mask of RADEON_RESET_* flags for every block reporting busy/pending
 * state.  A busy MC is filtered out at the end since it is most
 * likely just busy rather than hung.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3769
/**
 * si_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags, typically produced by
 *              si_gpu_check_soft_reset()
 *
 * Quiesces the chip (PG/CG off, RLC stopped, CP halted, DMA ring
 * buffers disabled, MC stopped), translates @reset_mask into
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET bit masks, pulses those reset
 * bits, and then restores MC access.  A no-op if @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop memory clients before touching reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* build the GRBM/SRBM reset bit masks from the request */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: assert, wait, deassert; the extra
	 * reads after each write post the write to the hardware */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3901
/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass clocks
 *
 * @rdev: radeon_device pointer
 *
 * Puts the SPLL into bypass, requests the control change and waits
 * (bounded by rdev->usec_timeout) for SPLL_CHG_STATUS, then clears
 * the request and deselects the MPLL as mclk source.  Used to get the
 * clocks off the PLLs before a PCI config reset.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp;
	int i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the control change */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the SPLL to acknowledge the change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* take mclk off the MPLL */
	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3928
/**
 * si_spll_powerdown - put the SPLL into reset and sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts RESET and SLEEP, then
 * releases software control.  Called after switching the clocks to
 * bypass mode, just before a PCI config reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3949
/**
 * si_gpu_pci_config_reset - full asic reset via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the chip (PG/CG off, CP halted, DMA rings disabled, RLC
 * stopped, MC stopped), switches the clocks to bypass, powers down
 * the SPLL, disables bus mastering and triggers a PCI config reset,
 * then waits (bounded by rdev->usec_timeout) for the asic to come
 * back (CONFIG_MEMSIZE reads valid again).
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4001
/**
 * si_asic_reset - attempt to reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: if true, go straight to a PCI config reset
 *
 * Tries a soft reset of the blocks reported hung by
 * si_gpu_check_soft_reset(); if blocks are still hung afterwards and
 * the radeon_hard_reset module option allows it, falls back to a PCI
 * config reset.  The BIOS scratch "engine hung" flag is set while a
 * reset is pending and cleared once the GPU reads idle.
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	if (hard) {
		si_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
4032
4033 /**
4034  * si_gfx_is_lockup - Check if the GFX engine is locked up
4035  *
4036  * @rdev: radeon_device pointer
4037  * @ring: radeon_ring structure holding ring information
4038  *
4039  * Check if the GFX engine is locked up.
4040  * Returns true if the engine appears to be locked up, false if not.
4041  */
4042 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4043 {
4044         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4045
4046         if (!(reset_mask & (RADEON_RESET_GFX |
4047                             RADEON_RESET_COMPUTE |
4048                             RADEON_RESET_CP))) {
4049                 radeon_ring_lockup_update(rdev, ring);
4050                 return false;
4051         }
4052         return radeon_ring_test_lockup(rdev, ring);
4053 }
4054
4055 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the HDP registers, stops the MC clients, programs the
 * system aperture, FB location, HDP non-surface range and the
 * (disabled) AGP aperture, then resumes the MC and turns off the VGA
 * renderer so it cannot scribble over driver-owned VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs base (low 16) and top (high 16) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture disabled (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4106
4107 void si_vram_gtt_location(struct radeon_device *rdev,
4108                           struct radeon_mc *mc)
4109 {
4110         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4111                 /* leave room for at least 1024M GTT */
4112                 dev_warn(rdev->dev, "limiting VRAM\n");
4113                 mc->real_vram_size = 0xFFC0000000ULL;
4114                 mc->mc_vram_size = 0xFFC0000000ULL;
4115         }
4116         radeon_vram_location(rdev, &rdev->mc, 0);
4117         rdev->mc.gtt_base_align = 0;
4118         radeon_gtt_location(rdev, mc);
4119 }
4120
/**
 * si_mc_init - read out the VRAM configuration
 *
 * @rdev: radeon_device pointer
 *
 * Derives the VRAM bus width from the channel size (MC_ARB_RAMCFG)
 * and channel count (MC_SHARED_CHMAP), records the PCI aperture,
 * reads the VRAM size in MB from CONFIG_MEMSIZE, then sets up the
 * VRAM/GTT layout and refreshes bandwidth info.  Always returns 0.
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	/* decode the number of memory channels */
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	tmp = RREG32(CONFIG_MEMSIZE);
	/* some boards may have garbage in the upper 16 bits */
	if (tmp & 0xffff0000) {
		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
		/* NOTE(review): if the low 16 bits are zero the raw value
		 * is kept unchanged - presumably to avoid reporting a zero
		 * size; confirm that is intended. */
		if (tmp & 0xffff)
			tmp &= 0xffff;
	}
	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
4187
4188 /*
4189  * GART
4190  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then requests a TLB invalidation for
 * VM context 0 (the GART context).
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15; only context 0 is flushed here */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4199
/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, sets up VM context 0 as the GART mapping and contexts
 * 1-15 (with their saved page-table bases) for per-process VMs with
 * full protection-fault reporting, then flushes the TLB.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 - the flat GART mapping over the GTT aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4287
/**
 * si_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Saves the page-table base addresses of VM contexts 1-15 (so
 * si_pcie_gart_enable() can restore them), disables all VM contexts,
 * reverts the TLB and L2 cache controls, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-context page-table bases for the next enable */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4317
/* Full GART teardown at driver unload: disable the hardware first, then
 * free the table BO and the GART bookkeeping. Order matters - the table
 * must not be freed while the MC can still reference it.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4324
4325 /* vm parser */
4326 static bool si_vm_reg_valid(u32 reg)
4327 {
4328         /* context regs are fine */
4329         if (reg >= 0x28000)
4330                 return true;
4331
4332         /* shader regs are also fine */
4333         if (reg >= 0xB000 && reg < 0xC000)
4334                 return true;
4335
4336         /* check config regs */
4337         switch (reg) {
4338         case GRBM_GFX_INDEX:
4339         case CP_STRMOUT_CNTL:
4340         case VGT_VTX_VECT_EJECT_REG:
4341         case VGT_CACHE_INVALIDATION:
4342         case VGT_ESGS_RING_SIZE:
4343         case VGT_GSVS_RING_SIZE:
4344         case VGT_GS_VERTEX_REUSE:
4345         case VGT_PRIMITIVE_TYPE:
4346         case VGT_INDEX_TYPE:
4347         case VGT_NUM_INDICES:
4348         case VGT_NUM_INSTANCES:
4349         case VGT_TF_RING_SIZE:
4350         case VGT_HS_OFFCHIP_PARAM:
4351         case VGT_TF_MEMORY_BASE:
4352         case PA_CL_ENHANCE:
4353         case PA_SU_LINE_STIPPLE_VALUE:
4354         case PA_SC_LINE_STIPPLE_STATE:
4355         case PA_SC_ENHANCE:
4356         case SQC_CACHES:
4357         case SPI_STATIC_THREAD_MGMT_1:
4358         case SPI_STATIC_THREAD_MGMT_2:
4359         case SPI_STATIC_THREAD_MGMT_3:
4360         case SPI_PS_MAX_WAVE_ID:
4361         case SPI_CONFIG_CNTL:
4362         case SPI_CONFIG_CNTL_1:
4363         case TA_CNTL_AUX:
4364         case TA_CS_BC_BASE_ADDR:
4365                 return true;
4366         default:
4367                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4368                 return false;
4369         }
4370 }
4371
4372 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4373                                   u32 *ib, struct radeon_cs_packet *pkt)
4374 {
4375         switch (pkt->opcode) {
4376         case PACKET3_NOP:
4377         case PACKET3_SET_BASE:
4378         case PACKET3_SET_CE_DE_COUNTERS:
4379         case PACKET3_LOAD_CONST_RAM:
4380         case PACKET3_WRITE_CONST_RAM:
4381         case PACKET3_WRITE_CONST_RAM_OFFSET:
4382         case PACKET3_DUMP_CONST_RAM:
4383         case PACKET3_INCREMENT_CE_COUNTER:
4384         case PACKET3_WAIT_ON_DE_COUNTER:
4385         case PACKET3_CE_WRITE:
4386                 break;
4387         default:
4388                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4389                 return -EINVAL;
4390         }
4391         return 0;
4392 }
4393
4394 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4395 {
4396         u32 start_reg, reg, i;
4397         u32 command = ib[idx + 4];
4398         u32 info = ib[idx + 1];
4399         u32 idx_value = ib[idx];
4400         if (command & PACKET3_CP_DMA_CMD_SAS) {
4401                 /* src address space is register */
4402                 if (((info & 0x60000000) >> 29) == 0) {
4403                         start_reg = idx_value << 2;
4404                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4405                                 reg = start_reg;
4406                                 if (!si_vm_reg_valid(reg)) {
4407                                         DRM_ERROR("CP DMA Bad SRC register\n");
4408                                         return -EINVAL;
4409                                 }
4410                         } else {
4411                                 for (i = 0; i < (command & 0x1fffff); i++) {
4412                                         reg = start_reg + (4 * i);
4413                                         if (!si_vm_reg_valid(reg)) {
4414                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4415                                                 return -EINVAL;
4416                                         }
4417                                 }
4418                         }
4419                 }
4420         }
4421         if (command & PACKET3_CP_DMA_CMD_DAS) {
4422                 /* dst address space is register */
4423                 if (((info & 0x00300000) >> 20) == 0) {
4424                         start_reg = ib[idx + 2];
4425                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4426                                 reg = start_reg;
4427                                 if (!si_vm_reg_valid(reg)) {
4428                                         DRM_ERROR("CP DMA Bad DST register\n");
4429                                         return -EINVAL;
4430                                 }
4431                         } else {
4432                                 for (i = 0; i < (command & 0x1fffff); i++) {
4433                                         reg = start_reg + (4 * i);
4434                                         if (!si_vm_reg_valid(reg)) {
4435                                                 DRM_ERROR("CP DMA Bad DST register\n");
4436                                                 return -EINVAL;
4437                                         }
4438                                 }
4439                         }
4440                 }
4441         }
4442         return 0;
4443 }
4444
/*
 * Validate one PM4 type-3 packet on the GFX ring. Opcodes with no
 * register operands are whitelisted outright; packets that can write
 * registers have their target register(s) checked via si_vm_reg_valid().
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that carry no register destinations - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-wr: all data dwords hit start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers, one per data dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set -> destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* bound the whole written range, then check each register */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4562
/*
 * Validate one PM4 type-3 packet on a compute ring. Mirrors
 * si_vm_packet3_gfx_check() but with the draw/index opcodes removed,
 * since those are GFX-only.
 *
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that carry no register destinations - always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-sel field == 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-reg-wr: all data dwords hit start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers, one per data dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set -> destination is a register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst bit set -> destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4650
/**
 * si_ib_parse - validate a VM indirect buffer
 * @rdev: radeon_device pointer
 * @ib: the IB to validate
 *
 * Walks the IB packet by packet and dispatches each type-3 packet to the
 * CE, GFX or compute checker depending on the IB flavor and target ring.
 * Type-0 packets are rejected outright. On any failure the whole IB is
 * dumped with an arrow at the offending dword.
 *
 * Returns 0 if the IB is valid, -EINVAL otherwise.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* raw register writes are never allowed from a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a one-dword filler/NOP */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB, marking the dword that failed */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4709
4710 /*
4711  * vm
4712  */
/**
 * si_vm_init - initialize the VM manager parameters for SI
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 hardware VM contexts; VRAM pages need no base offset.
 *
 * Returns 0 (cannot fail).
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4722
/* Nothing to tear down on SI; kept to satisfy the common asic interface. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4726
4727 /**
4728  * si_vm_decode_fault - print human readable fault info
4729  *
4730  * @rdev: radeon_device pointer
4731  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4732  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4733  *
4734  * Print human readable fault information (SI).
4735  */
4736 static void si_vm_decode_fault(struct radeon_device *rdev,
4737                                u32 status, u32 addr)
4738 {
4739         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4740         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4741         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4742         char *block;
4743
4744         if (rdev->family == CHIP_TAHITI) {
4745                 switch (mc_id) {
4746                 case 160:
4747                 case 144:
4748                 case 96:
4749                 case 80:
4750                 case 224:
4751                 case 208:
4752                 case 32:
4753                 case 16:
4754                         block = "CB";
4755                         break;
4756                 case 161:
4757                 case 145:
4758                 case 97:
4759                 case 81:
4760                 case 225:
4761                 case 209:
4762                 case 33:
4763                 case 17:
4764                         block = "CB_FMASK";
4765                         break;
4766                 case 162:
4767                 case 146:
4768                 case 98:
4769                 case 82:
4770                 case 226:
4771                 case 210:
4772                 case 34:
4773                 case 18:
4774                         block = "CB_CMASK";
4775                         break;
4776                 case 163:
4777                 case 147:
4778                 case 99:
4779                 case 83:
4780                 case 227:
4781                 case 211:
4782                 case 35:
4783                 case 19:
4784                         block = "CB_IMMED";
4785                         break;
4786                 case 164:
4787                 case 148:
4788                 case 100:
4789                 case 84:
4790                 case 228:
4791                 case 212:
4792                 case 36:
4793                 case 20:
4794                         block = "DB";
4795                         break;
4796                 case 165:
4797                 case 149:
4798                 case 101:
4799                 case 85:
4800                 case 229:
4801                 case 213:
4802                 case 37:
4803                 case 21:
4804                         block = "DB_HTILE";
4805                         break;
4806                 case 167:
4807                 case 151:
4808                 case 103:
4809                 case 87:
4810                 case 231:
4811                 case 215:
4812                 case 39:
4813                 case 23:
4814                         block = "DB_STEN";
4815                         break;
4816                 case 72:
4817                 case 68:
4818                 case 64:
4819                 case 8:
4820                 case 4:
4821                 case 0:
4822                 case 136:
4823                 case 132:
4824                 case 128:
4825                 case 200:
4826                 case 196:
4827                 case 192:
4828                         block = "TC";
4829                         break;
4830                 case 112:
4831                 case 48:
4832                         block = "CP";
4833                         break;
4834                 case 49:
4835                 case 177:
4836                 case 50:
4837                 case 178:
4838                         block = "SH";
4839                         break;
4840                 case 53:
4841                 case 190:
4842                         block = "VGT";
4843                         break;
4844                 case 117:
4845                         block = "IH";
4846                         break;
4847                 case 51:
4848                 case 115:
4849                         block = "RLC";
4850                         break;
4851                 case 119:
4852                 case 183:
4853                         block = "DMA0";
4854                         break;
4855                 case 61:
4856                         block = "DMA1";
4857                         break;
4858                 case 248:
4859                 case 120:
4860                         block = "HDP";
4861                         break;
4862                 default:
4863                         block = "unknown";
4864                         break;
4865                 }
4866         } else {
4867                 switch (mc_id) {
4868                 case 32:
4869                 case 16:
4870                 case 96:
4871                 case 80:
4872                 case 160:
4873                 case 144:
4874                 case 224:
4875                 case 208:
4876                         block = "CB";
4877                         break;
4878                 case 33:
4879                 case 17:
4880                 case 97:
4881                 case 81:
4882                 case 161:
4883                 case 145:
4884                 case 225:
4885                 case 209:
4886                         block = "CB_FMASK";
4887                         break;
4888                 case 34:
4889                 case 18:
4890                 case 98:
4891                 case 82:
4892                 case 162:
4893                 case 146:
4894                 case 226:
4895                 case 210:
4896                         block = "CB_CMASK";
4897                         break;
4898                 case 35:
4899                 case 19:
4900                 case 99:
4901                 case 83:
4902                 case 163:
4903                 case 147:
4904                 case 227:
4905                 case 211:
4906                         block = "CB_IMMED";
4907                         break;
4908                 case 36:
4909                 case 20:
4910                 case 100:
4911                 case 84:
4912                 case 164:
4913                 case 148:
4914                 case 228:
4915                 case 212:
4916                         block = "DB";
4917                         break;
4918                 case 37:
4919                 case 21:
4920                 case 101:
4921                 case 85:
4922                 case 165:
4923                 case 149:
4924                 case 229:
4925                 case 213:
4926                         block = "DB_HTILE";
4927                         break;
4928                 case 39:
4929                 case 23:
4930                 case 103:
4931                 case 87:
4932                 case 167:
4933                 case 151:
4934                 case 231:
4935                 case 215:
4936                         block = "DB_STEN";
4937                         break;
4938                 case 72:
4939                 case 68:
4940                 case 8:
4941                 case 4:
4942                 case 136:
4943                 case 132:
4944                 case 200:
4945                 case 196:
4946                         block = "TC";
4947                         break;
4948                 case 112:
4949                 case 48:
4950                         block = "CP";
4951                         break;
4952                 case 49:
4953                 case 177:
4954                 case 50:
4955                 case 178:
4956                         block = "SH";
4957                         break;
4958                 case 53:
4959                         block = "VGT";
4960                         break;
4961                 case 117:
4962                         block = "IH";
4963                         break;
4964                 case 51:
4965                 case 115:
4966                         block = "RLC";
4967                         break;
4968                 case 119:
4969                 case 183:
4970                         block = "DMA0";
4971                         break;
4972                 case 61:
4973                         block = "DMA1";
4974                         break;
4975                 case 248:
4976                 case 120:
4977                         block = "HDP";
4978                         break;
4979                 default:
4980                         block = "unknown";
4981                         break;
4982                 }
4983         }
4984
4985         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4986                protections, vmid, addr,
4987                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4988                block, mc_id);
4989 }
4990
/**
 * si_vm_flush - flush the TLB of a VM context via the GFX/CP ring
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15)
 * @pd_addr: new page directory base address
 *
 * Emits the PM4 packets that update the context's page table base,
 * flush the HDP cache, request a TLB invalidate, and wait for it to
 * complete. Packet order is significant.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 use separate register blocks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5039
5040 /*
5041  *  Power and clock gating
5042  */
5043 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5044 {
5045         int i;
5046
5047         for (i = 0; i < rdev->usec_timeout; i++) {
5048                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5049                         break;
5050                 udelay(1);
5051         }
5052
5053         for (i = 0; i < rdev->usec_timeout; i++) {
5054                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5055                         break;
5056                 udelay(1);
5057         }
5058 }
5059
5060 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5061                                          bool enable)
5062 {
5063         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5064         u32 mask;
5065         int i;
5066
5067         if (enable)
5068                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5069         else
5070                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5071         WREG32(CP_INT_CNTL_RING0, tmp);
5072
5073         if (!enable) {
5074                 /* read a gfx register */
5075                 tmp = RREG32(DB_DEPTH_INFO);
5076
5077                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5078                 for (i = 0; i < rdev->usec_timeout; i++) {
5079                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5080                                 break;
5081                         udelay(1);
5082                 }
5083         }
5084 }
5085
5086 static void si_set_uvd_dcm(struct radeon_device *rdev,
5087                            bool sw_mode)
5088 {
5089         u32 tmp, tmp2;
5090
5091         tmp = RREG32(UVD_CGC_CTRL);
5092         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5093         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5094
5095         if (sw_mode) {
5096                 tmp &= ~0x7ffff800;
5097                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5098         } else {
5099                 tmp |= 0x7ffff800;
5100                 tmp2 = 0;
5101         }
5102
5103         WREG32(UVD_CGC_CTRL, tmp);
5104         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5105 }
5106
/* Initialize UVD internal clock gating. hw_mode is a compile-time
 * constant, so only the si_set_uvd_dcm(rdev, false) path ever runs;
 * the else branch is dead code kept for debugging/reference.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
5119
5120 static u32 si_halt_rlc(struct radeon_device *rdev)
5121 {
5122         u32 data, orig;
5123
5124         orig = data = RREG32(RLC_CNTL);
5125
5126         if (data & RLC_ENABLE) {
5127                 data &= ~RLC_ENABLE;
5128                 WREG32(RLC_CNTL, data);
5129
5130                 si_wait_for_rlc_serdes(rdev);
5131         }
5132
5133         return orig;
5134 }
5135
5136 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5137 {
5138         u32 tmp;
5139
5140         tmp = RREG32(RLC_CNTL);
5141         if (tmp != rlc)
5142                 WREG32(RLC_CNTL, rlc);
5143 }
5144
5145 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5146 {
5147         u32 data, orig;
5148
5149         orig = data = RREG32(DMA_PG);
5150         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5151                 data |= PG_CNTL_ENABLE;
5152         else
5153                 data &= ~PG_CNTL_ENABLE;
5154         if (orig != data)
5155                 WREG32(DMA_PG, data);
5156 }
5157
5158 static void si_init_dma_pg(struct radeon_device *rdev)
5159 {
5160         u32 tmp;
5161
5162         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5163         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5164
5165         for (tmp = 0; tmp < 5; tmp++)
5166                 WREG32(DMA_PGFSM_WRITE, 0);
5167 }
5168
5169 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5170                                bool enable)
5171 {
5172         u32 tmp;
5173
5174         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5175                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5176                 WREG32(RLC_TTOP_D, tmp);
5177
5178                 tmp = RREG32(RLC_PG_CNTL);
5179                 tmp |= GFX_PG_ENABLE;
5180                 WREG32(RLC_PG_CNTL, tmp);
5181
5182                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5183                 tmp |= AUTO_PG_EN;
5184                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5185         } else {
5186                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5187                 tmp &= ~AUTO_PG_EN;
5188                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5189
5190                 tmp = RREG32(DB_RENDER_CONTROL);
5191         }
5192 }
5193
5194 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5195 {
5196         u32 tmp;
5197
5198         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5199
5200         tmp = RREG32(RLC_PG_CNTL);
5201         tmp |= GFX_PG_SRC;
5202         WREG32(RLC_PG_CNTL, tmp);
5203
5204         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5205
5206         tmp = RREG32(RLC_AUTO_PG_CTRL);
5207
5208         tmp &= ~GRBM_REG_SGIT_MASK;
5209         tmp |= GRBM_REG_SGIT(0x700);
5210         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5211         WREG32(RLC_AUTO_PG_CTRL, tmp);
5212 }
5213
5214 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5215 {
5216         u32 mask = 0, tmp, tmp1;
5217         int i;
5218
5219         si_select_se_sh(rdev, se, sh);
5220         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5221         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5222         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5223
5224         tmp &= 0xffff0000;
5225
5226         tmp |= tmp1;
5227         tmp >>= 16;
5228
5229         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5230                 mask <<= 1;
5231                 mask |= 1;
5232         }
5233
5234         return (~tmp) & mask;
5235 }
5236
5237 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5238 {
5239         u32 i, j, k, active_cu_number = 0;
5240         u32 mask, counter, cu_bitmap;
5241         u32 tmp = 0;
5242
5243         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5244                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5245                         mask = 1;
5246                         cu_bitmap = 0;
5247                         counter  = 0;
5248                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5249                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5250                                         if (counter < 2)
5251                                                 cu_bitmap |= mask;
5252                                         counter++;
5253                                 }
5254                                 mask <<= 1;
5255                         }
5256
5257                         active_cu_number += counter;
5258                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5259                 }
5260         }
5261
5262         WREG32(RLC_PG_AO_CU_MASK, tmp);
5263
5264         tmp = RREG32(RLC_MAX_PG_CU);
5265         tmp &= ~MAX_PU_CU_MASK;
5266         tmp |= MAX_PU_CU(active_cu_number);
5267         WREG32(RLC_MAX_PG_CU, tmp);
5268 }
5269
/**
 * si_enable_cgcg - enable/disable GFX coarse-grain clock gating (CGCG)
 * @rdev: radeon device
 * @enable: target state (only honored with RADEON_CG_SUPPORT_GFX_CGCG)
 *
 * Runs the RLC serdes write sequence (with the RLC halted), then
 * updates the CGCG_EN/CGLS_EN bits in RLC_CGCG_CGLS_CTRL.  The
 * statement order here is significant; do not reorder.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC (saving its state) around the serdes writes */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC state */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads; values intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register when the bits actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5309
/**
 * si_enable_mgcg - enable/disable GFX medium-grain clock gating (MGCG)
 * @rdev: radeon device
 * @enable: target state (only honored with RADEON_CG_SUPPORT_GFX_MGCG)
 *
 * Programs CGTS_SM_CTRL_REG, optional CP memory light sleep,
 * RLC_CGTT_MGCG_OVERRIDE and an RLC serdes write sequence.  The
 * enable and disable paths deliberately apply the steps in opposite
 * order; do not reorder.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;	/* magic enable value */
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally enable CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low 6 override bits */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* serdes write sequence with the RLC halted */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the two low override bits first */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn CP memory light sleep off if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* serdes write sequence with the RLC halted */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5365
5366 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5367                                bool enable)
5368 {
5369         u32 orig, data, tmp;
5370
5371         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5372                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5373                 tmp |= 0x3fff;
5374                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5375
5376                 orig = data = RREG32(UVD_CGC_CTRL);
5377                 data |= DCM;
5378                 if (orig != data)
5379                         WREG32(UVD_CGC_CTRL, data);
5380
5381                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5382                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5383         } else {
5384                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5385                 tmp &= ~0x3fff;
5386                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5387
5388                 orig = data = RREG32(UVD_CGC_CTRL);
5389                 data &= ~DCM;
5390                 if (orig != data)
5391                         WREG32(UVD_CGC_CTRL, data);
5392
5393                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5394                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5395         }
5396 }
5397
/* MC/ATC/VM clock-gating control registers that all carry the
 * MC_LS_ENABLE / MC_CG_ENABLE bits; walked by si_enable_mc_ls()
 * and si_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5410
5411 static void si_enable_mc_ls(struct radeon_device *rdev,
5412                             bool enable)
5413 {
5414         int i;
5415         u32 orig, data;
5416
5417         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5418                 orig = data = RREG32(mc_cg_registers[i]);
5419                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5420                         data |= MC_LS_ENABLE;
5421                 else
5422                         data &= ~MC_LS_ENABLE;
5423                 if (data != orig)
5424                         WREG32(mc_cg_registers[i], data);
5425         }
5426 }
5427
5428 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5429                                bool enable)
5430 {
5431         int i;
5432         u32 orig, data;
5433
5434         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5435                 orig = data = RREG32(mc_cg_registers[i]);
5436                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5437                         data |= MC_CG_ENABLE;
5438                 else
5439                         data &= ~MC_CG_ENABLE;
5440                 if (data != orig)
5441                         WREG32(mc_cg_registers[i], data);
5442         }
5443 }
5444
5445 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5446                                bool enable)
5447 {
5448         u32 orig, data, offset;
5449         int i;
5450
5451         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5452                 for (i = 0; i < 2; i++) {
5453                         if (i == 0)
5454                                 offset = DMA0_REGISTER_OFFSET;
5455                         else
5456                                 offset = DMA1_REGISTER_OFFSET;
5457                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5458                         data &= ~MEM_POWER_OVERRIDE;
5459                         if (data != orig)
5460                                 WREG32(DMA_POWER_CNTL + offset, data);
5461                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5462                 }
5463         } else {
5464                 for (i = 0; i < 2; i++) {
5465                         if (i == 0)
5466                                 offset = DMA0_REGISTER_OFFSET;
5467                         else
5468                                 offset = DMA1_REGISTER_OFFSET;
5469                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5470                         data |= MEM_POWER_OVERRIDE;
5471                         if (data != orig)
5472                                 WREG32(DMA_POWER_CNTL + offset, data);
5473
5474                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5475                         data = 0xff000000;
5476                         if (data != orig)
5477                                 WREG32(DMA_CLK_CTRL + offset, data);
5478                 }
5479         }
5480 }
5481
5482 static void si_enable_bif_mgls(struct radeon_device *rdev,
5483                                bool enable)
5484 {
5485         u32 orig, data;
5486
5487         orig = data = RREG32_PCIE(PCIE_CNTL2);
5488
5489         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5490                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5491                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5492         else
5493                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5494                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5495
5496         if (orig != data)
5497                 WREG32_PCIE(PCIE_CNTL2, data);
5498 }
5499
5500 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5501                                bool enable)
5502 {
5503         u32 orig, data;
5504
5505         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5506
5507         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5508                 data &= ~CLOCK_GATING_DIS;
5509         else
5510                 data |= CLOCK_GATING_DIS;
5511
5512         if (orig != data)
5513                 WREG32(HDP_HOST_PATH_CNTL, data);
5514 }
5515
5516 static void si_enable_hdp_ls(struct radeon_device *rdev,
5517                              bool enable)
5518 {
5519         u32 orig, data;
5520
5521         orig = data = RREG32(HDP_MEM_POWER_LS);
5522
5523         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5524                 data |= HDP_LS_ENABLE;
5525         else
5526                 data &= ~HDP_LS_ENABLE;
5527
5528         if (orig != data)
5529                 WREG32(HDP_MEM_POWER_LS, data);
5530 }
5531
5532 static void si_update_cg(struct radeon_device *rdev,
5533                          u32 block, bool enable)
5534 {
5535         if (block & RADEON_CG_BLOCK_GFX) {
5536                 si_enable_gui_idle_interrupt(rdev, false);
5537                 /* order matters! */
5538                 if (enable) {
5539                         si_enable_mgcg(rdev, true);
5540                         si_enable_cgcg(rdev, true);
5541                 } else {
5542                         si_enable_cgcg(rdev, false);
5543                         si_enable_mgcg(rdev, false);
5544                 }
5545                 si_enable_gui_idle_interrupt(rdev, true);
5546         }
5547
5548         if (block & RADEON_CG_BLOCK_MC) {
5549                 si_enable_mc_mgcg(rdev, enable);
5550                 si_enable_mc_ls(rdev, enable);
5551         }
5552
5553         if (block & RADEON_CG_BLOCK_SDMA) {
5554                 si_enable_dma_mgcg(rdev, enable);
5555         }
5556
5557         if (block & RADEON_CG_BLOCK_BIF) {
5558                 si_enable_bif_mgls(rdev, enable);
5559         }
5560
5561         if (block & RADEON_CG_BLOCK_UVD) {
5562                 if (rdev->has_uvd) {
5563                         si_enable_uvd_mgcg(rdev, enable);
5564                 }
5565         }
5566
5567         if (block & RADEON_CG_BLOCK_HDP) {
5568                 si_enable_hdp_mgcg(rdev, enable);
5569                 si_enable_hdp_ls(rdev, enable);
5570         }
5571 }
5572
5573 static void si_init_cg(struct radeon_device *rdev)
5574 {
5575         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5576                             RADEON_CG_BLOCK_MC |
5577                             RADEON_CG_BLOCK_SDMA |
5578                             RADEON_CG_BLOCK_BIF |
5579                             RADEON_CG_BLOCK_HDP), true);
5580         if (rdev->has_uvd) {
5581                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5582                 si_init_uvd_internal_cg(rdev);
5583         }
5584 }
5585
5586 static void si_fini_cg(struct radeon_device *rdev)
5587 {
5588         if (rdev->has_uvd) {
5589                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5590         }
5591         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5592                             RADEON_CG_BLOCK_MC |
5593                             RADEON_CG_BLOCK_SDMA |
5594                             RADEON_CG_BLOCK_BIF |
5595                             RADEON_CG_BLOCK_HDP), false);
5596 }
5597
5598 u32 si_get_csb_size(struct radeon_device *rdev)
5599 {
5600         u32 count = 0;
5601         const struct cs_section_def *sect = NULL;
5602         const struct cs_extent_def *ext = NULL;
5603
5604         if (rdev->rlc.cs_data == NULL)
5605                 return 0;
5606
5607         /* begin clear state */
5608         count += 2;
5609         /* context control state */
5610         count += 3;
5611
5612         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5613                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5614                         if (sect->id == SECT_CONTEXT)
5615                                 count += 2 + ext->reg_count;
5616                         else
5617                                 return 0;
5618                 }
5619         }
5620         /* pa_sc_raster_config */
5621         count += 3;
5622         /* end clear state */
5623         count += 2;
5624         /* clear state */
5625         count += 2;
5626
5627         return count;
5628 }
5629
/**
 * si_get_csb_buffer - fill the clear-state buffer with PM4 packets
 * @rdev: radeon device
 * @buffer: destination buffer (little-endian dwords); must hold at
 *          least si_get_csb_size() dwords
 *
 * The packet layout must match the dword counts assumed by
 * si_get_csb_size() exactly.  Returns early (leaving the buffer
 * partially filled) if cs_data is missing or a non-context section
 * is encountered.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state preamble (2 dwords) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control (3 dwords) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent (2 + reg_count dwords) */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family PA_SC_RASTER_CONFIG value (3 dwords) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state preamble (2 dwords) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state packet (2 dwords) */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5689
5690 static void si_init_pg(struct radeon_device *rdev)
5691 {
5692         if (rdev->pg_flags) {
5693                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5694                         si_init_dma_pg(rdev);
5695                 }
5696                 si_init_ao_cu_mask(rdev);
5697                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5698                         si_init_gfx_cgpg(rdev);
5699                 } else {
5700                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5701                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5702                 }
5703                 si_enable_dma_pg(rdev, true);
5704                 si_enable_gfx_cgpg(rdev, true);
5705         } else {
5706                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5707                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5708         }
5709 }
5710
5711 static void si_fini_pg(struct radeon_device *rdev)
5712 {
5713         if (rdev->pg_flags) {
5714                 si_enable_dma_pg(rdev, false);
5715                 si_enable_gfx_cgpg(rdev, false);
5716         }
5717 }
5718
5719 /*
5720  * RLC
5721  */
5722 void si_rlc_reset(struct radeon_device *rdev)
5723 {
5724         u32 tmp = RREG32(GRBM_SOFT_RESET);
5725
5726         tmp |= SOFT_RESET_RLC;
5727         WREG32(GRBM_SOFT_RESET, tmp);
5728         udelay(50);
5729         tmp &= ~SOFT_RESET_RLC;
5730         WREG32(GRBM_SOFT_RESET, tmp);
5731         udelay(50);
5732 }
5733
5734 static void si_rlc_stop(struct radeon_device *rdev)
5735 {
5736         WREG32(RLC_CNTL, 0);
5737
5738         si_enable_gui_idle_interrupt(rdev, false);
5739
5740         si_wait_for_rlc_serdes(rdev);
5741 }
5742
5743 static void si_rlc_start(struct radeon_device *rdev)
5744 {
5745         WREG32(RLC_CNTL, RLC_ENABLE);
5746
5747         si_enable_gui_idle_interrupt(rdev, true);
5748
5749         udelay(50);
5750 }
5751
5752 static bool si_lbpw_supported(struct radeon_device *rdev)
5753 {
5754         u32 tmp;
5755
5756         /* Enable LBPW only for DDR3 */
5757         tmp = RREG32(MC_SEQ_MISC0);
5758         if ((tmp & 0xF0000000) == 0xB0000000)
5759                 return true;
5760         return false;
5761 }
5762
5763 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5764 {
5765         u32 tmp;
5766
5767         tmp = RREG32(RLC_LB_CNTL);
5768         if (enable)
5769                 tmp |= LOAD_BALANCE_ENABLE;
5770         else
5771                 tmp &= ~LOAD_BALANCE_ENABLE;
5772         WREG32(RLC_LB_CNTL, tmp);
5773
5774         if (!enable) {
5775                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5776                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5777         }
5778 }
5779
/**
 * si_rlc_resume - reset the RLC, load its microcode and start it
 * @rdev: radeon device
 *
 * Stops and soft-resets the RLC, initializes power and clock gating,
 * clears the RLC run-list and load-balance registers, uploads the
 * RLC firmware (new-format header or legacy raw big-endian words),
 * then starts the RLC with LBPW set per the memory type.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear run-list and load-balance state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new firmware format: header describes size and offset;
		 * payload is little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware: fixed size, big-endian words */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	/* reset the ucode address after the upload */
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5834
5835 static void si_enable_interrupts(struct radeon_device *rdev)
5836 {
5837         u32 ih_cntl = RREG32(IH_CNTL);
5838         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5839
5840         ih_cntl |= ENABLE_INTR;
5841         ih_rb_cntl |= IH_RB_ENABLE;
5842         WREG32(IH_CNTL, ih_cntl);
5843         WREG32(IH_RB_CNTL, ih_rb_cntl);
5844         rdev->ih.enabled = true;
5845 }
5846
5847 static void si_disable_interrupts(struct radeon_device *rdev)
5848 {
5849         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5850         u32 ih_cntl = RREG32(IH_CNTL);
5851
5852         ih_rb_cntl &= ~IH_RB_ENABLE;
5853         ih_cntl &= ~ENABLE_INTR;
5854         WREG32(IH_RB_CNTL, ih_rb_cntl);
5855         WREG32(IH_CNTL, ih_cntl);
5856         /* set rptr, wptr to 0 */
5857         WREG32(IH_RB_RPTR, 0);
5858         WREG32(IH_RB_WPTR, 0);
5859         rdev->ih.enabled = false;
5860         rdev->ih.rptr = 0;
5861 }
5862
/**
 * si_disable_interrupt_state - mask every interrupt source
 * @rdev: radeon device
 *
 * Clears the CP ring, DMA trap, GRBM/SRBM, per-CRTC vblank/pageflip
 * and (on ASICs with display) DAC/HPD interrupt enables, leaving the
 * controller in an all-masked state.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	int i;
	u32 tmp;

	/* preserve only the context busy/empty bits on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* disable traps on both DMA engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(SRBM_INT_CNTL, 0);
	/* mask vblank and pageflip interrupts on every CRTC */
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(INT_MASK + crtc_offsets[i], 0);
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* keep only the HPD polarity bits; clears the enables */
		for (i = 0; i < 6; i++)
			WREG32_AND(DC_HPDx_INT_CONTROL(i),
				   DC_HPDx_INT_POLARITY);
	}
}
5892
/**
 * si_irq_init - set up the interrupt handler ring and controller
 * @rdev: radeon device
 *
 * Allocates the IH ring, loads the RLC, programs the interrupt
 * controller (dummy read address, ring base/size, optional writeback,
 * MSI rptr rearm), masks all sources and finally enables interrupts.
 *
 * Returns 0 on success or a negative error code from the ring
 * allocation or RLC resume.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is 256-byte aligned */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5963
/* The order we write back each register here is important */
/*
 * si_irq_set - program the hardware interrupt enables from driver state
 *
 * Rebuilds the CP ring, DMA trap, thermal, vblank/pflip and HPD enables
 * from rdev->irq and writes them back.  Returns 0 on success or -EINVAL
 * if no IRQ handler has been installed yet.
 */
int si_irq_set(struct radeon_device *rdev)
{
	int i;
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* keep only the context busy/empty enables; ring enables are rebuilt below */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	/* start from the current DMA state with the trap enable cleared */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* mask both thermal thresholds; re-enabled below when dpm_thermal is set */
	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* vblank enable follows either a DRM vblank request or a pending pflip */
	for (i = 0; i < rdev->num_crtc; i++) {
		radeon_irq_kms_set_irq_n_enabled(
		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
		    rdev->irq.crtc_vblank_int[i] ||
		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
	}

	/* pageflip interrupts are left unconditionally enabled per crtc */
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

	if (!ASIC_IS_NODCE(rdev)) {
		for (i = 0; i < 6; i++) {
			radeon_irq_kms_set_irq_n_enabled(
			    rdev, DC_HPDx_INT_CONTROL(i),
			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
			    rdev->irq.hpd[i], "HPD", i);
		}
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6058
/* The order we write back each register here is important */
/*
 * si_irq_ack - latch and acknowledge pending display interrupts
 *
 * Snapshots the display / graphics interrupt status registers into
 * rdev->irq.stat_regs (read later by si_irq_process()), then writes the
 * corresponding ack bits for pageflip, vblank/vline and HPD interrupts.
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	int i, j;
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

	/* no display engine -> nothing to latch or acknowledge */
	if (ASIC_IS_NODCE(rdev))
		return;

	/* latch status so si_irq_process() can match IV entries against it */
	for (i = 0; i < 6; i++) {
		disp_int[i] = RREG32(si_disp_int_status[i]);
		if (i < rdev->num_crtc)
			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
	}

	/* We write back each interrupt register in pairs of two */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		/* first clear any pageflip interrupt on the crtc pair */
		for (j = i; j < (i + 2); j++) {
			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
				       GRPH_PFLIP_INT_CLEAR);
		}

		/* then ack vblank/vline on the same pair */
		for (j = i; j < (i + 2); j++) {
			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
				WREG32(VBLANK_STATUS + crtc_offsets[j],
				       VBLANK_ACK);
			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
				WREG32(VLINE_STATUS + crtc_offsets[j],
				       VLINE_ACK);
		}
	}

	/* ack hotplug interrupts (queued to hotplug_work by si_irq_process()) */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
	}

	/* ack HPD RX interrupts (queued to dp_work by si_irq_process()) */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
	}
}
6103
/*
 * si_irq_disable - fully quiesce interrupt delivery
 *
 * Stops the IH ring, gives in-flight interrupts a moment to land,
 * acknowledges anything still pending, then clears every source enable.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6112
/* Suspend-time IRQ teardown: disable interrupts first, then stop the RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6118
/* Final IRQ teardown: suspend interrupts, then free the IH ring buffer. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6124
/*
 * si_get_ih_wptr - fetch the IH ring write pointer
 *
 * Prefers the writeback copy when writeback is enabled, otherwise reads
 * IH_RB_WPTR directly.  On ring overflow it warns, resynchronizes the
 * read pointer to the oldest non-overwritten entry (wptr + 16) and
 * clears the overflow flag in IH_RB_CNTL.  Returns wptr masked to the
 * ring size.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6149
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/*
 * si_irq_process - drain the IH ring and dispatch each IV entry
 *
 * Decodes every pending 16-byte IV entry and dispatches it by source id:
 * vblank/vline, pageflips, hotplug, SRBM read errors, UVD, VM faults,
 * CP/DMA fences and thermal events.  Deferred work (DP, hotplug,
 * thermal) is scheduled after the loop.  Returns IRQ_HANDLED when
 * entries were processed, IRQ_NONE when the IH is disabled or another
 * thread already holds the processing lock.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 crtc_idx, hpd_idx;
	u32 mask;
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;
	const char *event_name;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
		case 2: /* D2 vblank/vline */
		case 3: /* D3 vblank/vline */
		case 4: /* D4 vblank/vline */
		case 5: /* D5 vblank/vline */
		case 6: /* D6 vblank/vline */
			crtc_idx = src_id - 1;

			if (src_data == 0) { /* vblank */
				mask = LB_D1_VBLANK_INTERRUPT;
				event_name = "vblank";

				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
					drm_handle_vblank(rdev->ddev, crtc_idx);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
					radeon_crtc_handle_vblank(rdev,
								  crtc_idx);
				}

			} else if (src_data == 1) { /* vline */
				mask = LB_D1_VLINE_INTERRUPT;
				event_name = "vline";
			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			/* the latched status (from si_irq_ack()) should agree */
			if (!(disp_int[crtc_idx] & mask)) {
				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
					  crtc_idx + 1, event_name);
			}

			disp_int[crtc_idx] &= ~mask;
			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* src_data 0-5: HPD pins, 6-11: the matching RX lines */
			if (src_data <= 5) {
				hpd_idx = src_data;
				mask = DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				event_name = "HPD";

			} else if (src_data <= 11) {
				hpd_idx = src_data - 6;
				mask = DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				event_name = "HPD_RX";

			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[hpd_idx] & mask))
				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

			disp_int[hpd_idx] &= ~mask;
			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			/* a zeroed addr+status means the fault was already handled */
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* EOP events carry the originating ring in RINGID */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6370
6371 /*
6372  * startup/shutdown callbacks
6373  */
6374 static void si_uvd_init(struct radeon_device *rdev)
6375 {
6376         int r;
6377
6378         if (!rdev->has_uvd)
6379                 return;
6380
6381         r = radeon_uvd_init(rdev);
6382         if (r) {
6383                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6384                 /*
6385                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6386                  * to early fails uvd_v2_2_resume() and thus nothing happens
6387                  * there. So it is pointless to try to go through that code
6388                  * hence why we disable uvd here.
6389                  */
6390                 rdev->has_uvd = false;
6391                 return;
6392         }
6393         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6394         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6395 }
6396
6397 static void si_uvd_start(struct radeon_device *rdev)
6398 {
6399         int r;
6400
6401         if (!rdev->has_uvd)
6402                 return;
6403
6404         r = uvd_v2_2_resume(rdev);
6405         if (r) {
6406                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6407                 goto error;
6408         }
6409         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6410         if (r) {
6411                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6412                 goto error;
6413         }
6414         return;
6415
6416 error:
6417         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6418 }
6419
6420 static void si_uvd_resume(struct radeon_device *rdev)
6421 {
6422         struct radeon_ring *ring;
6423         int r;
6424
6425         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6426                 return;
6427
6428         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6429         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6430         if (r) {
6431                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6432                 return;
6433         }
6434         r = uvd_v1_0_init(rdev);
6435         if (r) {
6436                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6437                 return;
6438         }
6439 }
6440
6441 static void si_vce_init(struct radeon_device *rdev)
6442 {
6443         int r;
6444
6445         if (!rdev->has_vce)
6446                 return;
6447
6448         r = radeon_vce_init(rdev);
6449         if (r) {
6450                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6451                 /*
6452                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6453                  * to early fails si_vce_start() and thus nothing happens
6454                  * there. So it is pointless to try to go through that code
6455                  * hence why we disable vce here.
6456                  */
6457                 rdev->has_vce = false;
6458                 return;
6459         }
6460         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6461         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6462         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6463         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6464 }
6465
6466 static void si_vce_start(struct radeon_device *rdev)
6467 {
6468         int r;
6469
6470         if (!rdev->has_vce)
6471                 return;
6472
6473         r = radeon_vce_resume(rdev);
6474         if (r) {
6475                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6476                 goto error;
6477         }
6478         r = vce_v1_0_resume(rdev);
6479         if (r) {
6480                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6481                 goto error;
6482         }
6483         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6484         if (r) {
6485                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6486                 goto error;
6487         }
6488         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6489         if (r) {
6490                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6491                 goto error;
6492         }
6493         return;
6494
6495 error:
6496         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6497         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6498 }
6499
6500 static void si_vce_resume(struct radeon_device *rdev)
6501 {
6502         struct radeon_ring *ring;
6503         int r;
6504
6505         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6506                 return;
6507
6508         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6509         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6510         if (r) {
6511                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6512                 return;
6513         }
6514         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6515         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6516         if (r) {
6517                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6518                 return;
6519         }
6520         r = vce_v1_0_init(rdev);
6521         if (r) {
6522                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6523                 return;
6524         }
6525 }
6526
/*
 * si_startup - bring the ASIC up (used by both init and resume)
 *
 * Ordered bring-up: PCIe link/ASPM, VRAM scratch, MC program + (when DPM
 * is off) MC microcode, GART, GPU core, RLC and writeback buffers, fence
 * rings, UVD/VCE, interrupts, CP/DMA rings, IB pool, VM manager and
 * audio.  Returns 0 on success or a negative errno; callers disable
 * acceleration on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* when DPM is enabled the MC microcode was already loaded by DPM */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on each ring (gfx, 2x compute CP, 2x DMA) */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal; they disable their own rings */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6685
6686 int si_resume(struct radeon_device *rdev)
6687 {
6688         int r;
6689
6690         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6691          * posting will perform necessary task to bring back GPU into good
6692          * shape.
6693          */
6694         /* post card */
6695         atom_asic_init(rdev->mode_info.atom_context);
6696
6697         /* init golden registers */
6698         si_init_golden_registers(rdev);
6699
6700         if (rdev->pm.pm_method == PM_METHOD_DPM)
6701                 radeon_pm_resume(rdev);
6702
6703         rdev->accel_working = true;
6704         r = si_startup(rdev);
6705         if (r) {
6706                 DRM_ERROR("si startup failed on resume\n");
6707                 rdev->accel_working = false;
6708                 return r;
6709         }
6710
6711         return r;
6712
6713 }
6714
/*
 * si_suspend - quiesce the GPU for suspend
 *
 * Suspends PM and audio, halts the CP and DMA engines, suspends UVD/VCE
 * when present, disables power/clock gating, then shuts off interrupts,
 * writeback and the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		radeon_uvd_suspend(rdev);
		uvd_v1_0_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6735
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/*
 * si_init - one-time driver initialization for SI ASICs
 *
 * Reads and posts the (ATOM) BIOS, sets up clocks, MC, the memory
 * manager, firmware, all ring descriptors, UVD/VCE, the IH ring and the
 * GART, then runs si_startup().  A startup failure tears acceleration
 * back down but still returns 0 so the display side can keep working.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message text says "cayman"; inherited from
		 * the NI code this was derived from.
		 */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	si_init_golden_registers(rdev);
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	radeon_fence_driver_init(rdev);

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode unless every image is already present */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			/*(DEBLOBBED)*/
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* ring descriptors: 1MB for each CP ring, 64KB for each DMA ring */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	si_uvd_init(rdev);
	si_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		/* startup failed: unwind acceleration but keep the device */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		/*(DEBLOBBED)*/
	}

	return 0;
}
6860
/**
 * si_fini - tear down the SI GPU driver state
 *
 * @rdev: radeon_device pointer
 *
 * Releases everything brought up at init/startup time. The ordering below
 * is significant: power management and the command engines (CP, DMA) come
 * down first, then PG/CG and interrupt/RLC state, then the writeback, VM
 * and IB infrastructure, then the optional UVD/VCE engines, and finally
 * GART, VRAM scratch, GEM, fences, the BO manager and the ATOM BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD/VCE are only initialized on parts that have them */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6889
6890 /**
6891  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6892  *
6893  * @rdev: radeon_device pointer
6894  *
6895  * Fetches a GPU clock counter snapshot (SI).
6896  * Returns the 64 bit clock counter snapshot.
6897  */
6898 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6899 {
6900         uint64_t clock;
6901
6902         mutex_lock(&rdev->gpu_clock_mutex);
6903         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6904         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6905                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6906         mutex_unlock(&rdev->gpu_clock_mutex);
6907         return clock;
6908 }
6909
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD vclk in kHz (0 leaves the PLL bypassed)
 * @dclk: requested UVD dclk in kHz (0 leaves the PLL bypassed)
 *
 * Bypasses vclk/dclk onto bclk, reprograms the UPLL dividers for the
 * requested frequencies, then switches the clocks back to the PLL.
 * The register write/delay sequence below is order-critical.
 * Returns 0 on success, negative error code on divider calculation or
 * PLL handshake failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 selection depends on the feedback divider range */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6998
/*
 * si_pcie_gen3_enable - raise the PCIe link speed to gen2/gen3 if possible
 *
 * @rdev: radeon_device pointer
 *
 * Checks the upstream bridge's speed capability and, when the link is not
 * already running at the target rate, re-runs link equalization (gen3 only)
 * and initiates a software speed change. Bails out early on root-bus
 * devices, IGPs, non-PCIe parts, or when disabled via radeon.pcie_gen2=0.
 *
 * NOTE(review): the LNKCTL/LNKCTL2 read-modify-write sequences below are
 * not atomic with respect to concurrent PCI core accesses to the same
 * registers — presumably acceptable at this point in bring-up; confirm
 * against the RMW capability accessors used by newer kernels.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* nothing to do unless the bridge can run at gen2 or gen3 */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* data rate encoding: 2 = gen3, 1 = gen2 (per LC_CURRENT_DATA_RATE use below) */
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD state on both ends of the link */
			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate back up to the detected width if we are under it */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl: restore the saved HAWD bits */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2: restore compliance/margin bits */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				/* release the quiesce for the next iteration */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 */
	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the controller to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7185
/*
 * si_program_aspm - program PCIe ASPM (L0s/L1) and related power features
 *
 * @rdev: radeon_device pointer
 *
 * Configures link power-saving state: N_FTS override, L0s/L1 inactivity
 * timers, PLL power-down in L1, lane power states, and (when the upstream
 * bridge advertises clock PM) CLKREQ-based clock switching. The local
 * disable_* bools act as compile-time policy knobs; all are currently
 * false, so every feature path below is taken. Skipped entirely when
 * radeon.aspm=0 or the device is not PCIe.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* LS2 exit time 5 only on Oland/Hainan, otherwise cleared */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* CLKREQ is usable only if the upstream bridge supports clock PM */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* move thermal/deep-sleep clocks off the reference clock */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop L0s if N_FTS is saturated on a reversed link */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7390
7391 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7392 {
7393         unsigned i;
7394
7395         /* make sure VCEPLL_CTLREQ is deasserted */
7396         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7397
7398         mdelay(10);
7399
7400         /* assert UPLL_CTLREQ */
7401         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7402
7403         /* wait for CTLACK and CTLACK2 to get asserted */
7404         for (i = 0; i < 100; ++i) {
7405                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7406                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7407                         break;
7408                 mdelay(10);
7409         }
7410
7411         /* deassert UPLL_CTLREQ */
7412         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7413
7414         if (i == 100) {
7415                 DRM_ERROR("Timeout setting UVD clocks!\n");
7416                 return -ETIMEDOUT;
7417         }
7418
7419         return 0;
7420 }
7421
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE evclk in kHz (0 puts the PLL to sleep in bypass)
 * @ecclk: requested VCE ecclk in kHz (0 puts the PLL to sleep in bypass)
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: bypass onto bclk,
 * recompute and program the dividers, then switch back to the PLL.
 * The register write/delay sequence below is order-critical.
 * Returns 0 on success, negative error code otherwise.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK selection back to the PLL */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}