/* GNU Linux-libre 5.10.153-gnu1 — [releases.git] / drivers / gpu / drm / radeon / si.c */
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "radeon.h"
36 #include "radeon_asic.h"
37 #include "radeon_audio.h"
38 #include "radeon_ucode.h"
39 #include "si_blit_shaders.h"
40 #include "sid.h"
41
42
43 /*(DEBLOBBED)*/
44
45 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
46 static void si_pcie_gen3_enable(struct radeon_device *rdev);
47 static void si_program_aspm(struct radeon_device *rdev);
48 extern void sumo_rlc_fini(struct radeon_device *rdev);
49 extern int sumo_rlc_init(struct radeon_device *rdev);
50 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
51 extern void r600_ih_ring_fini(struct radeon_device *rdev);
52 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
53 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
54 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
55 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
56 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
57 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
58 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
59                                          bool enable);
60 static void si_init_pg(struct radeon_device *rdev);
61 static void si_init_cg(struct radeon_device *rdev);
62 static void si_fini_pg(struct radeon_device *rdev);
63 static void si_fini_cg(struct radeon_device *rdev);
64 static void si_rlc_stop(struct radeon_device *rdev);
65
/*
 * Per-CRTC MMIO register block offsets, indexed by CRTC number (0-5).
 * SI parts expose up to six display controllers; adding one of these
 * offsets to a CRTC register address selects the matching instance.
 */
static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};
75
/*
 * Display interrupt status registers, one per CRTC (parallel to
 * crtc_offsets above): DISP_INTERRUPT_STATUS plus its CONTINUE
 * follow-on registers for CRTCs 1-5.
 */
static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};
85
/*
 * Index the hot-plug-detect register set by HPD pad number: pad x maps
 * to the DC_HPD1_* register plus a 0xc-byte stride per pad.
 * NOTE(review): x is presumably 0-based (0-5, matching the six CRTCs
 * above) — confirm against the callers.
 *
 * The parameter is fully parenthesized so an expression argument
 * (e.g. DC_HPDx_CONTROL(i + 1)) expands correctly.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
89
90 static const u32 verde_rlc_save_restore_register_list[] =
91 {
92         (0x8000 << 16) | (0x98f4 >> 2),
93         0x00000000,
94         (0x8040 << 16) | (0x98f4 >> 2),
95         0x00000000,
96         (0x8000 << 16) | (0xe80 >> 2),
97         0x00000000,
98         (0x8040 << 16) | (0xe80 >> 2),
99         0x00000000,
100         (0x8000 << 16) | (0x89bc >> 2),
101         0x00000000,
102         (0x8040 << 16) | (0x89bc >> 2),
103         0x00000000,
104         (0x8000 << 16) | (0x8c1c >> 2),
105         0x00000000,
106         (0x8040 << 16) | (0x8c1c >> 2),
107         0x00000000,
108         (0x9c00 << 16) | (0x98f0 >> 2),
109         0x00000000,
110         (0x9c00 << 16) | (0xe7c >> 2),
111         0x00000000,
112         (0x8000 << 16) | (0x9148 >> 2),
113         0x00000000,
114         (0x8040 << 16) | (0x9148 >> 2),
115         0x00000000,
116         (0x9c00 << 16) | (0x9150 >> 2),
117         0x00000000,
118         (0x9c00 << 16) | (0x897c >> 2),
119         0x00000000,
120         (0x9c00 << 16) | (0x8d8c >> 2),
121         0x00000000,
122         (0x9c00 << 16) | (0xac54 >> 2),
123         0X00000000,
124         0x3,
125         (0x9c00 << 16) | (0x98f8 >> 2),
126         0x00000000,
127         (0x9c00 << 16) | (0x9910 >> 2),
128         0x00000000,
129         (0x9c00 << 16) | (0x9914 >> 2),
130         0x00000000,
131         (0x9c00 << 16) | (0x9918 >> 2),
132         0x00000000,
133         (0x9c00 << 16) | (0x991c >> 2),
134         0x00000000,
135         (0x9c00 << 16) | (0x9920 >> 2),
136         0x00000000,
137         (0x9c00 << 16) | (0x9924 >> 2),
138         0x00000000,
139         (0x9c00 << 16) | (0x9928 >> 2),
140         0x00000000,
141         (0x9c00 << 16) | (0x992c >> 2),
142         0x00000000,
143         (0x9c00 << 16) | (0x9930 >> 2),
144         0x00000000,
145         (0x9c00 << 16) | (0x9934 >> 2),
146         0x00000000,
147         (0x9c00 << 16) | (0x9938 >> 2),
148         0x00000000,
149         (0x9c00 << 16) | (0x993c >> 2),
150         0x00000000,
151         (0x9c00 << 16) | (0x9940 >> 2),
152         0x00000000,
153         (0x9c00 << 16) | (0x9944 >> 2),
154         0x00000000,
155         (0x9c00 << 16) | (0x9948 >> 2),
156         0x00000000,
157         (0x9c00 << 16) | (0x994c >> 2),
158         0x00000000,
159         (0x9c00 << 16) | (0x9950 >> 2),
160         0x00000000,
161         (0x9c00 << 16) | (0x9954 >> 2),
162         0x00000000,
163         (0x9c00 << 16) | (0x9958 >> 2),
164         0x00000000,
165         (0x9c00 << 16) | (0x995c >> 2),
166         0x00000000,
167         (0x9c00 << 16) | (0x9960 >> 2),
168         0x00000000,
169         (0x9c00 << 16) | (0x9964 >> 2),
170         0x00000000,
171         (0x9c00 << 16) | (0x9968 >> 2),
172         0x00000000,
173         (0x9c00 << 16) | (0x996c >> 2),
174         0x00000000,
175         (0x9c00 << 16) | (0x9970 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9974 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9978 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x997c >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x9980 >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x9984 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9988 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x998c >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x8c00 >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x8c14 >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x8c04 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x8c08 >> 2),
198         0x00000000,
199         (0x8000 << 16) | (0x9b7c >> 2),
200         0x00000000,
201         (0x8040 << 16) | (0x9b7c >> 2),
202         0x00000000,
203         (0x8000 << 16) | (0xe84 >> 2),
204         0x00000000,
205         (0x8040 << 16) | (0xe84 >> 2),
206         0x00000000,
207         (0x8000 << 16) | (0x89c0 >> 2),
208         0x00000000,
209         (0x8040 << 16) | (0x89c0 >> 2),
210         0x00000000,
211         (0x8000 << 16) | (0x914c >> 2),
212         0x00000000,
213         (0x8040 << 16) | (0x914c >> 2),
214         0x00000000,
215         (0x8000 << 16) | (0x8c20 >> 2),
216         0x00000000,
217         (0x8040 << 16) | (0x8c20 >> 2),
218         0x00000000,
219         (0x8000 << 16) | (0x9354 >> 2),
220         0x00000000,
221         (0x8040 << 16) | (0x9354 >> 2),
222         0x00000000,
223         (0x9c00 << 16) | (0x9060 >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x9364 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9100 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x913c >> 2),
230         0x00000000,
231         (0x8000 << 16) | (0x90e0 >> 2),
232         0x00000000,
233         (0x8000 << 16) | (0x90e4 >> 2),
234         0x00000000,
235         (0x8000 << 16) | (0x90e8 >> 2),
236         0x00000000,
237         (0x8040 << 16) | (0x90e0 >> 2),
238         0x00000000,
239         (0x8040 << 16) | (0x90e4 >> 2),
240         0x00000000,
241         (0x8040 << 16) | (0x90e8 >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8bcc >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8b24 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x88c4 >> 2),
248         0x00000000,
249         (0x9c00 << 16) | (0x8e50 >> 2),
250         0x00000000,
251         (0x9c00 << 16) | (0x8c0c >> 2),
252         0x00000000,
253         (0x9c00 << 16) | (0x8e58 >> 2),
254         0x00000000,
255         (0x9c00 << 16) | (0x8e5c >> 2),
256         0x00000000,
257         (0x9c00 << 16) | (0x9508 >> 2),
258         0x00000000,
259         (0x9c00 << 16) | (0x950c >> 2),
260         0x00000000,
261         (0x9c00 << 16) | (0x9494 >> 2),
262         0x00000000,
263         (0x9c00 << 16) | (0xac0c >> 2),
264         0x00000000,
265         (0x9c00 << 16) | (0xac10 >> 2),
266         0x00000000,
267         (0x9c00 << 16) | (0xac14 >> 2),
268         0x00000000,
269         (0x9c00 << 16) | (0xae00 >> 2),
270         0x00000000,
271         (0x9c00 << 16) | (0xac08 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x88d4 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x88c8 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x88cc >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x89b0 >> 2),
280         0x00000000,
281         (0x9c00 << 16) | (0x8b10 >> 2),
282         0x00000000,
283         (0x9c00 << 16) | (0x8a14 >> 2),
284         0x00000000,
285         (0x9c00 << 16) | (0x9830 >> 2),
286         0x00000000,
287         (0x9c00 << 16) | (0x9834 >> 2),
288         0x00000000,
289         (0x9c00 << 16) | (0x9838 >> 2),
290         0x00000000,
291         (0x9c00 << 16) | (0x9a10 >> 2),
292         0x00000000,
293         (0x8000 << 16) | (0x9870 >> 2),
294         0x00000000,
295         (0x8000 << 16) | (0x9874 >> 2),
296         0x00000000,
297         (0x8001 << 16) | (0x9870 >> 2),
298         0x00000000,
299         (0x8001 << 16) | (0x9874 >> 2),
300         0x00000000,
301         (0x8040 << 16) | (0x9870 >> 2),
302         0x00000000,
303         (0x8040 << 16) | (0x9874 >> 2),
304         0x00000000,
305         (0x8041 << 16) | (0x9870 >> 2),
306         0x00000000,
307         (0x8041 << 16) | (0x9874 >> 2),
308         0x00000000,
309         0x00000000
310 };
311
/*
 * Tahiti RLC golden register settings.
 * Each entry appears to be an {offset, mask, value} triple —
 * NOTE(review): format inferred from the 3-word grouping; confirm
 * against the consumer (not visible in this chunk).
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
321
/*
 * Tahiti golden register settings ({offset, mask, value} triples —
 * NOTE(review): format inferred from the 3-word grouping; confirm
 * against the consumer, not visible in this chunk).
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
356
/* Additional Tahiti golden register setting (same triple format as above). */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
361
/* Pitcairn RLC golden register settings ({offset, mask, value} triples). */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
370
/* Pitcairn golden register settings ({offset, mask, value} triples). */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
401
/* Verde RLC golden register settings ({offset, mask, value} triples). */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
410
/*
 * Verde golden register settings ({offset, mask, value} triples).
 * Several offsets appear two or three times with identical values;
 * the duplicates are preserved as-is from the original table —
 * NOTE(review): presumably harmless repeated writes, but do not
 * dedupe without checking the consumer.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
466
/* Oland RLC golden register settings ({offset, mask, value} triples). */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
475
/* Oland golden register settings ({offset, mask, value} triples). */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
506
/* Hainan golden register settings ({offset, mask, value} triples). */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
535
/* Additional Hainan golden register setting (same triple format as above). */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
540
/*
 * Tahiti medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * sequence, {offset, mask, value} triples — NOTE(review): format
 * inferred from the 3-word grouping; confirm against the consumer
 * (not visible in this chunk).
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
670
/*
 * Pitcairn MGCG/CGCG clock-gating init sequence, {offset, mask, value}
 * triples — same presumed format as tahiti_mgcg_cgcg_init above.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
768
769 static const u32 verde_mgcg_cgcg_init[] =
770 {
771         0xc400, 0xffffffff, 0xfffffffc,
772         0x802c, 0xffffffff, 0xe0000000,
773         0x9a60, 0xffffffff, 0x00000100,
774         0x92a4, 0xffffffff, 0x00000100,
775         0xc164, 0xffffffff, 0x00000100,
776         0x9774, 0xffffffff, 0x00000100,
777         0x8984, 0xffffffff, 0x06000100,
778         0x8a18, 0xffffffff, 0x00000100,
779         0x92a0, 0xffffffff, 0x00000100,
780         0xc380, 0xffffffff, 0x00000100,
781         0x8b28, 0xffffffff, 0x00000100,
782         0x9144, 0xffffffff, 0x00000100,
783         0x8d88, 0xffffffff, 0x00000100,
784         0x8d8c, 0xffffffff, 0x00000100,
785         0x9030, 0xffffffff, 0x00000100,
786         0x9034, 0xffffffff, 0x00000100,
787         0x9038, 0xffffffff, 0x00000100,
788         0x903c, 0xffffffff, 0x00000100,
789         0xad80, 0xffffffff, 0x00000100,
790         0xac54, 0xffffffff, 0x00000100,
791         0x897c, 0xffffffff, 0x06000100,
792         0x9868, 0xffffffff, 0x00000100,
793         0x9510, 0xffffffff, 0x00000100,
794         0xaf04, 0xffffffff, 0x00000100,
795         0xae04, 0xffffffff, 0x00000100,
796         0x949c, 0xffffffff, 0x00000100,
797         0x802c, 0xffffffff, 0xe0000000,
798         0x9160, 0xffffffff, 0x00010000,
799         0x9164, 0xffffffff, 0x00030002,
800         0x9168, 0xffffffff, 0x00040007,
801         0x916c, 0xffffffff, 0x00060005,
802         0x9170, 0xffffffff, 0x00090008,
803         0x9174, 0xffffffff, 0x00020001,
804         0x9178, 0xffffffff, 0x00040003,
805         0x917c, 0xffffffff, 0x00000007,
806         0x9180, 0xffffffff, 0x00060005,
807         0x9184, 0xffffffff, 0x00090008,
808         0x9188, 0xffffffff, 0x00030002,
809         0x918c, 0xffffffff, 0x00050004,
810         0x9190, 0xffffffff, 0x00000008,
811         0x9194, 0xffffffff, 0x00070006,
812         0x9198, 0xffffffff, 0x000a0009,
813         0x919c, 0xffffffff, 0x00040003,
814         0x91a0, 0xffffffff, 0x00060005,
815         0x91a4, 0xffffffff, 0x00000009,
816         0x91a8, 0xffffffff, 0x00080007,
817         0x91ac, 0xffffffff, 0x000b000a,
818         0x91b0, 0xffffffff, 0x00050004,
819         0x91b4, 0xffffffff, 0x00070006,
820         0x91b8, 0xffffffff, 0x0008000b,
821         0x91bc, 0xffffffff, 0x000a0009,
822         0x91c0, 0xffffffff, 0x000d000c,
823         0x9200, 0xffffffff, 0x00090008,
824         0x9204, 0xffffffff, 0x000b000a,
825         0x9208, 0xffffffff, 0x000c000f,
826         0x920c, 0xffffffff, 0x000e000d,
827         0x9210, 0xffffffff, 0x00110010,
828         0x9214, 0xffffffff, 0x000a0009,
829         0x9218, 0xffffffff, 0x000c000b,
830         0x921c, 0xffffffff, 0x0000000f,
831         0x9220, 0xffffffff, 0x000e000d,
832         0x9224, 0xffffffff, 0x00110010,
833         0x9228, 0xffffffff, 0x000b000a,
834         0x922c, 0xffffffff, 0x000d000c,
835         0x9230, 0xffffffff, 0x00000010,
836         0x9234, 0xffffffff, 0x000f000e,
837         0x9238, 0xffffffff, 0x00120011,
838         0x923c, 0xffffffff, 0x000c000b,
839         0x9240, 0xffffffff, 0x000e000d,
840         0x9244, 0xffffffff, 0x00000011,
841         0x9248, 0xffffffff, 0x0010000f,
842         0x924c, 0xffffffff, 0x00130012,
843         0x9250, 0xffffffff, 0x000d000c,
844         0x9254, 0xffffffff, 0x000f000e,
845         0x9258, 0xffffffff, 0x00100013,
846         0x925c, 0xffffffff, 0x00120011,
847         0x9260, 0xffffffff, 0x00150014,
848         0x9150, 0xffffffff, 0x96940200,
849         0x8708, 0xffffffff, 0x00900100,
850         0xc478, 0xffffffff, 0x00000080,
851         0xc404, 0xffffffff, 0x0020003f,
852         0x30, 0xffffffff, 0x0000001c,
853         0x34, 0x000f0000, 0x000f0000,
854         0x160c, 0xffffffff, 0x00000100,
855         0x1024, 0xffffffff, 0x00000100,
856         0x102c, 0x00000101, 0x00000000,
857         0x20a8, 0xffffffff, 0x00000104,
858         0x264c, 0x000c0000, 0x000c0000,
859         0x2648, 0x000c0000, 0x000c0000,
860         0x55e4, 0xff000fff, 0x00000100,
861         0x55e8, 0x00000001, 0x00000001,
862         0x2f50, 0x00000001, 0x00000001,
863         0x30cc, 0xc0000fff, 0x00000104,
864         0xc1e4, 0x00000001, 0x00000001,
865         0xd0c0, 0xfffffff0, 0x00000100,
866         0xd8c0, 0xfffffff0, 0x00000100
867 };
868
/*
 * MGCG/CGCG (medium/coarse grain clock gating) init sequence for Oland.
 * Each row is a {register offset, writemask, value} triple consumed by
 * radeon_program_register_sequence(); only bits set in the mask are
 * updated.  Values are hardware-team-provided and must not be changed.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
948
/*
 * MGCG/CGCG (medium/coarse grain clock gating) init sequence for Hainan.
 * Each row is a {register offset, writemask, value} triple consumed by
 * radeon_program_register_sequence(); only bits set in the mask are
 * updated.  Values are hardware-team-provided and must not be changed.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1025
1026 static u32 verde_pg_init[] =
1027 {
1028         0x353c, 0xffffffff, 0x40000,
1029         0x3538, 0xffffffff, 0x200010ff,
1030         0x353c, 0xffffffff, 0x0,
1031         0x353c, 0xffffffff, 0x0,
1032         0x353c, 0xffffffff, 0x0,
1033         0x353c, 0xffffffff, 0x0,
1034         0x353c, 0xffffffff, 0x0,
1035         0x353c, 0xffffffff, 0x7007,
1036         0x3538, 0xffffffff, 0x300010ff,
1037         0x353c, 0xffffffff, 0x0,
1038         0x353c, 0xffffffff, 0x0,
1039         0x353c, 0xffffffff, 0x0,
1040         0x353c, 0xffffffff, 0x0,
1041         0x353c, 0xffffffff, 0x0,
1042         0x353c, 0xffffffff, 0x400000,
1043         0x3538, 0xffffffff, 0x100010ff,
1044         0x353c, 0xffffffff, 0x0,
1045         0x353c, 0xffffffff, 0x0,
1046         0x353c, 0xffffffff, 0x0,
1047         0x353c, 0xffffffff, 0x0,
1048         0x353c, 0xffffffff, 0x0,
1049         0x353c, 0xffffffff, 0x120200,
1050         0x3538, 0xffffffff, 0x500010ff,
1051         0x353c, 0xffffffff, 0x0,
1052         0x353c, 0xffffffff, 0x0,
1053         0x353c, 0xffffffff, 0x0,
1054         0x353c, 0xffffffff, 0x0,
1055         0x353c, 0xffffffff, 0x0,
1056         0x353c, 0xffffffff, 0x1e1e16,
1057         0x3538, 0xffffffff, 0x600010ff,
1058         0x353c, 0xffffffff, 0x0,
1059         0x353c, 0xffffffff, 0x0,
1060         0x353c, 0xffffffff, 0x0,
1061         0x353c, 0xffffffff, 0x0,
1062         0x353c, 0xffffffff, 0x0,
1063         0x353c, 0xffffffff, 0x171f1e,
1064         0x3538, 0xffffffff, 0x700010ff,
1065         0x353c, 0xffffffff, 0x0,
1066         0x353c, 0xffffffff, 0x0,
1067         0x353c, 0xffffffff, 0x0,
1068         0x353c, 0xffffffff, 0x0,
1069         0x353c, 0xffffffff, 0x0,
1070         0x353c, 0xffffffff, 0x0,
1071         0x3538, 0xffffffff, 0x9ff,
1072         0x3500, 0xffffffff, 0x0,
1073         0x3504, 0xffffffff, 0x10000800,
1074         0x3504, 0xffffffff, 0xf,
1075         0x3504, 0xffffffff, 0xf,
1076         0x3500, 0xffffffff, 0x4,
1077         0x3504, 0xffffffff, 0x1000051e,
1078         0x3504, 0xffffffff, 0xffff,
1079         0x3504, 0xffffffff, 0xffff,
1080         0x3500, 0xffffffff, 0x8,
1081         0x3504, 0xffffffff, 0x80500,
1082         0x3500, 0xffffffff, 0x12,
1083         0x3504, 0xffffffff, 0x9050c,
1084         0x3500, 0xffffffff, 0x1d,
1085         0x3504, 0xffffffff, 0xb052c,
1086         0x3500, 0xffffffff, 0x2a,
1087         0x3504, 0xffffffff, 0x1053e,
1088         0x3500, 0xffffffff, 0x2d,
1089         0x3504, 0xffffffff, 0x10546,
1090         0x3500, 0xffffffff, 0x30,
1091         0x3504, 0xffffffff, 0xa054e,
1092         0x3500, 0xffffffff, 0x3c,
1093         0x3504, 0xffffffff, 0x1055f,
1094         0x3500, 0xffffffff, 0x3f,
1095         0x3504, 0xffffffff, 0x10567,
1096         0x3500, 0xffffffff, 0x42,
1097         0x3504, 0xffffffff, 0x1056f,
1098         0x3500, 0xffffffff, 0x45,
1099         0x3504, 0xffffffff, 0x10572,
1100         0x3500, 0xffffffff, 0x48,
1101         0x3504, 0xffffffff, 0x20575,
1102         0x3500, 0xffffffff, 0x4c,
1103         0x3504, 0xffffffff, 0x190801,
1104         0x3500, 0xffffffff, 0x67,
1105         0x3504, 0xffffffff, 0x1082a,
1106         0x3500, 0xffffffff, 0x6a,
1107         0x3504, 0xffffffff, 0x1b082d,
1108         0x3500, 0xffffffff, 0x87,
1109         0x3504, 0xffffffff, 0x310851,
1110         0x3500, 0xffffffff, 0xba,
1111         0x3504, 0xffffffff, 0x891,
1112         0x3500, 0xffffffff, 0xbc,
1113         0x3504, 0xffffffff, 0x893,
1114         0x3500, 0xffffffff, 0xbe,
1115         0x3504, 0xffffffff, 0x20895,
1116         0x3500, 0xffffffff, 0xc2,
1117         0x3504, 0xffffffff, 0x20899,
1118         0x3500, 0xffffffff, 0xc6,
1119         0x3504, 0xffffffff, 0x2089d,
1120         0x3500, 0xffffffff, 0xca,
1121         0x3504, 0xffffffff, 0x8a1,
1122         0x3500, 0xffffffff, 0xcc,
1123         0x3504, 0xffffffff, 0x8a3,
1124         0x3500, 0xffffffff, 0xce,
1125         0x3504, 0xffffffff, 0x308a5,
1126         0x3500, 0xffffffff, 0xd3,
1127         0x3504, 0xffffffff, 0x6d08cd,
1128         0x3500, 0xffffffff, 0x142,
1129         0x3504, 0xffffffff, 0x2000095a,
1130         0x3504, 0xffffffff, 0x1,
1131         0x3500, 0xffffffff, 0x144,
1132         0x3504, 0xffffffff, 0x301f095b,
1133         0x3500, 0xffffffff, 0x165,
1134         0x3504, 0xffffffff, 0xc094d,
1135         0x3500, 0xffffffff, 0x173,
1136         0x3504, 0xffffffff, 0xf096d,
1137         0x3500, 0xffffffff, 0x184,
1138         0x3504, 0xffffffff, 0x15097f,
1139         0x3500, 0xffffffff, 0x19b,
1140         0x3504, 0xffffffff, 0xc0998,
1141         0x3500, 0xffffffff, 0x1a9,
1142         0x3504, 0xffffffff, 0x409a7,
1143         0x3500, 0xffffffff, 0x1af,
1144         0x3504, 0xffffffff, 0xcdc,
1145         0x3500, 0xffffffff, 0x1b1,
1146         0x3504, 0xffffffff, 0x800,
1147         0x3508, 0xffffffff, 0x6c9b2000,
1148         0x3510, 0xfc00, 0x2000,
1149         0x3544, 0xffffffff, 0xfc0,
1150         0x28d4, 0x00000100, 0x100
1151 };
1152
/**
 * si_init_golden_registers - program the per-ASIC "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the hardware-team-recommended register sequences for the
 * detected SI family: the base golden registers, the RLC registers,
 * the MGCG/CGCG clock-gating init table and, where applicable,
 * additional tables (Tahiti's second table, Verde's power-gating init).
 * The sequences are applied in order; do not reorder the calls.
 * Unknown families are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1221
1222 /**
1223  * si_get_allowed_info_register - fetch the register for the info ioctl
1224  *
1225  * @rdev: radeon_device pointer
1226  * @reg: register offset in bytes
1227  * @val: register value
1228  *
1229  * Returns 0 for success or -EINVAL for an invalid register
1230  *
1231  */
1232 int si_get_allowed_info_register(struct radeon_device *rdev,
1233                                  u32 reg, u32 *val)
1234 {
1235         switch (reg) {
1236         case GRBM_STATUS:
1237         case GRBM_STATUS2:
1238         case GRBM_STATUS_SE0:
1239         case GRBM_STATUS_SE1:
1240         case SRBM_STATUS:
1241         case SRBM_STATUS2:
1242         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1243         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1244         case UVD_STATUS:
1245                 *val = RREG32(reg);
1246                 return 0;
1247         default:
1248                 return -EINVAL;
1249         }
1250 }
1251
1252 #define PCIE_BUS_CLK                10000
1253 #define TCLK                        (PCIE_BUS_CLK / 10)
1254
1255 /**
1256  * si_get_xclk - get the xclk
1257  *
1258  * @rdev: radeon_device pointer
1259  *
1260  * Returns the reference clock used by the gfx engine
1261  * (SI).
1262  */
1263 u32 si_get_xclk(struct radeon_device *rdev)
1264 {
1265         u32 reference_clock = rdev->clock.spll.reference_freq;
1266         u32 tmp;
1267
1268         tmp = RREG32(CG_CLKPIN_CNTL_2);
1269         if (tmp & MUX_TCLK_TO_XCLK)
1270                 return TCLK;
1271
1272         tmp = RREG32(CG_CLKPIN_CNTL);
1273         if (tmp & XTALIN_DIVIDE)
1274                 return reference_clock / 4;
1275
1276         return reference_clock;
1277 }
1278
1279 /* get temperature in millidegrees */
1280 int si_get_temp(struct radeon_device *rdev)
1281 {
1282         u32 temp;
1283         int actual_temp = 0;
1284
1285         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1286                 CTF_TEMP_SHIFT;
1287
1288         if (temp & 0x200)
1289                 actual_temp = 255;
1290         else
1291                 actual_temp = temp & 0x1ff;
1292
1293         actual_temp = (actual_temp * 1000);
1294
1295         return actual_temp;
1296 }
1297
1298 #define TAHITI_IO_MC_REGS_SIZE 36
1299
/*
 * Tahiti MC IO debug settings: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by si_mc_load_microcode() before loading the legacy
 * (non-header) MC firmware image.  Values must not be changed.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1338
/*
 * Pitcairn MC IO debug settings: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by si_mc_load_microcode() before loading the legacy
 * (non-header) MC firmware image.  Values must not be changed.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1377
/*
 * Verde MC IO debug settings: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by si_mc_load_microcode() before loading the legacy
 * (non-header) MC firmware image.  Values must not be changed.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1416
/*
 * Oland MC IO debug settings: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by si_mc_load_microcode() before loading the legacy
 * (non-header) MC firmware image.  Values must not be changed.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1455
/*
 * Hainan MC IO debug settings: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by si_mc_load_microcode() before loading the legacy
 * (non-header) MC firmware image.  Values must not be changed.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1494
/**
 * si_mc_load_microcode - load the MC (memory controller) ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * If the MC engine is not already running, resets it, programs the
 * per-ASIC MC IO debug register pairs, streams in the MC ucode, then
 * restarts the engine and polls for memory training completion on both
 * channels.  Handles both the new header-wrapped little-endian firmware
 * images and the legacy big-endian images with built-in register tables.
 *
 * Returns 0 on success (including when the engine was already running),
 * -EINVAL if no MC firmware has been loaded.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: sizes and offsets come from the header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		/* io_debug entries are {index, data} pairs of 32-bit words */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: raw big-endian image, register tables are
		 * built into the driver (const cast is read-only access)
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load ucode while the engine is stopped */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1593
/**
 * si_init_microcode - fetch and validate all SI microcode images
 * @rdev: radeon device
 *
 * Requests the PFP, ME, CE, RLC, MC and (optionally) SMC firmware for the
 * detected ASIC.  Each image is first requested under the new single-file
 * name and validated with radeon_ucode_validate(); on failure we fall back
 * to the legacy image and enforce an exact size check instead.  Mixing new
 * and legacy images is rejected.  A missing SMC image is tolerated (DPM is
 * simply disabled later); any other failure releases everything already
 * loaded.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;		/* number of images loaded in the new format */
	bool new_smc = false;	/* board revision needs the updated SMC image */
	bool si58_fw = false;	/* memory config needs the special si58 MC image */
	bool banks2_fw = false;	/* board revision needs the banks2 SMC image */

	DRM_DEBUG("\n");

	/* Pick per-ASIC firmware names and expected legacy image sizes.
	 * Certain PCI device/revision combinations shipped with updated
	 * SMC firmware; detect those here.
	 */
	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		if ((rdev->pdev->revision == 0x81) &&
		    ((rdev->pdev->device == 0x6810) ||
		     (rdev->pdev->device == 0x6811)))
			new_smc = true;
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		if (((rdev->pdev->device == 0x6820) &&
		     ((rdev->pdev->revision == 0x81) ||
		      (rdev->pdev->revision == 0x83))) ||
		    ((rdev->pdev->device == 0x6821) &&
		     ((rdev->pdev->revision == 0x83) ||
		      (rdev->pdev->revision == 0x87))) ||
		    ((rdev->pdev->revision == 0x87) &&
		     ((rdev->pdev->device == 0x6823) ||
		      (rdev->pdev->device == 0x682b))))
			new_smc = true;
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		if (((rdev->pdev->revision == 0x81) &&
		     ((rdev->pdev->device == 0x6600) ||
		      (rdev->pdev->device == 0x6604) ||
		      (rdev->pdev->device == 0x6605) ||
		      (rdev->pdev->device == 0x6610))) ||
		    ((rdev->pdev->revision == 0x83) &&
		     (rdev->pdev->device == 0x6610)))
			new_smc = true;
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		if (((rdev->pdev->revision == 0x81) &&
		     (rdev->pdev->device == 0x6660)) ||
		    ((rdev->pdev->revision == 0x83) &&
		     ((rdev->pdev->device == 0x6660) ||
		      (rdev->pdev->device == 0x6663) ||
		      (rdev->pdev->device == 0x6665) ||
		      (rdev->pdev->device == 0x6667))))
			new_smc = true;
		else if ((rdev->pdev->revision == 0xc3) &&
			 (rdev->pdev->device == 0x6665))
			banks2_fw = true;
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default: BUG();
	}

	/* this memory configuration requires special firmware */
	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
		si58_fw = true;

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	/* PFP (pre-fetch parser) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* ME (micro engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->me_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->me_fw->size != me_req_size) {
			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			/* bail now so a later successful load can't clobber err */
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->me_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* CE (constant engine) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->ce_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->ce_fw->size != ce_req_size) {
			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->ce_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* RLC (run list controller) */
	snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->rlc_fw->size != rlc_req_size) {
			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->rlc_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* MC (memory controller); si58 boards need a dedicated image,
	 * and the legacy path has two fallback names (mc2 then mc).
	 */
	if (si58_fw)
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
	else
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err) {
			snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
			err = reject_firmware(&rdev->mc_fw, fw_name, rdev->dev);
			if (err)
				goto out;
		}
		if ((rdev->mc_fw->size != mc_req_size) &&
		    (rdev->mc_fw->size != mc2_req_size)) {
			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
	} else {
		err = radeon_ucode_validate(rdev->mc_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* SMC (power management); optional - a missing image only disables
	 * DPM, so load failures are swallowed with err = 0.
	 */
	if (banks2_fw)
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/");
	else if (new_smc)
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", new_chip_name);
	err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name);
		err = reject_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	} else {
		err = radeon_ucode_validate(rdev->smc_fw);
		if (err) {
			pr_err("si_cp: validation failed for firmware \"%s\"\n",
			       fw_name);
			goto out;
		} else {
			new_fw++;
		}
	}

	/* All six images must agree on the format: all legacy or all new. */
	if (new_fw == 0) {
		rdev->new_fw = false;
	} else if (new_fw < 6) {
		pr_err("si_fw: mixing new and old firmware!\n");
		err = -EINVAL;
	} else {
		rdev->new_fw = true;
	}
out:
	if (err) {
		if (err != -EINVAL)
			pr_err("si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
1889
1890 /* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 lb_config, dmif_buffers, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;

	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT selects how the shared buffer is split between
	 * the two controllers via a preset in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 * This can get tricky with two large displays on a paired group of
	 * crtcs; ideally multiple large displays would live on non-linked
	 * crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			lb_config = 0;	/* partner active: take half */
			dmif_buffers = 1;
		} else {
			lb_config = 2;	/* partner idle: take the whole lb */
			dmif_buffers = 2;
		}
	} else {
		lb_config = 0;
		dmif_buffers = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(lb_config));

	/* hand the matching number of DMIF buffers to this pipe and wait
	 * for the hardware to acknowledge the re-allocation
	 */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(dmif_buffers));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	/* report the line buffer size granted to this crtc */
	if (radeon_crtc->base.enabled && mode)
		return (lb_config == 2) ? 8192 * 2 : 4096 * 2;

	/* controller not enabled, so no lb used */
	return 0;
}
1949
1950 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1951 {
1952         u32 tmp = RREG32(MC_SHARED_CHMAP);
1953
1954         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1955         case 0:
1956         default:
1957                 return 1;
1958         case 1:
1959                 return 2;
1960         case 2:
1961                 return 4;
1962         case 3:
1963                 return 8;
1964         case 4:
1965                 return 3;
1966         case 5:
1967                 return 6;
1968         case 6:
1969                 return 10;
1970         case 7:
1971                 return 12;
1972         case 8:
1973                 return 16;
1974         }
1975 }
1976
/* Input parameters for the DCE6 display watermark calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1992
1993 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1994 {
1995         /* Calculate raw DRAM Bandwidth */
1996         fixed20_12 dram_efficiency; /* 0.7 */
1997         fixed20_12 yclk, dram_channels, bandwidth;
1998         fixed20_12 a;
1999
2000         a.full = dfixed_const(1000);
2001         yclk.full = dfixed_const(wm->yclk);
2002         yclk.full = dfixed_div(yclk, a);
2003         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2004         a.full = dfixed_const(10);
2005         dram_efficiency.full = dfixed_const(7);
2006         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2007         bandwidth.full = dfixed_mul(dram_channels, yclk);
2008         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2009
2010         return dfixed_trunc(bandwidth);
2011 }
2012
2013 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2014 {
2015         /* Calculate DRAM Bandwidth and the part allocated to display. */
2016         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2017         fixed20_12 yclk, dram_channels, bandwidth;
2018         fixed20_12 a;
2019
2020         a.full = dfixed_const(1000);
2021         yclk.full = dfixed_const(wm->yclk);
2022         yclk.full = dfixed_div(yclk, a);
2023         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2024         a.full = dfixed_const(10);
2025         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2026         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2027         bandwidth.full = dfixed_mul(dram_channels, yclk);
2028         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2029
2030         return dfixed_trunc(bandwidth);
2031 }
2032
2033 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2034 {
2035         /* Calculate the display Data return Bandwidth */
2036         fixed20_12 return_efficiency; /* 0.8 */
2037         fixed20_12 sclk, bandwidth;
2038         fixed20_12 a;
2039
2040         a.full = dfixed_const(1000);
2041         sclk.full = dfixed_const(wm->sclk);
2042         sclk.full = dfixed_div(sclk, a);
2043         a.full = dfixed_const(10);
2044         return_efficiency.full = dfixed_const(8);
2045         return_efficiency.full = dfixed_div(return_efficiency, a);
2046         a.full = dfixed_const(32);
2047         bandwidth.full = dfixed_mul(a, sclk);
2048         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2049
2050         return dfixed_trunc(bandwidth);
2051 }
2052
/* DMIF requests are a fixed 32 bytes on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
2057
2058 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2059 {
2060         /* Calculate the DMIF Request Bandwidth */
2061         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2062         fixed20_12 disp_clk, sclk, bandwidth;
2063         fixed20_12 a, b1, b2;
2064         u32 min_bandwidth;
2065
2066         a.full = dfixed_const(1000);
2067         disp_clk.full = dfixed_const(wm->disp_clk);
2068         disp_clk.full = dfixed_div(disp_clk, a);
2069         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2070         b1.full = dfixed_mul(a, disp_clk);
2071
2072         a.full = dfixed_const(1000);
2073         sclk.full = dfixed_const(wm->sclk);
2074         sclk.full = dfixed_div(sclk, a);
2075         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2076         b2.full = dfixed_mul(a, sclk);
2077
2078         a.full = dfixed_const(10);
2079         disp_clk_request_efficiency.full = dfixed_const(8);
2080         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2081
2082         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2083
2084         a.full = dfixed_const(min_bandwidth);
2085         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2086
2087         return dfixed_trunc(bandwidth);
2088 }
2089
2090 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2091 {
2092         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2093         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2094         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2095         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2096
2097         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2098 }
2099
2100 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2101 {
2102         /* Calculate the display mode Average Bandwidth
2103          * DisplayMode should contain the source and destination dimensions,
2104          * timing, etc.
2105          */
2106         fixed20_12 bpp;
2107         fixed20_12 line_time;
2108         fixed20_12 src_width;
2109         fixed20_12 bandwidth;
2110         fixed20_12 a;
2111
2112         a.full = dfixed_const(1000);
2113         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2114         line_time.full = dfixed_div(line_time, a);
2115         bpp.full = dfixed_const(wm->bytes_per_pixel);
2116         src_width.full = dfixed_const(wm->src_width);
2117         bandwidth.full = dfixed_mul(src_width, bpp);
2118         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2119         bandwidth.full = dfixed_div(bandwidth, line_time);
2120
2121         return dfixed_trunc(bandwidth);
2122 }
2123
/* Compute the latency watermark in ns for the given display parameters:
 * the worst-case time data for this head can be delayed plus any extra
 * time needed to fill a line buffer line.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for a 512-byte*8 chunk / a 128-byte*4 cursor line pair to return */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time lost waiting on the other active heads' outstanding data */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* downscaling / many taps / interlace need up to 4 source lines
	 * per destination line, otherwise 2 are enough
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill bandwidth: this head's share of the available
	 * bandwidth, capped by the DMIF and the display clock
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to fill the needed source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line fills slower than it is scanned out, add the deficit */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2173
2174 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2175 {
2176         if (dce6_average_bandwidth(wm) <=
2177             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2178                 return true;
2179         else
2180                 return false;
2181 };
2182
2183 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2184 {
2185         if (dce6_average_bandwidth(wm) <=
2186             (dce6_available_bandwidth(wm) / wm->num_heads))
2187                 return true;
2188         else
2189                 return false;
2190 };
2191
2192 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2193 {
2194         u32 lb_partitions = wm->lb_size / wm->src_width;
2195         u32 line_time = wm->active_time + wm->blank_time;
2196         u32 latency_tolerant_lines;
2197         u32 latency_hiding;
2198         fixed20_12 a;
2199
2200         a.full = dfixed_const(1);
2201         if (wm->vsc.full > a.full)
2202                 latency_tolerant_lines = 1;
2203         else {
2204                 if (lb_partitions <= (wm->vtaps + 1))
2205                         latency_tolerant_lines = 1;
2206                 else
2207                         latency_tolerant_lines = 2;
2208         }
2209
2210         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2211
2212         if (dce6_latency_watermark(wm) <= latency_hiding)
2213                 return true;
2214         else
2215                 return false;
2216 }
2217
2218 static void dce6_program_watermarks(struct radeon_device *rdev,
2219                                          struct radeon_crtc *radeon_crtc,
2220                                          u32 lb_size, u32 num_heads)
2221 {
2222         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2223         struct dce6_wm_params wm_low, wm_high;
2224         u32 dram_channels;
2225         u32 active_time;
2226         u32 line_time = 0;
2227         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2228         u32 priority_a_mark = 0, priority_b_mark = 0;
2229         u32 priority_a_cnt = PRIORITY_OFF;
2230         u32 priority_b_cnt = PRIORITY_OFF;
2231         u32 tmp, arb_control3;
2232         fixed20_12 a, b, c;
2233
2234         if (radeon_crtc->base.enabled && num_heads && mode) {
2235                 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2236                                             (u32)mode->clock);
2237                 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2238                                           (u32)mode->clock);
2239                 line_time = min(line_time, (u32)65535);
2240                 priority_a_cnt = 0;
2241                 priority_b_cnt = 0;
2242
2243                 if (rdev->family == CHIP_ARUBA)
2244                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2245                 else
2246                         dram_channels = si_get_number_of_dram_channels(rdev);
2247
2248                 /* watermark for high clocks */
2249                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2250                         wm_high.yclk =
2251                                 radeon_dpm_get_mclk(rdev, false) * 10;
2252                         wm_high.sclk =
2253                                 radeon_dpm_get_sclk(rdev, false) * 10;
2254                 } else {
2255                         wm_high.yclk = rdev->pm.current_mclk * 10;
2256                         wm_high.sclk = rdev->pm.current_sclk * 10;
2257                 }
2258
2259                 wm_high.disp_clk = mode->clock;
2260                 wm_high.src_width = mode->crtc_hdisplay;
2261                 wm_high.active_time = active_time;
2262                 wm_high.blank_time = line_time - wm_high.active_time;
2263                 wm_high.interlaced = false;
2264                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2265                         wm_high.interlaced = true;
2266                 wm_high.vsc = radeon_crtc->vsc;
2267                 wm_high.vtaps = 1;
2268                 if (radeon_crtc->rmx_type != RMX_OFF)
2269                         wm_high.vtaps = 2;
2270                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2271                 wm_high.lb_size = lb_size;
2272                 wm_high.dram_channels = dram_channels;
2273                 wm_high.num_heads = num_heads;
2274
2275                 /* watermark for low clocks */
2276                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2277                         wm_low.yclk =
2278                                 radeon_dpm_get_mclk(rdev, true) * 10;
2279                         wm_low.sclk =
2280                                 radeon_dpm_get_sclk(rdev, true) * 10;
2281                 } else {
2282                         wm_low.yclk = rdev->pm.current_mclk * 10;
2283                         wm_low.sclk = rdev->pm.current_sclk * 10;
2284                 }
2285
2286                 wm_low.disp_clk = mode->clock;
2287                 wm_low.src_width = mode->crtc_hdisplay;
2288                 wm_low.active_time = active_time;
2289                 wm_low.blank_time = line_time - wm_low.active_time;
2290                 wm_low.interlaced = false;
2291                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2292                         wm_low.interlaced = true;
2293                 wm_low.vsc = radeon_crtc->vsc;
2294                 wm_low.vtaps = 1;
2295                 if (radeon_crtc->rmx_type != RMX_OFF)
2296                         wm_low.vtaps = 2;
2297                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2298                 wm_low.lb_size = lb_size;
2299                 wm_low.dram_channels = dram_channels;
2300                 wm_low.num_heads = num_heads;
2301
2302                 /* set for high clocks */
2303                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2304                 /* set for low clocks */
2305                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2306
2307                 /* possibly force display priority to high */
2308                 /* should really do this at mode validation time... */
2309                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2310                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2311                     !dce6_check_latency_hiding(&wm_high) ||
2312                     (rdev->disp_priority == 2)) {
2313                         DRM_DEBUG_KMS("force priority to high\n");
2314                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2315                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2316                 }
2317                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2318                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2319                     !dce6_check_latency_hiding(&wm_low) ||
2320                     (rdev->disp_priority == 2)) {
2321                         DRM_DEBUG_KMS("force priority to high\n");
2322                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2323                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2324                 }
2325
2326                 a.full = dfixed_const(1000);
2327                 b.full = dfixed_const(mode->clock);
2328                 b.full = dfixed_div(b, a);
2329                 c.full = dfixed_const(latency_watermark_a);
2330                 c.full = dfixed_mul(c, b);
2331                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2332                 c.full = dfixed_div(c, a);
2333                 a.full = dfixed_const(16);
2334                 c.full = dfixed_div(c, a);
2335                 priority_a_mark = dfixed_trunc(c);
2336                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2337
2338                 a.full = dfixed_const(1000);
2339                 b.full = dfixed_const(mode->clock);
2340                 b.full = dfixed_div(b, a);
2341                 c.full = dfixed_const(latency_watermark_b);
2342                 c.full = dfixed_mul(c, b);
2343                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2344                 c.full = dfixed_div(c, a);
2345                 a.full = dfixed_const(16);
2346                 c.full = dfixed_div(c, a);
2347                 priority_b_mark = dfixed_trunc(c);
2348                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2349
2350                 /* Save number of lines the linebuffer leads before the scanout */
2351                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2352         }
2353
2354         /* select wm A */
2355         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2356         tmp = arb_control3;
2357         tmp &= ~LATENCY_WATERMARK_MASK(3);
2358         tmp |= LATENCY_WATERMARK_MASK(1);
2359         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2360         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2361                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2362                 LATENCY_HIGH_WATERMARK(line_time)));
2363         /* select wm B */
2364         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2365         tmp &= ~LATENCY_WATERMARK_MASK(3);
2366         tmp |= LATENCY_WATERMARK_MASK(2);
2367         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2368         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2369                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2370                 LATENCY_HIGH_WATERMARK(line_time)));
2371         /* restore original selection */
2372         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2373
2374         /* write the priority marks */
2375         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2376         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2377
2378         /* save values for DPM */
2379         radeon_crtc->line_time = line_time;
2380         radeon_crtc->wm_high = latency_watermark_a;
2381         radeon_crtc->wm_low = latency_watermark_b;
2382 }
2383
2384 void dce6_bandwidth_update(struct radeon_device *rdev)
2385 {
2386         struct drm_display_mode *mode0 = NULL;
2387         struct drm_display_mode *mode1 = NULL;
2388         u32 num_heads = 0, lb_size;
2389         int i;
2390
2391         if (!rdev->mode_info.mode_config_initialized)
2392                 return;
2393
2394         radeon_update_display_priority(rdev);
2395
2396         for (i = 0; i < rdev->num_crtc; i++) {
2397                 if (rdev->mode_info.crtcs[i]->base.enabled)
2398                         num_heads++;
2399         }
2400         for (i = 0; i < rdev->num_crtc; i += 2) {
2401                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2402                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2403                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2404                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2405                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2406                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2407         }
2408 }
2409
2410 /*
2411  * Core functions
2412  */
2413 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2414 {
2415         u32 *tile = rdev->config.si.tile_mode_array;
2416         const u32 num_tile_mode_states =
2417                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2418         u32 reg_offset, split_equal_to_row_size;
2419
2420         switch (rdev->config.si.mem_row_size_in_kb) {
2421         case 1:
2422                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2423                 break;
2424         case 2:
2425         default:
2426                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2427                 break;
2428         case 4:
2429                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2430                 break;
2431         }
2432
2433         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2434                 tile[reg_offset] = 0;
2435
2436         switch(rdev->family) {
2437         case CHIP_TAHITI:
2438         case CHIP_PITCAIRN:
2439                 /* non-AA compressed depth or any compressed stencil */
2440                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2442                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2443                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2444                            NUM_BANKS(ADDR_SURF_16_BANK) |
2445                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2448                 /* 2xAA/4xAA compressed depth only */
2449                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2451                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2453                            NUM_BANKS(ADDR_SURF_16_BANK) |
2454                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2457                 /* 8xAA compressed depth only */
2458                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2460                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2461                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2462                            NUM_BANKS(ADDR_SURF_16_BANK) |
2463                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2465                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2466                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2467                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2468                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2469                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2470                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2471                            NUM_BANKS(ADDR_SURF_16_BANK) |
2472                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2474                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2475                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2476                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2477                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2478                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2479                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK) |
2481                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2484                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2485                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2488                            TILE_SPLIT(split_equal_to_row_size) |
2489                            NUM_BANKS(ADDR_SURF_16_BANK) |
2490                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2493                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2494                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2497                            TILE_SPLIT(split_equal_to_row_size) |
2498                            NUM_BANKS(ADDR_SURF_16_BANK) |
2499                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2502                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2503                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506                            TILE_SPLIT(split_equal_to_row_size) |
2507                            NUM_BANKS(ADDR_SURF_16_BANK) |
2508                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511                 /* 1D and 1D Array Surfaces */
2512                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2513                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2514                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2515                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2516                            NUM_BANKS(ADDR_SURF_16_BANK) |
2517                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2519                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2520                 /* Displayable maps. */
2521                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2525                            NUM_BANKS(ADDR_SURF_16_BANK) |
2526                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2529                 /* Display 8bpp. */
2530                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2533                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2534                            NUM_BANKS(ADDR_SURF_16_BANK) |
2535                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2538                 /* Display 16bpp. */
2539                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2541                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2542                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2543                            NUM_BANKS(ADDR_SURF_16_BANK) |
2544                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2546                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2547                 /* Display 32bpp. */
2548                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2551                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2552                            NUM_BANKS(ADDR_SURF_16_BANK) |
2553                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2556                 /* Thin. */
2557                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2559                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2560                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2561                            NUM_BANKS(ADDR_SURF_16_BANK) |
2562                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2565                 /* Thin 8 bpp. */
2566                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2568                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2569                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2570                            NUM_BANKS(ADDR_SURF_16_BANK) |
2571                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2574                 /* Thin 16 bpp. */
2575                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2577                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2578                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2579                            NUM_BANKS(ADDR_SURF_16_BANK) |
2580                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2583                 /* Thin 32 bpp. */
2584                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2586                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2587                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2588                            NUM_BANKS(ADDR_SURF_16_BANK) |
2589                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2592                 /* Thin 64 bpp. */
2593                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2595                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596                            TILE_SPLIT(split_equal_to_row_size) |
2597                            NUM_BANKS(ADDR_SURF_16_BANK) |
2598                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601                 /* 8 bpp PRT. */
2602                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2604                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2605                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2606                            NUM_BANKS(ADDR_SURF_16_BANK) |
2607                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2608                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2610                 /* 16 bpp PRT */
2611                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2613                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2614                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615                            NUM_BANKS(ADDR_SURF_16_BANK) |
2616                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2619                 /* 32 bpp PRT */
2620                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2622                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2623                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2624                            NUM_BANKS(ADDR_SURF_16_BANK) |
2625                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2627                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2628                 /* 64 bpp PRT */
2629                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2631                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2632                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2633                            NUM_BANKS(ADDR_SURF_16_BANK) |
2634                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2637                 /* 128 bpp PRT */
2638                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2640                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2641                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2642                            NUM_BANKS(ADDR_SURF_8_BANK) |
2643                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2646
2647                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2648                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2649                 break;
2650
2651         case CHIP_VERDE:
2652         case CHIP_OLAND:
2653         case CHIP_HAINAN:
2654                 /* non-AA compressed depth or any compressed stencil */
2655                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2657                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2658                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2659                            NUM_BANKS(ADDR_SURF_16_BANK) |
2660                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2662                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2663                 /* 2xAA/4xAA compressed depth only */
2664                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2665                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2666                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2667                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2668                            NUM_BANKS(ADDR_SURF_16_BANK) |
2669                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2671                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2672                 /* 8xAA compressed depth only */
2673                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2676                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2677                            NUM_BANKS(ADDR_SURF_16_BANK) |
2678                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2680                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2681                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2682                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2684                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2685                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2686                            NUM_BANKS(ADDR_SURF_16_BANK) |
2687                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2690                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2691                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2692                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2693                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2695                            NUM_BANKS(ADDR_SURF_16_BANK) |
2696                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2700                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2702                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2703                            TILE_SPLIT(split_equal_to_row_size) |
2704                            NUM_BANKS(ADDR_SURF_16_BANK) |
2705                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2707                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2708                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2709                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2711                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712                            TILE_SPLIT(split_equal_to_row_size) |
2713                            NUM_BANKS(ADDR_SURF_16_BANK) |
2714                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2717                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2718                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2720                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721                            TILE_SPLIT(split_equal_to_row_size) |
2722                            NUM_BANKS(ADDR_SURF_16_BANK) |
2723                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2726                 /* 1D and 1D Array Surfaces */
2727                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2728                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2729                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2731                            NUM_BANKS(ADDR_SURF_16_BANK) |
2732                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2735                 /* Displayable maps. */
2736                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2737                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2738                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2740                            NUM_BANKS(ADDR_SURF_16_BANK) |
2741                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2743                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2744                 /* Display 8bpp. */
2745                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2749                            NUM_BANKS(ADDR_SURF_16_BANK) |
2750                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753                 /* Display 16bpp. */
2754                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2756                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2758                            NUM_BANKS(ADDR_SURF_16_BANK) |
2759                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2761                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2762                 /* Display 32bpp. */
2763                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2765                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2767                            NUM_BANKS(ADDR_SURF_16_BANK) |
2768                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2771                 /* Thin. */
2772                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2774                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2776                            NUM_BANKS(ADDR_SURF_16_BANK) |
2777                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2780                 /* Thin 8 bpp. */
2781                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2782                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2783                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2785                            NUM_BANKS(ADDR_SURF_16_BANK) |
2786                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2788                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2789                 /* Thin 16 bpp. */
2790                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2792                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2794                            NUM_BANKS(ADDR_SURF_16_BANK) |
2795                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2797                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2798                 /* Thin 32 bpp. */
2799                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2801                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2803                            NUM_BANKS(ADDR_SURF_16_BANK) |
2804                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2807                 /* Thin 64 bpp. */
2808                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2810                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811                            TILE_SPLIT(split_equal_to_row_size) |
2812                            NUM_BANKS(ADDR_SURF_16_BANK) |
2813                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2816                 /* 8 bpp PRT. */
2817                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2819                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2820                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2821                            NUM_BANKS(ADDR_SURF_16_BANK) |
2822                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2823                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2825                 /* 16 bpp PRT */
2826                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2827                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2828                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2829                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2830                            NUM_BANKS(ADDR_SURF_16_BANK) |
2831                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2834                 /* 32 bpp PRT */
2835                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2838                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2839                            NUM_BANKS(ADDR_SURF_16_BANK) |
2840                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2842                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843                 /* 64 bpp PRT */
2844                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2846                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2847                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2848                            NUM_BANKS(ADDR_SURF_16_BANK) |
2849                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2852                 /* 128 bpp PRT */
2853                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2855                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2856                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2857                            NUM_BANKS(ADDR_SURF_8_BANK) |
2858                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2861
2862                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2864                 break;
2865
2866         default:
2867                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2868         }
2869 }
2870
2871 static void si_select_se_sh(struct radeon_device *rdev,
2872                             u32 se_num, u32 sh_num)
2873 {
2874         u32 data = INSTANCE_BROADCAST_WRITES;
2875
2876         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2877                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2878         else if (se_num == 0xffffffff)
2879                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2880         else if (sh_num == 0xffffffff)
2881                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2882         else
2883                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2884         WREG32(GRBM_GFX_INDEX, data);
2885 }
2886
2887 static u32 si_create_bitmask(u32 bit_width)
2888 {
2889         u32 i, mask = 0;
2890
2891         for (i = 0; i < bit_width; i++) {
2892                 mask <<= 1;
2893                 mask |= 1;
2894         }
2895         return mask;
2896 }
2897
2898 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2899 {
2900         u32 data, mask;
2901
2902         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2903         if (data & 1)
2904                 data &= INACTIVE_CUS_MASK;
2905         else
2906                 data = 0;
2907         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2908
2909         data >>= INACTIVE_CUS_SHIFT;
2910
2911         mask = si_create_bitmask(cu_per_sh);
2912
2913         return ~data & mask;
2914 }
2915
/*
 * si_setup_spi - adjust static thread management per SE/SH
 *
 * For every shader engine / shader array pair, clear the bit of the first
 * active CU that matches the probed mask in SPI_STATIC_THREAD_MGMT_3, then
 * restore broadcast register indexing.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* direct subsequent register accesses at this SE/SH */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): "mask <<= k" shifts by the loop
				 * index, producing the sequence 1, 2, 8, 0x40,
				 * 0x400, ... rather than walking one bit at a
				 * time; "mask = 1 << k" looks intended.  Left
				 * as-is because changing it alters which CU is
				 * cleared on real hardware — confirm against
				 * SI register docs before touching. */
				mask <<= k;
				if (active_cu & mask) {
					/* reserve this CU and stop scanning */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* restore broadcast writes to all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2942
2943 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2944                               u32 max_rb_num_per_se,
2945                               u32 sh_per_se)
2946 {
2947         u32 data, mask;
2948
2949         data = RREG32(CC_RB_BACKEND_DISABLE);
2950         if (data & 1)
2951                 data &= BACKEND_DISABLE_MASK;
2952         else
2953                 data = 0;
2954         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2955
2956         data >>= BACKEND_DISABLE_SHIFT;
2957
2958         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2959
2960         return data & mask;
2961 }
2962
/*
 * si_setup_rb - program the render backend raster configuration
 *
 * Collects the disabled-RB bits from every SE/SH into one packed bitmap,
 * derives the global enable mask (saved in
 * rdev->config.si.backend_enable_mask for userspace queries), and then
 * programs a per-SE PA_SC_RASTER_CONFIG RB mapping based on which RBs of
 * each pair survived.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* pack each SH's disabled-RB bits into one bitmap, one field per SH */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit in enabled_rbs means that RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* translate each SH's 2-bit RB enable pattern into a RASTER_CONFIG
	 * RB_MAP field and program the result per shader engine */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both RBs enabled */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3012
/*
 * si_gpu_init - one-time graphics engine initialization
 *
 * Fills in the per-ASIC shader/tiling limits, derives gb_addr_config and
 * the userspace-visible tile_config word, programs the HDP/tiling/address
 * configuration registers, sets up RBs and SPI thread management, counts
 * the active CUs, and finally applies the hardware defaults for the 3D
 * engine.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader core and scan-converter FIFO limits */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* NOTE: the default label sits here on purpose — unknown SI
		 * parts fall back to the Verde configuration */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * presumably kept for the register read side effect or historical
	 * reasons; confirm before removing */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive the DRAM row size (in KB) from the column count, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count active CUs over all SE/SH pairs for userspace queries */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no changes: leaves SX_DEBUG_1 at its reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	/* make HDP flushes also invalidate its cache */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the register writes settle */
	udelay(50);
}
3278
3279 /*
3280  * GPU scratch registers helpers function.
3281  */
3282 static void si_scratch_init(struct radeon_device *rdev)
3283 {
3284         int i;
3285
3286         rdev->scratch.num_reg = 7;
3287         rdev->scratch.reg_base = SCRATCH_REG0;
3288         for (i = 0; i < rdev->scratch.num_reg; i++) {
3289                 rdev->scratch.free[i] = true;
3290                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3291         }
3292 }
3293
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Flushes the GPU caches over GART, then emits an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence driver's GPU address
 * and raises an interrupt once all prior work has completed.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* sync the full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit fence value; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3320
3321 /*
3322  * IB stuff
3323  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon device
 * @ib: indirect buffer to execute
 *
 * Emits the appropriate INDIRECT_BUFFER packet (const or regular) for the
 * IB, optionally recording the expected read pointer for debugging/
 * writeback first, and flushes the read caches for the IB's VMID after a
 * regular IB.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* offsets below (3 + 4 + 8 / 5 + 4 + 8) are the dword
			 * counts of this packet, the IB packet, and the flush
			 * sequence emitted further down */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* WRITE_DATA control word; (1 << 8) selects the write
			 * destination — presumably memory, per the PM4
			 * WRITE_DATA encoding (confirm against SI PM4 docs) */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) | /* byte-swap the IB fetch */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, vm_id);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* sync the full address range */
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3380
3381 /*
3382  * CP.
3383  */
3384 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3385 {
3386         if (enable)
3387                 WREG32(CP_ME_CNTL, 0);
3388         else {
3389                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3390                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3391                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3392                 WREG32(SCRATCH_UMSK, 0);
3393                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3394                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3395                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3396         }
3397         udelay(50);
3398 }
3399
3400 static int si_cp_load_microcode(struct radeon_device *rdev)
3401 {
3402         int i;
3403
3404         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3405                 return -EINVAL;
3406
3407         si_cp_enable(rdev, false);
3408
3409         if (rdev->new_fw) {
3410                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3411                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3412                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3413                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3414                 const struct gfx_firmware_header_v1_0 *me_hdr =
3415                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3416                 const __le32 *fw_data;
3417                 u32 fw_size;
3418
3419                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3420                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3421                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3422
3423                 /* PFP */
3424                 fw_data = (const __le32 *)
3425                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3426                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3427                 WREG32(CP_PFP_UCODE_ADDR, 0);
3428                 for (i = 0; i < fw_size; i++)
3429                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3430                 WREG32(CP_PFP_UCODE_ADDR, 0);
3431
3432                 /* CE */
3433                 fw_data = (const __le32 *)
3434                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3435                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3436                 WREG32(CP_CE_UCODE_ADDR, 0);
3437                 for (i = 0; i < fw_size; i++)
3438                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3439                 WREG32(CP_CE_UCODE_ADDR, 0);
3440
3441                 /* ME */
3442                 fw_data = (const __be32 *)
3443                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3444                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3445                 WREG32(CP_ME_RAM_WADDR, 0);
3446                 for (i = 0; i < fw_size; i++)
3447                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3448                 WREG32(CP_ME_RAM_WADDR, 0);
3449         } else {
3450                 const __be32 *fw_data;
3451
3452                 /* PFP */
3453                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3454                 WREG32(CP_PFP_UCODE_ADDR, 0);
3455                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3456                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3457                 WREG32(CP_PFP_UCODE_ADDR, 0);
3458
3459                 /* CE */
3460                 fw_data = (const __be32 *)rdev->ce_fw->data;
3461                 WREG32(CP_CE_UCODE_ADDR, 0);
3462                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3463                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3464                 WREG32(CP_CE_UCODE_ADDR, 0);
3465
3466                 /* ME */
3467                 fw_data = (const __be32 *)rdev->me_fw->data;
3468                 WREG32(CP_ME_RAM_WADDR, 0);
3469                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3470                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3471                 WREG32(CP_ME_RAM_WADDR, 0);
3472         }
3473
3474         WREG32(CP_PFP_UCODE_ADDR, 0);
3475         WREG32(CP_CE_UCODE_ADDR, 0);
3476         WREG32(CP_ME_RAM_WADDR, 0);
3477         WREG32(CP_ME_RAM_RADDR, 0);
3478         return 0;
3479 }
3480
3481 static int si_cp_start(struct radeon_device *rdev)
3482 {
3483         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3484         int r, i;
3485
3486         r = radeon_ring_lock(rdev, ring, 7 + 4);
3487         if (r) {
3488                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3489                 return r;
3490         }
3491         /* init the CP */
3492         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3493         radeon_ring_write(ring, 0x1);
3494         radeon_ring_write(ring, 0x0);
3495         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3496         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3497         radeon_ring_write(ring, 0);
3498         radeon_ring_write(ring, 0);
3499
3500         /* init the CE partitions */
3501         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3502         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3503         radeon_ring_write(ring, 0xc000);
3504         radeon_ring_write(ring, 0xe000);
3505         radeon_ring_unlock_commit(rdev, ring, false);
3506
3507         si_cp_enable(rdev, true);
3508
3509         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3510         if (r) {
3511                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3512                 return r;
3513         }
3514
3515         /* setup clear context state */
3516         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3517         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3518
3519         for (i = 0; i < si_default_size; i++)
3520                 radeon_ring_write(ring, si_default_state[i]);
3521
3522         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3523         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3524
3525         /* set clear context state */
3526         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3527         radeon_ring_write(ring, 0);
3528
3529         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3530         radeon_ring_write(ring, 0x00000316);
3531         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3532         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3533
3534         radeon_ring_unlock_commit(rdev, ring, false);
3535
3536         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3537                 ring = &rdev->ring[i];
3538                 r = radeon_ring_lock(rdev, ring, 2);
3539
3540                 /* clear the compute context state */
3541                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3542                 radeon_ring_write(ring, 0);
3543
3544                 radeon_ring_unlock_commit(rdev, ring, false);
3545         }
3546
3547         return 0;
3548 }
3549
3550 static void si_cp_fini(struct radeon_device *rdev)
3551 {
3552         struct radeon_ring *ring;
3553         si_cp_enable(rdev, false);
3554
3555         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3556         radeon_ring_fini(rdev, ring);
3557         radeon_scratch_free(rdev, ring->rptr_save_reg);
3558
3559         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3560         radeon_ring_fini(rdev, ring);
3561         radeon_scratch_free(rdev, ring->rptr_save_reg);
3562
3563         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3564         radeon_ring_fini(rdev, ring);
3565         radeon_scratch_free(rdev, ring->rptr_save_reg);
3566 }
3567
/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the size, read/write pointers, writeback address and base
 * address of ring 0 (gfx+compute) and rings 1/2 (compute only), starts
 * the CP via si_cp_start(), and ring-tests all three rings.  A failing
 * gfx ring test is fatal; a failing compute ring test only marks that
 * ring not ready.  Returns 0 on success, negative error code on failure.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* no writeback: keep the CP from updating rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3693
/**
 * si_gpu_check_soft_reset - sample the busy/hung status registers
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM L2 status registers and translates
 * any busy indications into a bitmask of RADEON_RESET_* flags naming
 * the engines that would need a soft reset.  An MC-busy indication is
 * deliberately cleared from the result (see comment below).
 * Returns 0 when everything appears idle.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3774
/**
 * si_gpu_soft_reset - soft reset the engines selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* bits, typically from si_gpu_check_soft_reset()
 *
 * Quiesces the chip (disables PG/CG, stops the RLC, halts the CP and
 * the DMA ring buffers, stops MC access), then pulses the matching
 * bits in GRBM_SOFT_RESET and SRBM_SOFT_RESET and resumes the MC.
 * No-op when reset_mask is 0.  The statement order here follows the
 * hardware reset sequence and must not be rearranged.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* pulse the GRBM reset bits: set, settle, then clear.
	 * the extra reads post the writes to the bus. */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3906
3907 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3908 {
3909         u32 tmp, i;
3910
3911         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3912         tmp |= SPLL_BYPASS_EN;
3913         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3914
3915         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3916         tmp |= SPLL_CTLREQ_CHG;
3917         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3918
3919         for (i = 0; i < rdev->usec_timeout; i++) {
3920                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3921                         break;
3922                 udelay(1);
3923         }
3924
3925         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3926         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3927         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3928
3929         tmp = RREG32(MPLL_CNTL_MODE);
3930         tmp &= ~MPLL_MCLK_SEL;
3931         WREG32(MPLL_CNTL_MODE, tmp);
3932 }
3933
3934 static void si_spll_powerdown(struct radeon_device *rdev)
3935 {
3936         u32 tmp;
3937
3938         tmp = RREG32(SPLL_CNTL_MODE);
3939         tmp |= SPLL_SW_DIR_CONTROL;
3940         WREG32(SPLL_CNTL_MODE, tmp);
3941
3942         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3943         tmp |= SPLL_RESET;
3944         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3945
3946         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3947         tmp |= SPLL_SLEEP;
3948         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3949
3950         tmp = RREG32(SPLL_CNTL_MODE);
3951         tmp &= ~SPLL_SW_DIR_CONTROL;
3952         WREG32(SPLL_CNTL_MODE, tmp);
3953 }
3954
/**
 * si_gpu_pci_config_reset - reset the asic via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Full "hard" reset path: quiesces the CP, both DMA engines and the
 * RLC, stops MC access, switches sclk/mclk to bypass, powers down the
 * SPLL and disables bus mastering, then triggers the reset through
 * PCI config space and polls (up to usec_timeout) until the asic
 * responds to register reads again.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads as all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4006
4007 int si_asic_reset(struct radeon_device *rdev, bool hard)
4008 {
4009         u32 reset_mask;
4010
4011         if (hard) {
4012                 si_gpu_pci_config_reset(rdev);
4013                 return 0;
4014         }
4015
4016         reset_mask = si_gpu_check_soft_reset(rdev);
4017
4018         if (reset_mask)
4019                 r600_set_bios_scratch_engine_hung(rdev, true);
4020
4021         /* try soft reset */
4022         si_gpu_soft_reset(rdev, reset_mask);
4023
4024         reset_mask = si_gpu_check_soft_reset(rdev);
4025
4026         /* try pci config reset */
4027         if (reset_mask && radeon_hard_reset)
4028                 si_gpu_pci_config_reset(rdev);
4029
4030         reset_mask = si_gpu_check_soft_reset(rdev);
4031
4032         if (!reset_mask)
4033                 r600_set_bios_scratch_engine_hung(rdev, false);
4034
4035         return 0;
4036 }
4037
4038 /**
4039  * si_gfx_is_lockup - Check if the GFX engine is locked up
4040  *
4041  * @rdev: radeon_device pointer
4042  * @ring: radeon_ring structure holding ring information
4043  *
4044  * Check if the GFX engine is locked up.
4045  * Returns true if the engine appears to be locked up, false if not.
4046  */
4047 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4048 {
4049         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4050
4051         if (!(reset_mask & (RADEON_RESET_GFX |
4052                             RADEON_RESET_COMPUTE |
4053                             RADEON_RESET_CP))) {
4054                 radeon_ring_lockup_update(rdev, ring);
4055                 return false;
4056         }
4057         return radeon_ring_test_lockup(rdev, ring);
4058 }
4059
4060 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops MC access, then programs the
 * system aperture, FB location, HDP non-surface range and AGP
 * apertures (AGP disabled: BOT > TOP) before resuming the MC.  On
 * parts with display (not NODCE) it also locks out and disables the
 * VGA renderer so it cannot scribble over VRAM.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (15:0) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4111
4112 void si_vram_gtt_location(struct radeon_device *rdev,
4113                           struct radeon_mc *mc)
4114 {
4115         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4116                 /* leave room for at least 1024M GTT */
4117                 dev_warn(rdev->dev, "limiting VRAM\n");
4118                 mc->real_vram_size = 0xFFC0000000ULL;
4119                 mc->mc_vram_size = 0xFFC0000000ULL;
4120         }
4121         radeon_vram_location(rdev, &rdev->mc, 0);
4122         rdev->mc.gtt_base_align = 0;
4123         radeon_gtt_location(rdev, mc);
4124 }
4125
4126 static int si_mc_init(struct radeon_device *rdev)
4127 {
4128         u32 tmp;
4129         int chansize, numchan;
4130
4131         /* Get VRAM informations */
4132         rdev->mc.vram_is_ddr = true;
4133         tmp = RREG32(MC_ARB_RAMCFG);
4134         if (tmp & CHANSIZE_OVERRIDE) {
4135                 chansize = 16;
4136         } else if (tmp & CHANSIZE_MASK) {
4137                 chansize = 64;
4138         } else {
4139                 chansize = 32;
4140         }
4141         tmp = RREG32(MC_SHARED_CHMAP);
4142         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4143         case 0:
4144         default:
4145                 numchan = 1;
4146                 break;
4147         case 1:
4148                 numchan = 2;
4149                 break;
4150         case 2:
4151                 numchan = 4;
4152                 break;
4153         case 3:
4154                 numchan = 8;
4155                 break;
4156         case 4:
4157                 numchan = 3;
4158                 break;
4159         case 5:
4160                 numchan = 6;
4161                 break;
4162         case 6:
4163                 numchan = 10;
4164                 break;
4165         case 7:
4166                 numchan = 12;
4167                 break;
4168         case 8:
4169                 numchan = 16;
4170                 break;
4171         }
4172         rdev->mc.vram_width = numchan * chansize;
4173         /* Could aper size report 0 ? */
4174         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4175         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4176         /* size in MB on si */
4177         tmp = RREG32(CONFIG_MEMSIZE);
4178         /* some boards may have garbage in the upper 16 bits */
4179         if (tmp & 0xffff0000) {
4180                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4181                 if (tmp & 0xffff)
4182                         tmp &= 0xffff;
4183         }
4184         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4185         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4186         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4187         si_vram_gtt_location(rdev, &rdev->mc);
4188         radeon_update_bandwidth_info(rdev);
4189
4190         return 0;
4191 }
4192
4193 /*
4194  * GART
4195  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP read cache and then invalidates the TLBs of all
 * 16 VM contexts so subsequent GPU accesses see updated page tables.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4204
/**
 * si_pcie_gart_enable - pin the GART table and enable the VM hardware
 *
 * @rdev: radeon_device pointer
 *
 * Pins the page table in VRAM, programs the L1 TLB and L2 cache,
 * sets up system context 0 to cover the GTT aperture, restores the
 * saved page-table base addresses for contexts 1-15 and enables them
 * with full protection-fault reporting, then flushes the TLBs.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4292
/**
 * si_pcie_gart_disable - shut down the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Saves the per-context page table base registers, disables all VM
 * contexts, puts the L1 TLB/L2 cache into a pass-through configuration
 * and unpins the GART table BO.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* Save the page table base of contexts 1-15 so they can be
	 * restored when the GART is re-enabled (e.g. across suspend).
	 * Contexts 0-7 and 8-15 live in two separate register banks.
	 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4322
/**
 * si_pcie_gart_fini - final teardown of the PCIE GART (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disables the hardware side first, then frees the GART table BO and
 * the common GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4329
4330 /* vm parser */
4331 static bool si_vm_reg_valid(u32 reg)
4332 {
4333         /* context regs are fine */
4334         if (reg >= 0x28000)
4335                 return true;
4336
4337         /* shader regs are also fine */
4338         if (reg >= 0xB000 && reg < 0xC000)
4339                 return true;
4340
4341         /* check config regs */
4342         switch (reg) {
4343         case GRBM_GFX_INDEX:
4344         case CP_STRMOUT_CNTL:
4345         case VGT_VTX_VECT_EJECT_REG:
4346         case VGT_CACHE_INVALIDATION:
4347         case VGT_ESGS_RING_SIZE:
4348         case VGT_GSVS_RING_SIZE:
4349         case VGT_GS_VERTEX_REUSE:
4350         case VGT_PRIMITIVE_TYPE:
4351         case VGT_INDEX_TYPE:
4352         case VGT_NUM_INDICES:
4353         case VGT_NUM_INSTANCES:
4354         case VGT_TF_RING_SIZE:
4355         case VGT_HS_OFFCHIP_PARAM:
4356         case VGT_TF_MEMORY_BASE:
4357         case PA_CL_ENHANCE:
4358         case PA_SU_LINE_STIPPLE_VALUE:
4359         case PA_SC_LINE_STIPPLE_STATE:
4360         case PA_SC_ENHANCE:
4361         case SQC_CACHES:
4362         case SPI_STATIC_THREAD_MGMT_1:
4363         case SPI_STATIC_THREAD_MGMT_2:
4364         case SPI_STATIC_THREAD_MGMT_3:
4365         case SPI_PS_MAX_WAVE_ID:
4366         case SPI_CONFIG_CNTL:
4367         case SPI_CONFIG_CNTL_1:
4368         case TA_CNTL_AUX:
4369         case TA_CS_BC_BASE_ADDR:
4370                 return true;
4371         default:
4372                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4373                 return false;
4374         }
4375 }
4376
4377 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4378                                   u32 *ib, struct radeon_cs_packet *pkt)
4379 {
4380         switch (pkt->opcode) {
4381         case PACKET3_NOP:
4382         case PACKET3_SET_BASE:
4383         case PACKET3_SET_CE_DE_COUNTERS:
4384         case PACKET3_LOAD_CONST_RAM:
4385         case PACKET3_WRITE_CONST_RAM:
4386         case PACKET3_WRITE_CONST_RAM_OFFSET:
4387         case PACKET3_DUMP_CONST_RAM:
4388         case PACKET3_INCREMENT_CE_COUNTER:
4389         case PACKET3_WAIT_ON_DE_COUNTER:
4390         case PACKET3_CE_WRITE:
4391                 break;
4392         default:
4393                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4394                 return -EINVAL;
4395         }
4396         return 0;
4397 }
4398
4399 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4400 {
4401         u32 start_reg, reg, i;
4402         u32 command = ib[idx + 4];
4403         u32 info = ib[idx + 1];
4404         u32 idx_value = ib[idx];
4405         if (command & PACKET3_CP_DMA_CMD_SAS) {
4406                 /* src address space is register */
4407                 if (((info & 0x60000000) >> 29) == 0) {
4408                         start_reg = idx_value << 2;
4409                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4410                                 reg = start_reg;
4411                                 if (!si_vm_reg_valid(reg)) {
4412                                         DRM_ERROR("CP DMA Bad SRC register\n");
4413                                         return -EINVAL;
4414                                 }
4415                         } else {
4416                                 for (i = 0; i < (command & 0x1fffff); i++) {
4417                                         reg = start_reg + (4 * i);
4418                                         if (!si_vm_reg_valid(reg)) {
4419                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4420                                                 return -EINVAL;
4421                                         }
4422                                 }
4423                         }
4424                 }
4425         }
4426         if (command & PACKET3_CP_DMA_CMD_DAS) {
4427                 /* dst address space is register */
4428                 if (((info & 0x00300000) >> 20) == 0) {
4429                         start_reg = ib[idx + 2];
4430                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4431                                 reg = start_reg;
4432                                 if (!si_vm_reg_valid(reg)) {
4433                                         DRM_ERROR("CP DMA Bad DST register\n");
4434                                         return -EINVAL;
4435                                 }
4436                         } else {
4437                                 for (i = 0; i < (command & 0x1fffff); i++) {
4438                                         reg = start_reg + (4 * i);
4439                                 if (!si_vm_reg_valid(reg)) {
4440                                                 DRM_ERROR("CP DMA Bad DST register\n");
4441                                                 return -EINVAL;
4442                                         }
4443                                 }
4444                         }
4445                 }
4446         }
4447         return 0;
4448 }
4449
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a VM IB on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Rejects any opcode that is not safe to execute from a user VM IB and,
 * for opcodes that can write registers, checks every target register
 * against the si_vm_reg_valid() whitelist.
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* These opcodes need no further operand checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* destination select == 0: writing a register - validate it.
		 * NOTE(review): presumably bits 8-11 of the first payload
		 * dword are the dst select - confirm against the PM4 spec. */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* register destination: check each register written, unless
		 * the single-register-write bit (0x10000) is set. */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* pkt->count includes the addressing dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space == register: validate the target register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register: validate it */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* bounds-check the whole write against the config register
		 * window, then whitelist each register individually. */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP_DMA has its own register-space checks */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4567
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a VM IB on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): the allowed
 * opcode set omits the draw/geometry packets, and there is no
 * SET_CONFIG_REG case. Register-writing packets are checked against
 * the si_vm_reg_valid() whitelist.
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;		/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* These opcodes need no further operand checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* destination select == 0: writing a register - validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* register destination: check each register written, unless
		 * the single-register-write bit (0x10000) is set. */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* pkt->count includes the addressing dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space == register: validate the target register */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst is a register: validate it */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* CP_DMA has its own register-space checks */
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4655
/**
 * si_ib_parse - validate a VM IB before execution (SI)
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to validate
 *
 * Walks every packet in the IB and dispatches it to the CE, GFX or
 * compute checker depending on the IB type and target ring.  Type-0
 * packets (direct register writes) are never allowed from a VM IB.
 * On the first invalid packet the whole IB is dumped to the log with
 * the failing dword marked, and the error is returned.
 * Returns 0 if every packet passed validation.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;	/* not used by the SI checkers */
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* raw register writes are forbidden in VM IBs */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a one-dword filler/NOP */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* packet length = header + (count + 1) payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret) {
			/* dump the whole IB, marking the offending dword */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			break;
		}
	} while (idx < ib->length_dw);

	return ret;
}
4714
4715 /*
4716  * vm
4717  */
/**
 * si_vm_init - initialize the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 VM contexts and VRAM pages start at offset 0.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4727
/**
 * si_vm_fini - VM manager teardown (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() allocates no resources.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4731
4732 /**
4733  * si_vm_decode_fault - print human readable fault info
4734  *
4735  * @rdev: radeon_device pointer
4736  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4737  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4738  *
4739  * Print human readable fault information (SI).
4740  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* decode the fields packed into the fault status register */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* Map the memory-controller client id to a human readable block
	 * name.  Tahiti uses a different client id layout than the other
	 * SI parts, hence the two lookup tables.
	 */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* NOTE(review): printk() lacks an explicit KERN_ log level here -
	 * consider dev_err()/pr_err() if this is ever reworked. */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4995
/**
 * si_vm_flush - flush a VM's TLB via ring packets (SI)
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM context id (0-15)
 * @pd_addr: physical address of the page directory
 *
 * Emits WRITE_DATA packets that install the new page directory base
 * for @vm_id, flush the HDP cache and request a TLB invalidate, then
 * polls until the invalidate completes and resyncs the PFP with the ME.
 * The packet order here matters; do not reorder the writes.
 */
void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		 unsigned vm_id, uint64_t pd_addr)
{
	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5044
5045 /*
5046  *  Power and clock gating
5047  */
5048 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5049 {
5050         int i;
5051
5052         for (i = 0; i < rdev->usec_timeout; i++) {
5053                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5054                         break;
5055                 udelay(1);
5056         }
5057
5058         for (i = 0; i < rdev->usec_timeout; i++) {
5059                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5060                         break;
5061                 udelay(1);
5062         }
5063 }
5064
/**
 * si_enable_gui_idle_interrupt - toggle the GUI idle/busy interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable the context busy/empty interrupts
 *
 * Read-modify-writes CP_INT_CNTL_RING0.  When disabling, it also reads
 * a GFX register and then polls RLC_STAT until only the clock/power
 * status bits remain set, i.e. the RLC has gone idle.
 */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		/* wait until the RLC is no longer busy and GFX is in the
		 * expected clock/power state (bounded by usec_timeout) */
		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
5090
/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode
 *
 * @rdev: radeon_device pointer
 * @sw_mode: true for software-controlled gating, false for hardware
 *
 * Programs UVD_CGC_CTRL (and UVD_CGC_CTRL2 via the UVD context bus)
 * with the divider/gating setup for the requested mode.
 * NOTE(review): the 0x7ffff800 mask presumably selects the per-block
 * gating override bits - not documented here, confirm against UVD docs.
 */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
5111
/*
 * si_init_uvd_internal_cg - initialize UVD internal clock gating
 *
 * hw_mode is hardwired to true, so this always selects the hardware
 * dynamic clock mode via si_set_uvd_dcm(rdev, false); the else branch
 * (clearing DCM entirely) is currently dead code kept for reference.
 */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
        bool hw_mode = true;

        if (hw_mode) {
                si_set_uvd_dcm(rdev, false);
        } else {
                u32 tmp = RREG32(UVD_CGC_CTRL);
                tmp &= ~DCM;
                WREG32(UVD_CGC_CTRL, tmp);
        }
}
5124
5125 static u32 si_halt_rlc(struct radeon_device *rdev)
5126 {
5127         u32 data, orig;
5128
5129         orig = data = RREG32(RLC_CNTL);
5130
5131         if (data & RLC_ENABLE) {
5132                 data &= ~RLC_ENABLE;
5133                 WREG32(RLC_CNTL, data);
5134
5135                 si_wait_for_rlc_serdes(rdev);
5136         }
5137
5138         return orig;
5139 }
5140
5141 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5142 {
5143         u32 tmp;
5144
5145         tmp = RREG32(RLC_CNTL);
5146         if (tmp != rlc)
5147                 WREG32(RLC_CNTL, rlc);
5148 }
5149
5150 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5151 {
5152         u32 data, orig;
5153
5154         orig = data = RREG32(DMA_PG);
5155         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5156                 data |= PG_CNTL_ENABLE;
5157         else
5158                 data &= ~PG_CNTL_ENABLE;
5159         if (orig != data)
5160                 WREG32(DMA_PG, data);
5161 }
5162
5163 static void si_init_dma_pg(struct radeon_device *rdev)
5164 {
5165         u32 tmp;
5166
5167         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5168         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5169
5170         for (tmp = 0; tmp < 5; tmp++)
5171                 WREG32(DMA_PGFSM_WRITE, 0);
5172 }
5173
/*
 * si_enable_gfx_cgpg - toggle GFX coarse-grain power gating
 *
 * On enable (and when the ASIC supports GFX PG): programs the RLC
 * power-up/power-down/thread-ttp/mem-sleep delays, sets GFX_PG_ENABLE
 * in RLC_PG_CNTL, then turns on automatic power gating (AUTO_PG_EN).
 * On disable: clears AUTO_PG_EN only; GFX_PG_ENABLE is left as-is.
 */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
                               bool enable)
{
        u32 tmp;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                /* RLC power-gating delay parameters (all 0x10) */
                tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
                WREG32(RLC_TTOP_D, tmp);

                tmp = RREG32(RLC_PG_CNTL);
                tmp |= GFX_PG_ENABLE;
                WREG32(RLC_PG_CNTL, tmp);

                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp |= AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);
        } else {
                tmp = RREG32(RLC_AUTO_PG_CTRL);
                tmp &= ~AUTO_PG_EN;
                WREG32(RLC_AUTO_PG_CTRL, tmp);

                /* read back a gfx register; result unused - presumably a
                 * posted-write flush, TODO confirm */
                tmp = RREG32(DB_RENDER_CONTROL);
        }
}
5198
/*
 * si_init_gfx_cgpg - initialize GFX power-gating state
 *
 * Points the RLC at the save/restore and clear-state buffers (256-byte
 * aligned GPU addresses), enables GFX_PG_SRC, and programs the GRBM
 * register save group idle threshold in RLC_AUTO_PG_CTRL.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
        u32 tmp;

        /* addresses are written shifted right by 8 (256-byte granularity) */
        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

        tmp = RREG32(RLC_PG_CNTL);
        tmp |= GFX_PG_SRC;
        WREG32(RLC_PG_CNTL, tmp);

        WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

        tmp = RREG32(RLC_AUTO_PG_CTRL);

        /* idle threshold 0x700 before saving GRBM registers; clear the
         * PG-after-save delay field */
        tmp &= ~GRBM_REG_SGIT_MASK;
        tmp |= GRBM_REG_SGIT(0x700);
        tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
        WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5218
/*
 * si_get_cu_active_bitmap - query active compute units for one SE/SH
 *
 * Selects the given shader engine/array, reads the fuse-level
 * (CC_GC_SHADER_ARRAY_CONFIG) and user-level (GC_USER_SHADER_ARRAY_CONFIG)
 * configs, then restores broadcast selection.  The inactive-CU bits live
 * in the upper 16 bits; the two sources are combined and shifted down.
 * Returns a bitmap with bit i set for each ACTIVE CU, limited to
 * max_cu_per_sh bits.
 */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
        u32 mask = 0, tmp, tmp1;
        int i;

        si_select_se_sh(rdev, se, sh);
        tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
        tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
        /* restore broadcast to all SEs/SHs */
        si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

        /* merge the inactive-CU fields from the upper halves */
        tmp &= 0xffff0000;

        tmp |= tmp1;
        tmp >>= 16;

        /* mask = (1 << max_cu_per_sh) - 1 */
        for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
                mask <<= 1;
                mask |= 1;
        }

        /* invert: inactive bits -> active bits */
        return (~tmp) & mask;
}
5241
5242 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5243 {
5244         u32 i, j, k, active_cu_number = 0;
5245         u32 mask, counter, cu_bitmap;
5246         u32 tmp = 0;
5247
5248         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5249                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5250                         mask = 1;
5251                         cu_bitmap = 0;
5252                         counter  = 0;
5253                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5254                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5255                                         if (counter < 2)
5256                                                 cu_bitmap |= mask;
5257                                         counter++;
5258                                 }
5259                                 mask <<= 1;
5260                         }
5261
5262                         active_cu_number += counter;
5263                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5264                 }
5265         }
5266
5267         WREG32(RLC_PG_AO_CU_MASK, tmp);
5268
5269         tmp = RREG32(RLC_MAX_PG_CU);
5270         tmp &= ~MAX_PU_CU_MASK;
5271         tmp |= MAX_PU_CU(active_cu_number);
5272         WREG32(RLC_MAX_PG_CU, tmp);
5273 }
5274
/*
 * si_enable_cgcg - toggle coarse-grain clock gating (CGCG/CGLS) for GFX
 *
 * Enable path (requires RADEON_CG_SUPPORT_GFX_CGCG): turn the gui idle
 * interrupt on, halt the RLC, broadcast a serdes write to all SE/SH
 * instances, restore the RLC, then set CGCG_EN | CGLS_EN.  Disable path:
 * turn the gui idle interrupt off, issue four dummy CB_CGTT_SCLK_CTRL
 * reads, then clear the bits.  The control register is only written
 * back when its value changed.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                si_enable_gui_idle_interrupt(rdev, true);

                WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

                /* tmp holds the pre-halt RLC_CNTL for restoration below */
                tmp = si_halt_rlc(rdev);

                /* broadcast the serdes write to every SE/SH */
                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

                si_wait_for_rlc_serdes(rdev);

                si_update_rlc(rdev, tmp);

                WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

                data |= CGCG_EN | CGLS_EN;
        } else {
                si_enable_gui_idle_interrupt(rdev, false);

                /* repeated dummy reads; presumably needed to settle CB
                 * clock gating before clearing CGCG - TODO confirm */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5314
/*
 * si_enable_mgcg - toggle medium-grain clock gating for GFX
 *
 * Enable path (requires RADEON_CG_SUPPORT_GFX_MGCG): program
 * CGTS_SM_CTRL_REG with the magic gating value, optionally enable CP
 * memory light sleep, clear the low MGCG override bits, then halt the
 * RLC and broadcast a serdes write before restoring it.  Disable path
 * performs the reverse: set the override bits, turn off CP memory light
 * sleep, force LS_OVERRIDE | OVERRIDE, and issue a different serdes
 * write.  Registers are only written back when their value changed.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
                           bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data = 0x96940200;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                /* CP memory light sleep, if supported */
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                        orig = data = RREG32(CP_MEM_SLP_CNTL);
                        data |= CP_MEM_LS_EN;
                        if (orig != data)
                                WREG32(CP_MEM_SLP_CNTL, data);
                }

                /* clear the low 6 override bits to allow gating */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data &= 0xffffffc0;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = si_halt_rlc(rdev);

                /* broadcast serdes write to every SE/SH */
                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

                si_update_rlc(rdev, tmp);
        } else {
                /* set override bits to block gating */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= LS_OVERRIDE | OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = si_halt_rlc(rdev);

                WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
                WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
                WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

                si_update_rlc(rdev, tmp);
        }
}
5370
/*
 * si_enable_uvd_mgcg - toggle medium-grain clock gating for the UVD block
 *
 * Enable path (requires RADEON_CG_SUPPORT_UVD_MGCG): set the low 14 bits
 * of UVD_CGC_MEM_CTRL, set DCM in UVD_CGC_CTRL, and zero both CGTT local
 * registers via the SMC indirect space.  Disable path mirrors this with
 * the bits cleared and the CGTT locals set to all-ones (gating blocked).
 */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
                               bool enable)
{
        u32 orig, data, tmp;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp |= 0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
        } else {
                tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                tmp &= ~0x3fff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);

                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
                WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
        }
}
5402
/* Memory-controller registers carrying the MC_LS_ENABLE / MC_CG_ENABLE
 * clock-gating control bits; iterated by si_enable_mc_ls() and
 * si_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5415
5416 static void si_enable_mc_ls(struct radeon_device *rdev,
5417                             bool enable)
5418 {
5419         int i;
5420         u32 orig, data;
5421
5422         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5423                 orig = data = RREG32(mc_cg_registers[i]);
5424                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5425                         data |= MC_LS_ENABLE;
5426                 else
5427                         data &= ~MC_LS_ENABLE;
5428                 if (data != orig)
5429                         WREG32(mc_cg_registers[i], data);
5430         }
5431 }
5432
5433 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5434                                bool enable)
5435 {
5436         int i;
5437         u32 orig, data;
5438
5439         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5440                 orig = data = RREG32(mc_cg_registers[i]);
5441                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5442                         data |= MC_CG_ENABLE;
5443                 else
5444                         data &= ~MC_CG_ENABLE;
5445                 if (data != orig)
5446                         WREG32(mc_cg_registers[i], data);
5447         }
5448 }
5449
/*
 * si_enable_dma_mgcg - toggle medium-grain clock gating for both DMA engines
 *
 * Enable path (requires RADEON_CG_SUPPORT_SDMA_MGCG): for DMA0 and DMA1,
 * clear MEM_POWER_OVERRIDE in DMA_POWER_CNTL and unconditionally write
 * 0x00000100 to DMA_CLK_CTRL.  Disable path: set MEM_POWER_OVERRIDE and
 * write 0xff000000 to DMA_CLK_CTRL (only when changed).
 */
static void si_enable_dma_mgcg(struct radeon_device *rdev,
                               bool enable)
{
        u32 orig, data, offset;
        int i;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
                for (i = 0; i < 2; i++) {
                        if (i == 0)
                                offset = DMA0_REGISTER_OFFSET;
                        else
                                offset = DMA1_REGISTER_OFFSET;
                        orig = data = RREG32(DMA_POWER_CNTL + offset);
                        data &= ~MEM_POWER_OVERRIDE;
                        if (data != orig)
                                WREG32(DMA_POWER_CNTL + offset, data);
                        WREG32(DMA_CLK_CTRL + offset, 0x00000100);
                }
        } else {
                for (i = 0; i < 2; i++) {
                        if (i == 0)
                                offset = DMA0_REGISTER_OFFSET;
                        else
                                offset = DMA1_REGISTER_OFFSET;
                        orig = data = RREG32(DMA_POWER_CNTL + offset);
                        data |= MEM_POWER_OVERRIDE;
                        if (data != orig)
                                WREG32(DMA_POWER_CNTL + offset, data);

                        orig = data = RREG32(DMA_CLK_CTRL + offset);
                        data = 0xff000000;
                        if (data != orig)
                                WREG32(DMA_CLK_CTRL + offset, data);
                }
        }
}
5486
5487 static void si_enable_bif_mgls(struct radeon_device *rdev,
5488                                bool enable)
5489 {
5490         u32 orig, data;
5491
5492         orig = data = RREG32_PCIE(PCIE_CNTL2);
5493
5494         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5495                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5496                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5497         else
5498                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5499                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5500
5501         if (orig != data)
5502                 WREG32_PCIE(PCIE_CNTL2, data);
5503 }
5504
5505 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5506                                bool enable)
5507 {
5508         u32 orig, data;
5509
5510         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5511
5512         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5513                 data &= ~CLOCK_GATING_DIS;
5514         else
5515                 data |= CLOCK_GATING_DIS;
5516
5517         if (orig != data)
5518                 WREG32(HDP_HOST_PATH_CNTL, data);
5519 }
5520
5521 static void si_enable_hdp_ls(struct radeon_device *rdev,
5522                              bool enable)
5523 {
5524         u32 orig, data;
5525
5526         orig = data = RREG32(HDP_MEM_POWER_LS);
5527
5528         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5529                 data |= HDP_LS_ENABLE;
5530         else
5531                 data &= ~HDP_LS_ENABLE;
5532
5533         if (orig != data)
5534                 WREG32(HDP_MEM_POWER_LS, data);
5535 }
5536
5537 static void si_update_cg(struct radeon_device *rdev,
5538                          u32 block, bool enable)
5539 {
5540         if (block & RADEON_CG_BLOCK_GFX) {
5541                 si_enable_gui_idle_interrupt(rdev, false);
5542                 /* order matters! */
5543                 if (enable) {
5544                         si_enable_mgcg(rdev, true);
5545                         si_enable_cgcg(rdev, true);
5546                 } else {
5547                         si_enable_cgcg(rdev, false);
5548                         si_enable_mgcg(rdev, false);
5549                 }
5550                 si_enable_gui_idle_interrupt(rdev, true);
5551         }
5552
5553         if (block & RADEON_CG_BLOCK_MC) {
5554                 si_enable_mc_mgcg(rdev, enable);
5555                 si_enable_mc_ls(rdev, enable);
5556         }
5557
5558         if (block & RADEON_CG_BLOCK_SDMA) {
5559                 si_enable_dma_mgcg(rdev, enable);
5560         }
5561
5562         if (block & RADEON_CG_BLOCK_BIF) {
5563                 si_enable_bif_mgls(rdev, enable);
5564         }
5565
5566         if (block & RADEON_CG_BLOCK_UVD) {
5567                 if (rdev->has_uvd) {
5568                         si_enable_uvd_mgcg(rdev, enable);
5569                 }
5570         }
5571
5572         if (block & RADEON_CG_BLOCK_HDP) {
5573                 si_enable_hdp_mgcg(rdev, enable);
5574                 si_enable_hdp_ls(rdev, enable);
5575         }
5576 }
5577
5578 static void si_init_cg(struct radeon_device *rdev)
5579 {
5580         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5581                             RADEON_CG_BLOCK_MC |
5582                             RADEON_CG_BLOCK_SDMA |
5583                             RADEON_CG_BLOCK_BIF |
5584                             RADEON_CG_BLOCK_HDP), true);
5585         if (rdev->has_uvd) {
5586                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5587                 si_init_uvd_internal_cg(rdev);
5588         }
5589 }
5590
5591 static void si_fini_cg(struct radeon_device *rdev)
5592 {
5593         if (rdev->has_uvd) {
5594                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5595         }
5596         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5597                             RADEON_CG_BLOCK_MC |
5598                             RADEON_CG_BLOCK_SDMA |
5599                             RADEON_CG_BLOCK_BIF |
5600                             RADEON_CG_BLOCK_HDP), false);
5601 }
5602
5603 u32 si_get_csb_size(struct radeon_device *rdev)
5604 {
5605         u32 count = 0;
5606         const struct cs_section_def *sect = NULL;
5607         const struct cs_extent_def *ext = NULL;
5608
5609         if (rdev->rlc.cs_data == NULL)
5610                 return 0;
5611
5612         /* begin clear state */
5613         count += 2;
5614         /* context control state */
5615         count += 3;
5616
5617         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5618                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5619                         if (sect->id == SECT_CONTEXT)
5620                                 count += 2 + ext->reg_count;
5621                         else
5622                                 return 0;
5623                 }
5624         }
5625         /* pa_sc_raster_config */
5626         count += 3;
5627         /* end clear state */
5628         count += 2;
5629         /* clear state */
5630         count += 2;
5631
5632         return count;
5633 }
5634
/*
 * si_get_csb_buffer - fill the clear-state indirect buffer
 *
 * Emits little-endian PM4 packets into @buffer: a PREAMBLE begin, a
 * CONTEXT_CONTROL, one SET_CONTEXT_REG packet per SECT_CONTEXT extent
 * from rdev->rlc.cs_data, a family-specific PA_SC_RASTER_CONFIG value,
 * a PREAMBLE end, and a CLEAR_STATE packet.  The layout must stay in
 * sync with si_get_csb_size().  Bails out silently on missing cs_data,
 * a NULL buffer, or a non-CONTEXT section.
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (rdev->rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                /* register offsets are relative to the 0xa000
                                 * context register base */
                                buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* per-family raster configuration */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        switch (rdev->family) {
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
                buffer[count++] = cpu_to_le32(0x2a00126a);
                break;
        case CHIP_VERDE:
                buffer[count++] = cpu_to_le32(0x0000124a);
                break;
        case CHIP_OLAND:
                buffer[count++] = cpu_to_le32(0x00000082);
                break;
        case CHIP_HAINAN:
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        default:
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
5694
/*
 * si_init_pg - initialize power gating
 *
 * When any PG feature is enabled: set up SDMA PG (if supported), program
 * the always-on CU mask, and either do the full GFX CG/PG init or just
 * point the RLC at the save/restore and clear-state buffers; finally
 * enable SDMA and GFX power gating.  With no PG support, only the RLC
 * buffer addresses are programmed.
 */
static void si_init_pg(struct radeon_device *rdev)
{
        if (rdev->pg_flags) {
                if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
                        si_init_dma_pg(rdev);
                }
                si_init_ao_cu_mask(rdev);
                if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
                        si_init_gfx_cgpg(rdev);
                } else {
                        /* addresses written shifted by 8 (256-byte aligned) */
                        WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
                        WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
                }
                si_enable_dma_pg(rdev, true);
                si_enable_gfx_cgpg(rdev, true);
        } else {
                WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
                WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
        }
}
5715
5716 static void si_fini_pg(struct radeon_device *rdev)
5717 {
5718         if (rdev->pg_flags) {
5719                 si_enable_dma_pg(rdev, false);
5720                 si_enable_gfx_cgpg(rdev, false);
5721         }
5722 }
5723
5724 /*
5725  * RLC
5726  */
5727 void si_rlc_reset(struct radeon_device *rdev)
5728 {
5729         u32 tmp = RREG32(GRBM_SOFT_RESET);
5730
5731         tmp |= SOFT_RESET_RLC;
5732         WREG32(GRBM_SOFT_RESET, tmp);
5733         udelay(50);
5734         tmp &= ~SOFT_RESET_RLC;
5735         WREG32(GRBM_SOFT_RESET, tmp);
5736         udelay(50);
5737 }
5738
5739 static void si_rlc_stop(struct radeon_device *rdev)
5740 {
5741         WREG32(RLC_CNTL, 0);
5742
5743         si_enable_gui_idle_interrupt(rdev, false);
5744
5745         si_wait_for_rlc_serdes(rdev);
5746 }
5747
5748 static void si_rlc_start(struct radeon_device *rdev)
5749 {
5750         WREG32(RLC_CNTL, RLC_ENABLE);
5751
5752         si_enable_gui_idle_interrupt(rdev, true);
5753
5754         udelay(50);
5755 }
5756
5757 static bool si_lbpw_supported(struct radeon_device *rdev)
5758 {
5759         u32 tmp;
5760
5761         /* Enable LBPW only for DDR3 */
5762         tmp = RREG32(MC_SEQ_MISC0);
5763         if ((tmp & 0xF0000000) == 0xB0000000)
5764                 return true;
5765         return false;
5766 }
5767
5768 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5769 {
5770         u32 tmp;
5771
5772         tmp = RREG32(RLC_LB_CNTL);
5773         if (enable)
5774                 tmp |= LOAD_BALANCE_ENABLE;
5775         else
5776                 tmp &= ~LOAD_BALANCE_ENABLE;
5777         WREG32(RLC_LB_CNTL, tmp);
5778
5779         if (!enable) {
5780                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5781                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5782         }
5783 }
5784
/*
 * si_rlc_resume - reset the RLC, upload its microcode, and start it
 *
 * Stops and soft-resets the RLC, initializes power and clock gating,
 * clears the RLC ring/load-balancing registers, then uploads the RLC
 * firmware word by word through RLC_UCODE_ADDR/DATA.  New-style
 * firmware carries a header with an explicit size and little-endian
 * payload; legacy firmware is big-endian with a fixed word count.
 * Finally enables LBPW when supported and restarts the RLC.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
        u32 i;

        if (!rdev->rlc_fw)
                return -EINVAL;

        si_rlc_stop(rdev);

        si_rlc_reset(rdev);

        si_init_pg(rdev);

        si_init_cg(rdev);

        WREG32(RLC_RL_BASE, 0);
        WREG32(RLC_RL_SIZE, 0);
        WREG32(RLC_LB_CNTL, 0);
        WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
        WREG32(RLC_LB_CNTR_INIT, 0);
        WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

        WREG32(RLC_MC_CNTL, 0);
        WREG32(RLC_UCODE_CNTL, 0);

        if (rdev->new_fw) {
                /* new-style firmware: sized header, little-endian payload */
                const struct rlc_firmware_header_v1_0 *hdr =
                        (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
                u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
                const __le32 *fw_data = (const __le32 *)
                        (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

                radeon_ucode_print_rlc_hdr(&hdr->header);

                for (i = 0; i < fw_size; i++) {
                        WREG32(RLC_UCODE_ADDR, i);
                        WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
                }
        } else {
                /* legacy firmware: fixed size, big-endian payload */
                const __be32 *fw_data =
                        (const __be32 *)rdev->rlc_fw->data;
                for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
                        WREG32(RLC_UCODE_ADDR, i);
                        WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
                }
        }
        WREG32(RLC_UCODE_ADDR, 0);

        si_enable_lbpw(rdev, si_lbpw_supported(rdev));

        si_rlc_start(rdev);

        return 0;
}
5839
5840 static void si_enable_interrupts(struct radeon_device *rdev)
5841 {
5842         u32 ih_cntl = RREG32(IH_CNTL);
5843         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5844
5845         ih_cntl |= ENABLE_INTR;
5846         ih_rb_cntl |= IH_RB_ENABLE;
5847         WREG32(IH_CNTL, ih_cntl);
5848         WREG32(IH_RB_CNTL, ih_rb_cntl);
5849         rdev->ih.enabled = true;
5850 }
5851
5852 static void si_disable_interrupts(struct radeon_device *rdev)
5853 {
5854         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5855         u32 ih_cntl = RREG32(IH_CNTL);
5856
5857         ih_rb_cntl &= ~IH_RB_ENABLE;
5858         ih_cntl &= ~ENABLE_INTR;
5859         WREG32(IH_RB_CNTL, ih_rb_cntl);
5860         WREG32(IH_CNTL, ih_cntl);
5861         /* set rptr, wptr to 0 */
5862         WREG32(IH_RB_RPTR, 0);
5863         WREG32(IH_RB_WPTR, 0);
5864         rdev->ih.enabled = false;
5865         rdev->ih.rptr = 0;
5866 }
5867
/*
 * si_disable_interrupt_state - mask every interrupt source
 *
 * Clears the CP ring interrupt enables (preserving the context
 * busy/empty bits on ring 0), both DMA trap enables, GRBM/SRBM
 * interrupts, every CRTC vblank and pageflip source, and - on ASICs
 * with a display controller - DAC autodetect and HPD interrupts
 * (keeping only the HPD polarity bit).
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
        int i;
        u32 tmp;

        /* keep the context busy/empty enables, clear everything else */
        tmp = RREG32(CP_INT_CNTL_RING0) &
                (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
        WREG32(CP_INT_CNTL_RING0, tmp);
        WREG32(CP_INT_CNTL_RING1, 0);
        WREG32(CP_INT_CNTL_RING2, 0);
        tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
        tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
        WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
        WREG32(GRBM_INT_CNTL, 0);
        WREG32(SRBM_INT_CNTL, 0);
        for (i = 0; i < rdev->num_crtc; i++)
                WREG32(INT_MASK + crtc_offsets[i], 0);
        for (i = 0; i < rdev->num_crtc; i++)
                WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);

        if (!ASIC_IS_NODCE(rdev)) {
                WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

                /* mask all 6 hotplug sources, preserving only polarity */
                for (i = 0; i < 6; i++)
                        WREG32_AND(DC_HPDx_INT_CONTROL(i),
                                   DC_HPDx_INT_POLARITY);
        }
}
5897
/*
 * si_irq_init - bring up the interrupt handler (IH)
 *
 * Allocates the IH ring, quiesces interrupts, resumes the RLC, then
 * programs the IH ring buffer (base, size, optional writeback of the
 * write pointer), configures IH_CNTL (RPTR_REARM only works with MSIs),
 * masks all sources, enables PCI bus mastering, and finally turns the
 * IH back on.  Returns 0 on success or a negative errno from ring
 * allocation / RLC resume (the ring is freed on RLC failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
        int ret = 0;
        int rb_bufsz;
        u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

        /* allocate ring */
        ret = r600_ih_ring_alloc(rdev);
        if (ret)
                return ret;

        /* disable irqs */
        si_disable_interrupts(rdev);

        /* init rlc */
        ret = si_rlc_resume(rdev);
        if (ret) {
                r600_ih_ring_fini(rdev);
                return ret;
        }

        /* setup interrupt control */
        /* set dummy read address to dummy page address */
        WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
        interrupt_cntl = RREG32(INTERRUPT_CNTL);
        /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
         * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
         */
        interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
        /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
        interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
        WREG32(INTERRUPT_CNTL, interrupt_cntl);

        WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
        rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

        ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
                      IH_WPTR_OVERFLOW_CLEAR |
                      (rb_bufsz << 1));

        if (rdev->wb.enabled)
                ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

        /* set the writeback address whether it's enabled or not */
        WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
        WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

        WREG32(IH_RB_CNTL, ih_rb_cntl);

        /* set rptr, wptr to 0 */
        WREG32(IH_RB_RPTR, 0);
        WREG32(IH_RB_WPTR, 0);

        /* Default settings for IH_CNTL (disabled at first) */
        ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
        /* RPTR_REARM only works if msi's are enabled */
        if (rdev->msi_enabled)
                ih_cntl |= RPTR_REARM;
        WREG32(IH_CNTL, ih_cntl);

        /* force the active interrupt state to all disabled */
        si_disable_interrupt_state(rdev);

        pci_set_master(rdev->pdev);

        /* enable irqs */
        si_enable_interrupts(rdev);

        return ret;
}
5968
/* The order we write back each register here is important */
/*
 * si_irq_set - program the hw interrupt enable registers
 * @rdev: radeon device pointer
 *
 * Rebuilds all interrupt-enable registers (CP rings, DMA engines,
 * thermal, vblank/pageflip, hotplug) from the current software irq
 * state tracked in rdev->irq, then performs a posting read.
 *
 * Returns 0 on success, -EINVAL if no irq handler has been installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	int i;
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* preserve only the context busy/empty enables; the interrupt
	 * enables are rebuilt from software state below
	 */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	/* clear both DMA trap enables; re-set below if requested */
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* mask both thermal thresholds; re-enabled below when dpm wants them */
	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* vblank enable per crtc: on if either a vblank irq client or a
	 * pending pageflip needs it
	 */
	for (i = 0; i < rdev->num_crtc; i++) {
		radeon_irq_kms_set_irq_n_enabled(
		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
		    rdev->irq.crtc_vblank_int[i] ||
		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
	}

	/* pageflip interrupts are always unmasked */
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

	/* hotplug enables; skipped on parts without display hardware */
	if (!ASIC_IS_NODCE(rdev)) {
		for (i = 0; i < 6; i++) {
			radeon_irq_kms_set_irq_n_enabled(
			    rdev, DC_HPDx_INT_CONTROL(i),
			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
			    rdev->irq.hpd[i], "HPD", i);
		}
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6063
/* The order we write back each register here is important */
/*
 * si_irq_ack - latch and acknowledge pending display interrupts
 *
 * Snapshots the display and grph interrupt status registers into
 * rdev->irq.stat_regs (consumed later by si_irq_process()) and writes
 * the corresponding ack/clear bits back to the hardware.
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	int i, j;
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

	/* nothing to do on parts without display hardware */
	if (ASIC_IS_NODCE(rdev))
		return;

	/* latch status for all 6 display heads; grph status only exists
	 * for crtcs actually present on this asic
	 */
	for (i = 0; i < 6; i++) {
		disp_int[i] = RREG32(si_disp_int_status[i]);
		if (i < rdev->num_crtc)
			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
	}

	/* We write back each interrupt register in pairs of two */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		/* first clear any pageflip event for the crtc pair ... */
		for (j = i; j < (i + 2); j++) {
			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
				       GRPH_PFLIP_INT_CLEAR);
		}

		/* ... then ack vblank/vline for the same pair */
		for (j = i; j < (i + 2); j++) {
			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
				WREG32(VBLANK_STATUS + crtc_offsets[j],
				       VBLANK_ACK);
			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
				WREG32(VLINE_STATUS + crtc_offsets[j],
				       VLINE_ACK);
		}
	}

	/* ack hotplug connect/disconnect interrupts ... */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
	}

	/* ... and hotplug RX (short pulse) interrupts */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
	}
}
6108
/* Disable interrupt delivery, then acknowledge anything still pending
 * and force the per-source enable state to all-disabled.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6117
/* Quiesce interrupts and stop the RLC in preparation for suspend. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6123
/* Final interrupt teardown: suspend irq handling, then free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6129
/*
 * si_get_ih_wptr - fetch the current IH ring write pointer
 *
 * Prefers the writeback copy when writeback is enabled, otherwise reads
 * the IH_RB_WPTR register. On ring overflow, advances the software read
 * pointer past the overwritten entries and clears the overflow flag.
 * Returns the write pointer masked to the ring size.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not-overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6154
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/*
 * si_irq_process - main interrupt handler
 * @rdev: radeon device pointer
 *
 * Drains the IH ring, dispatching each interrupt vector by source id
 * (vblank/vline, pageflip, hotplug, fences, VM faults, thermal, ...).
 * Deferred work (hotplug, DP, thermal) is queued after the loop. A
 * single-owner lock (rdev->ih.lock) guards against concurrent
 * processing; if the write pointer moved while we were draining, the
 * loop restarts.
 *
 * Returns IRQ_HANDLED when entries were processed, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 crtc_idx, hpd_idx;
	u32 mask;
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;
	const char *event_name;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! Each IV entry is 16 bytes / 4 dwords. */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
		case 2: /* D2 vblank/vline */
		case 3: /* D3 vblank/vline */
		case 4: /* D4 vblank/vline */
		case 5: /* D5 vblank/vline */
		case 6: /* D6 vblank/vline */
			crtc_idx = src_id - 1;

			if (src_data == 0) { /* vblank */
				mask = LB_D1_VBLANK_INTERRUPT;
				event_name = "vblank";

				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
					drm_handle_vblank(rdev->ddev, crtc_idx);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
					radeon_crtc_handle_vblank(rdev,
								  crtc_idx);
				}

			} else if (src_data == 1) { /* vline */
				mask = LB_D1_VLINE_INTERRUPT;
				event_name = "vline";
			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			/* sanity check: the event should also be visible in the
			 * latched status snapshot taken by si_irq_ack()
			 */
			if (!(disp_int[crtc_idx] & mask)) {
				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
					  crtc_idx + 1, event_name);
			}

			disp_int[crtc_idx] &= ~mask;
			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* src_data 0-5: HPD pins, 6-11: HPD RX (short pulse) */
			if (src_data <= 5) {
				hpd_idx = src_data;
				mask = DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				event_name = "HPD";

			} else if (src_data <= 11) {
				hpd_idx = src_data - 6;
				mask = DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				event_name = "HPD_RX";

			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[hpd_idx] & mask))
				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

			disp_int[hpd_idx] &= ~mask;
			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
			break;
		case 96: /* SRBM read error */
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146: /* VM protection faults */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	/* kick off deferred work outside the dispatch loop */
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6375
6376 /*
6377  * startup/shutdown callbacks
6378  */
/*
 * si_uvd_init - one-time UVD block setup
 * @rdev: radeon device pointer
 *
 * Initializes the UVD engine and its ring. Failure is not fatal: UVD
 * is simply disabled (rdev->has_uvd cleared) and the rest of the asic
 * comes up without it.
 */
static void si_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
		 * to early fails uvd_v2_2_resume() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable uvd here.
		 */
		rdev->has_uvd = false;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
6401
/*
 * si_uvd_start - resume UVD and start its fence driver
 * @rdev: radeon device pointer
 *
 * On any failure the UVD ring size is zeroed, which makes
 * si_uvd_resume() skip ring bring-up later.
 */
static void si_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = uvd_v2_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* mark the ring unusable so si_uvd_resume() bails out */
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
6424
/*
 * si_uvd_resume - bring up the UVD ring
 * @rdev: radeon device pointer
 *
 * Initializes the UVD ring and starts the engine. Skipped entirely
 * if UVD is absent or si_uvd_start() already failed (ring_size == 0).
 * Errors are logged but not propagated.
 */
static void si_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}
6445
/*
 * si_vce_init - one-time VCE block setup
 * @rdev: radeon device pointer
 *
 * Initializes the VCE engine and both of its rings. Failure is not
 * fatal: VCE is simply disabled (rdev->has_vce cleared).
 */
static void si_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
		 * to early fails si_vce_start() and thus nothing happens
		 * there. So it is pointless to try to go through that code
		 * hence why we disable vce here.
		 */
		rdev->has_vce = false;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}
6470
/*
 * si_vce_start - resume VCE and start its fence drivers
 * @rdev: radeon device pointer
 *
 * On any failure both VCE ring sizes are zeroed, which makes
 * si_vce_resume() skip ring bring-up later.
 */
static void si_vce_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
		goto error;
	}
	r = vce_v1_0_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
		goto error;
	}
	return;

error:
	/* mark both rings unusable so si_vce_resume() bails out */
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
}
6504
6505 static void si_vce_resume(struct radeon_device *rdev)
6506 {
6507         struct radeon_ring *ring;
6508         int r;
6509
6510         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6511                 return;
6512
6513         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6514         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6515         if (r) {
6516                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6517                 return;
6518         }
6519         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6520         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6521         if (r) {
6522                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6523                 return;
6524         }
6525         r = vce_v1_0_init(rdev);
6526         if (r) {
6527                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6528                 return;
6529         }
6530 }
6531
/*
 * si_startup - bring the asic to an operational state
 * @rdev: radeon device pointer
 *
 * Performs the full hardware bring-up sequence shared by init and
 * resume: PCIe/ASPM, MC, GART, GPU core, RLC, writeback, fence rings,
 * IRQs, CP/DMA rings, UVD/VCE, IB pool, VM manager and audio. The
 * order of these steps follows hardware dependencies and must not be
 * rearranged casually. Returns 0 on success or a negative error code.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with dpm enabled the MC microcode was already loaded earlier */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring: 3 CP, 2 DMA */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are handled internally and are non-fatal */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the 3 CP rings ... */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* ... and the 2 DMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6690
/*
 * si_resume - resume the asic from suspend
 * @rdev: radeon device pointer
 *
 * Re-posts the card via atombios, restores the golden register
 * settings, resumes power management and reruns the full startup
 * sequence. Returns 0 on success or a negative error code.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 * (NOTE(review): comment inherited from the r600/rv770 code path.)
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6719
/*
 * si_suspend - prepare the asic for suspend
 * @rdev: radeon device pointer
 *
 * Stops all engines (CP, DMA, UVD, VCE), tears down powergating,
 * clockgating and interrupts, disables writeback and the GART.
 * Teardown order mirrors the reverse of si_startup(). Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6740
/* The plan is to move initialization into this function and use helper
 * functions so that radeon_device_init does pretty much nothing more
 * than call asic-specific functions. This should also allow us to
 * remove a bunch of callback functions like vram_info.
 */
6747 int si_init(struct radeon_device *rdev)
6748 {
6749         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6750         int r;
6751
6752         /* Read BIOS */
6753         if (!radeon_get_bios(rdev)) {
6754                 if (ASIC_IS_AVIVO(rdev))
6755                         return -EINVAL;
6756         }
6757         /* Must be an ATOMBIOS */
6758         if (!rdev->is_atom_bios) {
6759                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6760                 return -EINVAL;
6761         }
6762         r = radeon_atombios_init(rdev);
6763         if (r)
6764                 return r;
6765
6766         /* Post card if necessary */
6767         if (!radeon_card_posted(rdev)) {
6768                 if (!rdev->bios) {
6769                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6770                         return -EINVAL;
6771                 }
6772                 DRM_INFO("GPU not posted. posting now...\n");
6773                 atom_asic_init(rdev->mode_info.atom_context);
6774         }
6775         /* init golden registers */
6776         si_init_golden_registers(rdev);
6777         /* Initialize scratch registers */
6778         si_scratch_init(rdev);
6779         /* Initialize surface registers */
6780         radeon_surface_init(rdev);
6781         /* Initialize clocks */
6782         radeon_get_clock_info(rdev->ddev);
6783
6784         /* Fence driver */
6785         r = radeon_fence_driver_init(rdev);
6786         if (r)
6787                 return r;
6788
6789         /* initialize memory controller */
6790         r = si_mc_init(rdev);
6791         if (r)
6792                 return r;
6793         /* Memory manager */
6794         r = radeon_bo_init(rdev);
6795         if (r)
6796                 return r;
6797
6798         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6799             !rdev->rlc_fw || !rdev->mc_fw) {
6800                 r = si_init_microcode(rdev);
6801                 if (r) {
6802                         DRM_ERROR("Failed to load firmware!\n");
6803                         /*(DEBLOBBED)*/
6804                 }
6805         }
6806
6807         /* Initialize power management */
6808         radeon_pm_init(rdev);
6809
6810         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6811         ring->ring_obj = NULL;
6812         r600_ring_init(rdev, ring, 1024 * 1024);
6813
6814         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6815         ring->ring_obj = NULL;
6816         r600_ring_init(rdev, ring, 1024 * 1024);
6817
6818         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6819         ring->ring_obj = NULL;
6820         r600_ring_init(rdev, ring, 1024 * 1024);
6821
6822         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6823         ring->ring_obj = NULL;
6824         r600_ring_init(rdev, ring, 64 * 1024);
6825
6826         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6827         ring->ring_obj = NULL;
6828         r600_ring_init(rdev, ring, 64 * 1024);
6829
6830         si_uvd_init(rdev);
6831         si_vce_init(rdev);
6832
6833         rdev->ih.ring_obj = NULL;
6834         r600_ih_ring_init(rdev, 64 * 1024);
6835
6836         r = r600_pcie_gart_init(rdev);
6837         if (r)
6838                 return r;
6839
6840         rdev->accel_working = true;
6841         r = si_startup(rdev);
6842         if (r) {
6843                 dev_err(rdev->dev, "disabling GPU acceleration\n");
6844                 si_cp_fini(rdev);
6845                 cayman_dma_fini(rdev);
6846                 si_irq_fini(rdev);
6847                 sumo_rlc_fini(rdev);
6848                 radeon_wb_fini(rdev);
6849                 radeon_ib_pool_fini(rdev);
6850                 radeon_vm_manager_fini(rdev);
6851                 radeon_irq_kms_fini(rdev);
6852                 si_pcie_gart_fini(rdev);
6853                 rdev->accel_working = false;
6854         }
6855
6856         /* Don't start up if the MC ucode is missing.
6857          * The default clocks and voltages before the MC ucode
6858          * is loaded are not suffient for advanced operations.
6859          */
6860         if (!rdev->mc_fw) {
6861                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6862                 /*(DEBLOBBED)*/
6863         }
6864
6865         return 0;
6866 }
6867
void si_fini(struct radeon_device *rdev)
{
	/* Tear down the ASIC roughly in reverse order of si_init()/si_startup().
	 * The ordering matters: rings, RLC and IRQs come down before the
	 * writeback/IB/VM infrastructure they depend on, and the GART,
	 * memory-manager and BIOS resources are released last.
	 */
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	/* disable powergating/clockgating before the RLC goes away */
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD/VCE are optional engines; only torn down when probed */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6896
6897 /**
6898  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6899  *
6900  * @rdev: radeon_device pointer
6901  *
6902  * Fetches a GPU clock counter snapshot (SI).
6903  * Returns the 64 bit clock counter snapshot.
6904  */
6905 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6906 {
6907         uint64_t clock;
6908
6909         mutex_lock(&rdev->gpu_clock_mutex);
6910         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6911         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6912                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6913         mutex_unlock(&rdev->gpu_clock_mutex);
6914         return clock;
6915 }
6916
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock in kHz (0 = leave PLL bypassed)
 * @dclk: requested UVD decode clock in kHz (0 = leave PLL bypassed)
 *
 * Puts the UPLL into bypass, recomputes the feedback/post dividers and
 * walks the PLL through the hardware-mandated reset/settle/handshake
 * sequence before switching VCLK/DCLK back to the PLL outputs.
 * The order of the register writes and the mdelay()s below follow the
 * required programming sequence and must not be rearranged.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* compute fb and post dividers for the requested vclk/dclk */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC so the bypass config takes effect */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* spare bit selects VCO range based on the feedback divider */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	/* second handshake: commit the new divider configuration */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7005
/* si_pcie_gen3_enable - try to bring the PCIe link up to gen2/gen3 speed.
 *
 * Checks the root port's speed capability, re-runs gen3 equalization if
 * needed (toggling QUIESCE/REDO_EQ while preserving the HAWD bit and the
 * LNKCTL2 compliance/margin fields on both link ends), programs the
 * target link speed, and finally triggers a software-initiated speed
 * change.  The exact write ordering follows the SI programming sequence
 * and must not be rearranged.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	/* no upstream bridge to negotiate with */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* radeon.pcie_gen2=0 disables all link-speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no discrete PCIe link */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* nothing to do unless the root port supports gen2 or gen3 */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends so HAWD can be
			 * restored after each equalization retry below
			 */
			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			/* force hardware autonomous width disable during EQ */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back up if it trained narrower
			 * than what was physically detected
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-sample both ends' link control state */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2 */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				/* release the link from quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 */
	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	/* kick off the software-initiated speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7192
/* si_program_aspm - configure PCIe ASPM (L0s/L1) and related power savings.
 *
 * Programs N_FTS, L0s/L1 inactivity timers, PLL power-down-in-L1 on both
 * PIF PHYs, and (when the root port advertises CLKPM) switches several
 * internal clocks to run from the reference clock so the link clock can
 * gate.  The disable_* booleans are compile-time policy knobs: all
 * features are currently enabled.  Register write ordering follows the
 * SI programming sequence and must not be rearranged.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* radeon.aspm=0 disables everything here */
	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* base L0s/L1 configuration; inactivity timers re-enabled below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow both PIF PHYs to power their PLLs down
			 * in the OFF and TXS2 states
			 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times, except on Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* Oland/Hainan need a non-zero LS2 exit time */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* clock-request power management is only usable
			 * when the root port advertises CLKPM support
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* run thermal monitor clocks off the refclk */
				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the BIF memories */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* turn the L0s inactivity timer back off when the link
		 * partner can't honor our N_FTS override (reversed lanes)
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7397
7398 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7399 {
7400         unsigned i;
7401
7402         /* make sure VCEPLL_CTLREQ is deasserted */
7403         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7404
7405         mdelay(10);
7406
7407         /* assert UPLL_CTLREQ */
7408         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7409
7410         /* wait for CTLACK and CTLACK2 to get asserted */
7411         for (i = 0; i < 100; ++i) {
7412                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7413                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7414                         break;
7415                 mdelay(10);
7416         }
7417
7418         /* deassert UPLL_CTLREQ */
7419         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7420
7421         if (i == 100) {
7422                 DRM_ERROR("Timeout setting UVD clocks!\n");
7423                 return -ETIMEDOUT;
7424         }
7425
7426         return 0;
7427 }
7428
/**
 * si_set_vce_clocks - program the VCE PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE video clock in kHz (0 = bypass and sleep the PLL)
 * @ecclk: requested VCE core clock in kHz (0 = bypass and sleep the PLL)
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: bypass, recompute the
 * dividers, walk the PLL through the reset/settle/SMC-handshake
 * sequence, then switch EVCLK/ECCLK back to the PLL outputs.  The write
 * order and delays follow the required programming sequence and must
 * not be rearranged.  Returns 0 on success, negative error on failure.
 */
int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
	int r;

	/* bypass evclk and ecclk with bclk */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
		     ~VCEPLL_BYPASS_EN_MASK);

	if (!evclk || !ecclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
			     ~VCEPLL_SLEEP_MASK);
		return 0;
	}

	/* compute fb and post dividers for the requested evclk/ecclk */
	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &evclk_div, &ecclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
		     ~VCEPLL_VCO_MODE_MASK);

	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
		     ~VCEPLL_SLEEP_MASK);
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);

	/* deassert VCEPLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC so the bypass config takes effect */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* assert VCEPLL_RESET again */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);

	/* set PDIV_A and PDIV_B */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);

	/* second handshake: commit the new divider configuration */
	r = si_vce_send_vcepll_ctlreq(rdev);
	if (r)
		return r;

	/* switch EVCLK and ECCLK selection to the PLL outputs */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}