GNU Linux-libre 4.19.245-gnu1
[releases.git] / drivers / gpu / drm / i915 / intel_pm.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27
28 #include <linux/cpufreq.h>
29 #include <drm/drm_plane_helper.h>
30 #include "i915_drv.h"
31 #include "intel_drv.h"
32 #include "../../../platform/x86/intel_ips.h"
33 #include <linux/module.h>
34 #include <drm/drm_atomic_helper.h>
35
36 /**
37  * DOC: RC6
38  *
 39  * RC6 is a special power state which allows the GPU to enter a very
 40  * low-voltage mode when idle, going as low as 0V while in this state.  This
 41  * state is entered automatically when the GPU is idle and RC6 support is
 42  * enabled; as soon as a new workload arrives, the GPU wakes up automatically.
43  *
 44  * There are different RC6 modes available on Intel GPUs, which differ in
 45  * the latency required to enter and leave RC6 and in the voltage consumed
 46  * by the GPU in each state.
47  *
 48  * The combination of the following flags defines which states the GPU is
 49  * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 50  * RC6pp is the deepest RC6. Hardware support varies according to the
 51  * GPU, BIOS, chipset and platform. RC6 is usually the safest state and the
 52  * one which brings the most power savings; deeper states save more power,
 53  * but require higher latency to switch to and wake up from.
54  */
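/*
 * Illustrative sketch only (an editorial addition, not part of the
 * original file): the RC6/RC6p/RC6pp states described above are
 * typically requested by OR-ing per-state enable bits into one control
 * value, roughly along the lines of
 *
 *	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
 *	if (rc6p_supported)
 *		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
 *	if (rc6pp_supported)
 *		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
 *
 * The rc6p_supported/rc6pp_supported predicates are hypothetical
 * placeholders; the bit names are assumed to come from i915_reg.h, and
 * the real enabling code consults the GPU, BIOS, chipset and platform
 * as noted above.
 */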
55
56 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
57 {
58         if (HAS_LLC(dev_priv)) {
59                 /*
60                  * WaCompressedResourceDisplayNewHashMode:skl,kbl
61                  * Display WA #0390: skl,kbl
62                  *
63                  * Must match Sampler, Pixel Back End, and Media. See
64                  * WaCompressedResourceSamplerPbeMediaNewHashMode.
65                  */
66                 I915_WRITE(CHICKEN_PAR1_1,
67                            I915_READ(CHICKEN_PAR1_1) |
68                            SKL_DE_COMPRESSED_HASH_MODE);
69         }
70
71         /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
72         I915_WRITE(CHICKEN_PAR1_1,
73                    I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
74
75         /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
76         I915_WRITE(GEN8_CHICKEN_DCPR_1,
77                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
78
79         /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
80         /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
81         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
82                    DISP_FBC_WM_DIS |
83                    DISP_FBC_MEMORY_WAKE);
84
85         /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
86         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
87                    ILK_DPFC_DISABLE_DUMMY0);
88
89         if (IS_SKYLAKE(dev_priv)) {
90                 /* WaDisableDopClockGating */
91                 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
92                            & ~GEN7_DOP_CLOCK_GATE_ENABLE);
93         }
94 }
95
96 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
97 {
98         gen9_init_clock_gating(dev_priv);
99
100         /* WaDisableSDEUnitClockGating:bxt */
101         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
102                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
103
104         /*
105          * FIXME:
106          * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
107          */
108         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
109                    GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
110
111         /*
112          * Wa: Backlight PWM may stop in the asserted state, causing backlight
113          * to stay fully on.
114          */
115         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
116                    PWM1_GATING_DIS | PWM2_GATING_DIS);
117
118         /*
119          * Lower the display internal timeout.
120          * This is needed to avoid any hard hangs when DSI port PLL
 121          * is off and an MMIO access is attempted by any privileged
 122          * application, using batch buffers or any other means.
123          */
124         I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
125 }
126
127 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
128 {
129         gen9_init_clock_gating(dev_priv);
130
131         /*
132          * WaDisablePWMClockGating:glk
133          * Backlight PWM may stop in the asserted state, causing backlight
134          * to stay fully on.
135          */
136         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
137                    PWM1_GATING_DIS | PWM2_GATING_DIS);
138
139         /* WaDDIIOTimeout:glk */
140         if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
141                 u32 val = I915_READ(CHICKEN_MISC_2);
142                 val &= ~(GLK_CL0_PWR_DOWN |
143                          GLK_CL1_PWR_DOWN |
144                          GLK_CL2_PWR_DOWN);
145                 I915_WRITE(CHICKEN_MISC_2, val);
146         }
147
148 }
149
150 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
151 {
152         u32 tmp;
153
154         tmp = I915_READ(CLKCFG);
155
156         switch (tmp & CLKCFG_FSB_MASK) {
157         case CLKCFG_FSB_533:
158                 dev_priv->fsb_freq = 533; /* 133*4 */
159                 break;
160         case CLKCFG_FSB_800:
161                 dev_priv->fsb_freq = 800; /* 200*4 */
162                 break;
163         case CLKCFG_FSB_667:
164                 dev_priv->fsb_freq =  667; /* 167*4 */
165                 break;
166         case CLKCFG_FSB_400:
167                 dev_priv->fsb_freq = 400; /* 100*4 */
168                 break;
169         }
170
171         switch (tmp & CLKCFG_MEM_MASK) {
172         case CLKCFG_MEM_533:
173                 dev_priv->mem_freq = 533;
174                 break;
175         case CLKCFG_MEM_667:
176                 dev_priv->mem_freq = 667;
177                 break;
178         case CLKCFG_MEM_800:
179                 dev_priv->mem_freq = 800;
180                 break;
181         }
182
183         /* detect pineview DDR3 setting */
184         tmp = I915_READ(CSHRDDR3CTL);
185         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
186 }
187
188 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
189 {
190         u16 ddrpll, csipll;
191
192         ddrpll = I915_READ16(DDRMPLL1);
193         csipll = I915_READ16(CSIPLL0);
194
195         switch (ddrpll & 0xff) {
196         case 0xc:
197                 dev_priv->mem_freq = 800;
198                 break;
199         case 0x10:
200                 dev_priv->mem_freq = 1066;
201                 break;
202         case 0x14:
203                 dev_priv->mem_freq = 1333;
204                 break;
205         case 0x18:
206                 dev_priv->mem_freq = 1600;
207                 break;
208         default:
209                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
210                                  ddrpll & 0xff);
211                 dev_priv->mem_freq = 0;
212                 break;
213         }
214
215         dev_priv->ips.r_t = dev_priv->mem_freq;
216
217         switch (csipll & 0x3ff) {
218         case 0x00c:
219                 dev_priv->fsb_freq = 3200;
220                 break;
221         case 0x00e:
222                 dev_priv->fsb_freq = 3733;
223                 break;
224         case 0x010:
225                 dev_priv->fsb_freq = 4266;
226                 break;
227         case 0x012:
228                 dev_priv->fsb_freq = 4800;
229                 break;
230         case 0x014:
231                 dev_priv->fsb_freq = 5333;
232                 break;
233         case 0x016:
234                 dev_priv->fsb_freq = 5866;
235                 break;
236         case 0x018:
237                 dev_priv->fsb_freq = 6400;
238                 break;
239         default:
240                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
241                                  csipll & 0x3ff);
242                 dev_priv->fsb_freq = 0;
243                 break;
244         }
245
246         if (dev_priv->fsb_freq == 3200) {
247                 dev_priv->ips.c_m = 0;
248         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
249                 dev_priv->ips.c_m = 1;
250         } else {
251                 dev_priv->ips.c_m = 2;
252         }
253 }
254
255 static const struct cxsr_latency cxsr_latency_table[] = {
256         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
257         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
258         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
259         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
260         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
261
262         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
263         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
264         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
265         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
266         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
267
268         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
269         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
270         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
271         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
272         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
273
274         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
275         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
276         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
277         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
278         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
279
280         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
281         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
282         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
283         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
284         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
285
286         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
287         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
288         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
289         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
290         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
291 };
292
293 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
294                                                          bool is_ddr3,
295                                                          int fsb,
296                                                          int mem)
297 {
298         const struct cxsr_latency *latency;
299         int i;
300
301         if (fsb == 0 || mem == 0)
302                 return NULL;
303
304         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
305                 latency = &cxsr_latency_table[i];
306                 if (is_desktop == latency->is_desktop &&
307                     is_ddr3 == latency->is_ddr3 &&
308                     fsb == latency->fsb_freq && mem == latency->mem_freq)
309                         return latency;
310         }
311
312         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
313
314         return NULL;
315 }
316
317 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
318 {
319         u32 val;
320
321         mutex_lock(&dev_priv->pcu_lock);
322
323         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
324         if (enable)
325                 val &= ~FORCE_DDR_HIGH_FREQ;
326         else
327                 val |= FORCE_DDR_HIGH_FREQ;
328         val &= ~FORCE_DDR_LOW_FREQ;
329         val |= FORCE_DDR_FREQ_REQ_ACK;
330         vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
331
332         if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
333                       FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
334                 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
335
336         mutex_unlock(&dev_priv->pcu_lock);
337 }
338
339 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
340 {
341         u32 val;
342
343         mutex_lock(&dev_priv->pcu_lock);
344
345         val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
346         if (enable)
347                 val |= DSP_MAXFIFO_PM5_ENABLE;
348         else
349                 val &= ~DSP_MAXFIFO_PM5_ENABLE;
350         vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
351
352         mutex_unlock(&dev_priv->pcu_lock);
353 }
354
355 #define FW_WM(value, plane) \
356         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
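/*
 * Editorial note (not part of the original file): by token pasting,
 * FW_WM(wm, SR) expands to
 *
 *	((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK
 *
 * i.e. the watermark value is shifted into the SR field of the DSPFW
 * register and clipped to that field's width by the mask.
 */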
357
358 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
359 {
360         bool was_enabled;
361         u32 val;
362
363         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
364                 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
365                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
366                 POSTING_READ(FW_BLC_SELF_VLV);
367         } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
368                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
369                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
370                 POSTING_READ(FW_BLC_SELF);
371         } else if (IS_PINEVIEW(dev_priv)) {
372                 val = I915_READ(DSPFW3);
373                 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
374                 if (enable)
375                         val |= PINEVIEW_SELF_REFRESH_EN;
376                 else
377                         val &= ~PINEVIEW_SELF_REFRESH_EN;
378                 I915_WRITE(DSPFW3, val);
379                 POSTING_READ(DSPFW3);
380         } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
381                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
382                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
383                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
384                 I915_WRITE(FW_BLC_SELF, val);
385                 POSTING_READ(FW_BLC_SELF);
386         } else if (IS_I915GM(dev_priv)) {
387                 /*
 388                  * FIXME can't find a bit like this for 915G, and
 389                  * yet it does have the related watermark in
 390                  * FW_BLC_SELF. What's going on?
391                  */
392                 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
393                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
394                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
395                 I915_WRITE(INSTPM, val);
396                 POSTING_READ(INSTPM);
397         } else {
398                 return false;
399         }
400
401         trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
402
403         DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
404                       enableddisabled(enable),
405                       enableddisabled(was_enabled));
406
407         return was_enabled;
408 }
409
410 /**
411  * intel_set_memory_cxsr - Configure CxSR state
412  * @dev_priv: i915 device
413  * @enable: Allow vs. disallow CxSR
414  *
415  * Allow or disallow the system to enter a special CxSR
416  * (C-state self refresh) state. What typically happens in CxSR mode
417  * is that several display FIFOs may get combined into a single larger
418  * FIFO for a particular plane (so called max FIFO mode) to allow the
419  * system to defer memory fetches longer, and the memory will enter
420  * self refresh.
421  *
 422  * Note that enabling CxSR does not guarantee that the system enters
423  * this special mode, nor does it guarantee that the system stays
424  * in that mode once entered. So this just allows/disallows the system
425  * to autonomously utilize the CxSR mode. Other factors such as core
426  * C-states will affect when/if the system actually enters/exits the
427  * CxSR mode.
428  *
429  * Note that on VLV/CHV this actually only controls the max FIFO mode,
430  * and the system is free to enter/exit memory self refresh at any time
431  * even when the use of CxSR has been disallowed.
432  *
433  * While the system is actually in the CxSR/max FIFO mode, some plane
434  * control registers will not get latched on vblank. Thus in order to
435  * guarantee the system will respond to changes in the plane registers
436  * we must always disallow CxSR prior to making changes to those registers.
437  * Unfortunately the system will re-evaluate the CxSR conditions at
438  * frame start which happens after vblank start (which is when the plane
439  * registers would get latched), so we can't proceed with the plane update
440  * during the same frame where we disallowed CxSR.
441  *
442  * Certain platforms also have a deeper HPLL SR mode. Fortunately the
443  * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
444  * the hardware w.r.t. HPLL SR when writing to plane registers.
445  * Disallowing just CxSR is sufficient.
446  */
447 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
448 {
449         bool ret;
450
451         mutex_lock(&dev_priv->wm.wm_mutex);
452         ret = _intel_set_memory_cxsr(dev_priv, enable);
453         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
454                 dev_priv->wm.vlv.cxsr = enable;
455         else if (IS_G4X(dev_priv))
456                 dev_priv->wm.g4x.cxsr = enable;
457         mutex_unlock(&dev_priv->wm.wm_mutex);
458
459         return ret;
460 }
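/*
 * Illustrative usage sketch (an editorial addition; the helper calls
 * and ordering are assumptions, not lifted verbatim from this driver's
 * callers): code that is about to touch plane registers on a platform
 * where CxSR latching matters would do roughly
 *
 *	if (intel_set_memory_cxsr(dev_priv, false))
 *		intel_wait_for_vblank(dev_priv, crtc->pipe);
 *	... write the plane registers ...
 *	intel_set_memory_cxsr(dev_priv, true);
 *
 * i.e. disallow CxSR, let a vblank pass so the latching problem
 * described above cannot bite, update the planes, and only then allow
 * CxSR again once the new watermarks are in place.
 */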
461
462 /*
463  * Latency for FIFO fetches is dependent on several factors:
464  *   - memory configuration (speed, channels)
465  *   - chipset
466  *   - current MCH state
467  * It can be fairly high in some situations, so here we assume a fairly
468  * pessimal value.  It's a tradeoff between extra memory fetches (if we
469  * set this value too high, the FIFO will fetch frequently to stay full)
470  * and power consumption (set it too low to save power and we might see
471  * FIFO underruns and display "flicker").
472  *
473  * A value of 5us seems to be a good balance; safe for very low end
474  * platforms but not overly aggressive on lower latency configs.
475  */
476 static const int pessimal_latency_ns = 5000;
477
478 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
479         ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
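/*
 * Worked example (editorial addition, values purely illustrative):
 * with dsparb = 0x000000d8 and dsparb2 = 0x00000001,
 * VLV_FIFO_START(dsparb, dsparb2, 0, 0) yields
 *
 *	(0xd8 & 0xff) | ((0x1 & 0x1) << 8) = 0x1d8 = 472
 *
 * i.e. the 8 low bits come from DSPARB and the 9th bit from DSPARB2,
 * giving a FIFO split point in the 0-511 range used below.
 */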
480
481 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
482 {
483         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
484         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
485         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
486         enum pipe pipe = crtc->pipe;
487         int sprite0_start, sprite1_start;
488
489         switch (pipe) {
490                 uint32_t dsparb, dsparb2, dsparb3;
491         case PIPE_A:
492                 dsparb = I915_READ(DSPARB);
493                 dsparb2 = I915_READ(DSPARB2);
494                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
495                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
496                 break;
497         case PIPE_B:
498                 dsparb = I915_READ(DSPARB);
499                 dsparb2 = I915_READ(DSPARB2);
500                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
501                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
502                 break;
503         case PIPE_C:
504                 dsparb2 = I915_READ(DSPARB2);
505                 dsparb3 = I915_READ(DSPARB3);
506                 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
507                 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
508                 break;
509         default:
510                 MISSING_CASE(pipe);
511                 return;
512         }
513
514         fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
515         fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
516         fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
517         fifo_state->plane[PLANE_CURSOR] = 63;
518 }
519
520 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
521                               enum i9xx_plane_id i9xx_plane)
522 {
523         uint32_t dsparb = I915_READ(DSPARB);
524         int size;
525
526         size = dsparb & 0x7f;
527         if (i9xx_plane == PLANE_B)
528                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
529
530         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
531                       dsparb, plane_name(i9xx_plane), size);
532
533         return size;
534 }
535
536 static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
537                               enum i9xx_plane_id i9xx_plane)
538 {
539         uint32_t dsparb = I915_READ(DSPARB);
540         int size;
541
542         size = dsparb & 0x1ff;
543         if (i9xx_plane == PLANE_B)
544                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
545         size >>= 1; /* Convert to cachelines */
546
547         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
548                       dsparb, plane_name(i9xx_plane), size);
549
550         return size;
551 }
552
553 static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
554                               enum i9xx_plane_id i9xx_plane)
555 {
556         uint32_t dsparb = I915_READ(DSPARB);
557         int size;
558
559         size = dsparb & 0x7f;
560         size >>= 2; /* Convert to cachelines */
561
562         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
563                       dsparb, plane_name(i9xx_plane), size);
564
565         return size;
566 }
567
568 /* Pineview has different values for various configs */
569 static const struct intel_watermark_params pineview_display_wm = {
570         .fifo_size = PINEVIEW_DISPLAY_FIFO,
571         .max_wm = PINEVIEW_MAX_WM,
572         .default_wm = PINEVIEW_DFT_WM,
573         .guard_size = PINEVIEW_GUARD_WM,
574         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
575 };
576 static const struct intel_watermark_params pineview_display_hplloff_wm = {
577         .fifo_size = PINEVIEW_DISPLAY_FIFO,
578         .max_wm = PINEVIEW_MAX_WM,
579         .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
580         .guard_size = PINEVIEW_GUARD_WM,
581         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
582 };
583 static const struct intel_watermark_params pineview_cursor_wm = {
584         .fifo_size = PINEVIEW_CURSOR_FIFO,
585         .max_wm = PINEVIEW_CURSOR_MAX_WM,
586         .default_wm = PINEVIEW_CURSOR_DFT_WM,
587         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
588         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
589 };
590 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
591         .fifo_size = PINEVIEW_CURSOR_FIFO,
592         .max_wm = PINEVIEW_CURSOR_MAX_WM,
593         .default_wm = PINEVIEW_CURSOR_DFT_WM,
594         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
595         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
596 };
597 static const struct intel_watermark_params i965_cursor_wm_info = {
598         .fifo_size = I965_CURSOR_FIFO,
599         .max_wm = I965_CURSOR_MAX_WM,
600         .default_wm = I965_CURSOR_DFT_WM,
601         .guard_size = 2,
602         .cacheline_size = I915_FIFO_LINE_SIZE,
603 };
604 static const struct intel_watermark_params i945_wm_info = {
605         .fifo_size = I945_FIFO_SIZE,
606         .max_wm = I915_MAX_WM,
607         .default_wm = 1,
608         .guard_size = 2,
609         .cacheline_size = I915_FIFO_LINE_SIZE,
610 };
611 static const struct intel_watermark_params i915_wm_info = {
612         .fifo_size = I915_FIFO_SIZE,
613         .max_wm = I915_MAX_WM,
614         .default_wm = 1,
615         .guard_size = 2,
616         .cacheline_size = I915_FIFO_LINE_SIZE,
617 };
618 static const struct intel_watermark_params i830_a_wm_info = {
619         .fifo_size = I855GM_FIFO_SIZE,
620         .max_wm = I915_MAX_WM,
621         .default_wm = 1,
622         .guard_size = 2,
623         .cacheline_size = I830_FIFO_LINE_SIZE,
624 };
625 static const struct intel_watermark_params i830_bc_wm_info = {
626         .fifo_size = I855GM_FIFO_SIZE,
627         .max_wm = I915_MAX_WM/2,
628         .default_wm = 1,
629         .guard_size = 2,
630         .cacheline_size = I830_FIFO_LINE_SIZE,
631 };
632 static const struct intel_watermark_params i845_wm_info = {
633         .fifo_size = I830_FIFO_SIZE,
634         .max_wm = I915_MAX_WM,
635         .default_wm = 1,
636         .guard_size = 2,
637         .cacheline_size = I830_FIFO_LINE_SIZE,
638 };
639
640 /**
641  * intel_wm_method1 - Method 1 / "small buffer" watermark formula
642  * @pixel_rate: Pipe pixel rate in kHz
643  * @cpp: Plane bytes per pixel
644  * @latency: Memory wakeup latency in 0.1us units
645  *
646  * Compute the watermark using the method 1 or "small buffer"
 647  * formula. The caller may additionally add extra cachelines
648  * to account for TLB misses and clock crossings.
649  *
650  * This method is concerned with the short term drain rate
 651  * of the FIFO, i.e. it does not account for blanking periods
652  * which would effectively reduce the average drain rate across
653  * a longer period. The name "small" refers to the fact the
654  * FIFO is relatively small compared to the amount of data
655  * fetched.
656  *
657  * The FIFO level vs. time graph might look something like:
658  *
659  *   |\   |\
660  *   | \  | \
661  * __---__---__ (- plane active, _ blanking)
662  * -> time
663  *
664  * or perhaps like this:
665  *
666  *   |\|\  |\|\
667  * __----__----__ (- plane active, _ blanking)
668  * -> time
669  *
670  * Returns:
671  * The watermark in bytes
672  */
673 static unsigned int intel_wm_method1(unsigned int pixel_rate,
674                                      unsigned int cpp,
675                                      unsigned int latency)
676 {
677         uint64_t ret;
678
679         ret = (uint64_t) pixel_rate * cpp * latency;
680         ret = DIV_ROUND_UP_ULL(ret, 10000);
681
682         return ret;
683 }
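/*
 * Worked example (editorial addition, numbers purely illustrative):
 * a 148500 kHz pixel clock, 4 bytes per pixel and a 5 usec wakeup
 * latency (latency == 50 in 0.1 usec units) give
 *
 *	148500 * 4 * 50 / 10000 = 2970 bytes
 *
 * i.e. roughly 3 KiB of FIFO data is drained while memory wakes up.
 */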
684
685 /**
686  * intel_wm_method2 - Method 2 / "large buffer" watermark formula
687  * @pixel_rate: Pipe pixel rate in kHz
688  * @htotal: Pipe horizontal total
689  * @width: Plane width in pixels
690  * @cpp: Plane bytes per pixel
691  * @latency: Memory wakeup latency in 0.1us units
692  *
693  * Compute the watermark using the method 2 or "large buffer"
 694  * formula. The caller may additionally add extra cachelines
695  * to account for TLB misses and clock crossings.
696  *
697  * This method is concerned with the long term drain rate
 698  * of the FIFO, i.e. it does account for blanking periods
699  * which effectively reduce the average drain rate across
700  * a longer period. The name "large" refers to the fact the
701  * FIFO is relatively large compared to the amount of data
702  * fetched.
703  *
704  * The FIFO level vs. time graph might look something like:
705  *
706  *    |\___       |\___
707  *    |    \___   |    \___
708  *    |        \  |        \
709  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
710  * -> time
711  *
712  * Returns:
713  * The watermark in bytes
714  */
715 static unsigned int intel_wm_method2(unsigned int pixel_rate,
716                                      unsigned int htotal,
717                                      unsigned int width,
718                                      unsigned int cpp,
719                                      unsigned int latency)
720 {
721         unsigned int ret;
722
723         /*
724          * FIXME remove once all users are computing
725          * watermarks in the correct place.
726          */
727         if (WARN_ON_ONCE(htotal == 0))
728                 htotal = 1;
729
730         ret = (latency * pixel_rate) / (htotal * 10000);
731         ret = (ret + 1) * width * cpp;
732
733         return ret;
734 }
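/*
 * Worked example (editorial addition, numbers purely illustrative):
 * with latency == 350 (35 usec), pixel_rate == 148500 kHz,
 * htotal == 2200, width == 1920 and cpp == 4:
 *
 *	(350 * 148500) / (2200 * 10000) = 2 full lines,
 *	(2 + 1) * 1920 * 4 = 23040 bytes
 *
 * i.e. the latency is rounded up to whole scanout lines and converted
 * to bytes using the plane width and pixel size.
 */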
735
736 /**
737  * intel_calculate_wm - calculate watermark level
738  * @pixel_rate: pixel clock
739  * @wm: chip FIFO params
740  * @fifo_size: size of the FIFO buffer
741  * @cpp: bytes per pixel
742  * @latency_ns: memory latency for the platform
743  *
744  * Calculate the watermark level (the level at which the display plane will
745  * start fetching from memory again).  Each chip has a different display
746  * FIFO size and allocation, so the caller needs to figure that out and pass
747  * in the correct intel_watermark_params structure.
748  *
749  * As the pixel clock runs, the FIFO will be drained at a rate that depends
750  * on the pixel size.  When it reaches the watermark level, it'll start
 751  * fetching FIFO-line-sized chunks from memory until the FIFO fills
752  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
753  * will occur, and a display engine hang could result.
754  */
755 static unsigned int intel_calculate_wm(int pixel_rate,
756                                        const struct intel_watermark_params *wm,
757                                        int fifo_size, int cpp,
758                                        unsigned int latency_ns)
759 {
760         int entries, wm_size;
761
762         /*
763          * Note: we need to make sure we don't overflow for various clock &
764          * latency values.
765          * clocks go from a few thousand to several hundred thousand.
766          * latency is usually a few thousand
767          */
768         entries = intel_wm_method1(pixel_rate, cpp,
769                                    latency_ns / 100);
770         entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
771                 wm->guard_size;
772         DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
773
774         wm_size = fifo_size - entries;
775         DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
776
777         /* Don't promote wm_size to unsigned... */
778         if (wm_size > wm->max_wm)
779                 wm_size = wm->max_wm;
780         if (wm_size <= 0)
781                 wm_size = wm->default_wm;
782
783         /*
784          * Bspec seems to indicate that the value shouldn't be lower than
785          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
 786          * Let's go for 8, which is the burst size, since certain platforms
787          * already use a hardcoded 8 (which is what the spec says should be
788          * done).
789          */
790         if (wm_size <= 8)
791                 wm_size = 8;
792
793         return wm_size;
794 }
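/*
 * Worked example (editorial addition; the FIFO size and limits are
 * hypothetical, not taken from a specific platform): with a 108000 kHz
 * pixel clock, cpp == 4, latency_ns == 5000, a 64 byte cacheline and a
 * guard of 2:
 *
 *	method1: 108000 * 4 * 50 / 10000 = 2160 bytes
 *	entries: DIV_ROUND_UP(2160, 64) + 2 = 36
 *
 * With a hypothetical 96 entry FIFO the watermark level would be
 * 96 - 36 = 60 entries, which is above the minimum of 8 enforced below.
 */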
795
796 static bool is_disabling(int old, int new, int threshold)
797 {
798         return old >= threshold && new < threshold;
799 }
800
801 static bool is_enabling(int old, int new, int threshold)
802 {
803         return old < threshold && new >= threshold;
804 }
805
806 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
807 {
808         return dev_priv->wm.max_level + 1;
809 }
810
811 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
812                                    const struct intel_plane_state *plane_state)
813 {
814         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
815
816         /* FIXME check the 'enable' instead */
817         if (!crtc_state->base.active)
818                 return false;
819
820         /*
821          * Treat cursor with fb as always visible since cursor updates
822          * can happen faster than the vrefresh rate, and the current
823          * watermark code doesn't handle that correctly. Cursor updates
824          * which set/clear the fb or change the cursor size are going
825          * to get throttled by intel_legacy_cursor_update() to work
826          * around this problem with the watermark code.
827          */
828         if (plane->id == PLANE_CURSOR)
829                 return plane_state->base.fb != NULL;
830         else
831                 return plane_state->base.visible;
832 }
833
834 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
835 {
836         struct intel_crtc *crtc, *enabled = NULL;
837
838         for_each_intel_crtc(&dev_priv->drm, crtc) {
839                 if (intel_crtc_active(crtc)) {
840                         if (enabled)
841                                 return NULL;
842                         enabled = crtc;
843                 }
844         }
845
846         return enabled;
847 }
848
849 static void pineview_update_wm(struct intel_crtc *unused_crtc)
850 {
851         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
852         struct intel_crtc *crtc;
853         const struct cxsr_latency *latency;
854         u32 reg;
855         unsigned int wm;
856
857         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
858                                          dev_priv->is_ddr3,
859                                          dev_priv->fsb_freq,
860                                          dev_priv->mem_freq);
861         if (!latency) {
862                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
863                 intel_set_memory_cxsr(dev_priv, false);
864                 return;
865         }
866
867         crtc = single_enabled_crtc(dev_priv);
868         if (crtc) {
869                 const struct drm_display_mode *adjusted_mode =
870                         &crtc->config->base.adjusted_mode;
871                 const struct drm_framebuffer *fb =
872                         crtc->base.primary->state->fb;
873                 int cpp = fb->format->cpp[0];
874                 int clock = adjusted_mode->crtc_clock;
875
876                 /* Display SR */
877                 wm = intel_calculate_wm(clock, &pineview_display_wm,
878                                         pineview_display_wm.fifo_size,
879                                         cpp, latency->display_sr);
880                 reg = I915_READ(DSPFW1);
881                 reg &= ~DSPFW_SR_MASK;
882                 reg |= FW_WM(wm, SR);
883                 I915_WRITE(DSPFW1, reg);
884                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
885
886                 /* cursor SR */
887                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
888                                         pineview_display_wm.fifo_size,
889                                         4, latency->cursor_sr);
890                 reg = I915_READ(DSPFW3);
891                 reg &= ~DSPFW_CURSOR_SR_MASK;
892                 reg |= FW_WM(wm, CURSOR_SR);
893                 I915_WRITE(DSPFW3, reg);
894
895                 /* Display HPLL off SR */
896                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
897                                         pineview_display_hplloff_wm.fifo_size,
898                                         cpp, latency->display_hpll_disable);
899                 reg = I915_READ(DSPFW3);
900                 reg &= ~DSPFW_HPLL_SR_MASK;
901                 reg |= FW_WM(wm, HPLL_SR);
902                 I915_WRITE(DSPFW3, reg);
903
904                 /* cursor HPLL off SR */
905                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
906                                         pineview_display_hplloff_wm.fifo_size,
907                                         4, latency->cursor_hpll_disable);
908                 reg = I915_READ(DSPFW3);
909                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
910                 reg |= FW_WM(wm, HPLL_CURSOR);
911                 I915_WRITE(DSPFW3, reg);
912                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
913
914                 intel_set_memory_cxsr(dev_priv, true);
915         } else {
916                 intel_set_memory_cxsr(dev_priv, false);
917         }
918 }
919
920 /*
921  * Documentation says:
922  * "If the line size is small, the TLB fetches can get in the way of the
923  *  data fetches, causing some lag in the pixel data return which is not
924  *  accounted for in the above formulas. The following adjustment only
925  *  needs to be applied if eight whole lines fit in the buffer at once.
926  *  The WM is adjusted upwards by the difference between the FIFO size
927  *  and the size of 8 whole lines. This adjustment is always performed
928  *  in the actual pixel depth regardless of whether FBC is enabled or not."
929  */
930 static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
931 {
932         int tlb_miss = fifo_size * 64 - width * cpp * 8;
933
934         return max(0, tlb_miss);
935 }
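/*
 * Worked example (editorial addition, numbers purely illustrative):
 * with a 127 cacheline (127 * 64 = 8128 byte) FIFO, a 240 pixel wide
 * 4 byte/pixel plane needs 240 * 4 * 8 = 7680 bytes for eight lines,
 * so the watermark is bumped by 8128 - 7680 = 448 bytes. A 1920 pixel
 * wide plane needs 61440 bytes, far more than the FIFO, so max(0, ...)
 * leaves the watermark unadjusted.
 */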
936
937 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
938                                 const struct g4x_wm_values *wm)
939 {
940         enum pipe pipe;
941
942         for_each_pipe(dev_priv, pipe)
943                 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
944
945         I915_WRITE(DSPFW1,
946                    FW_WM(wm->sr.plane, SR) |
947                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
948                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
949                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
950         I915_WRITE(DSPFW2,
951                    (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
952                    FW_WM(wm->sr.fbc, FBC_SR) |
953                    FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
954                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
955                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
956                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
957         I915_WRITE(DSPFW3,
958                    (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
959                    FW_WM(wm->sr.cursor, CURSOR_SR) |
960                    FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
961                    FW_WM(wm->hpll.plane, HPLL_SR));
962
963         POSTING_READ(DSPFW1);
964 }
965
966 #define FW_WM_VLV(value, plane) \
967         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
968
969 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
970                                 const struct vlv_wm_values *wm)
971 {
972         enum pipe pipe;
973
974         for_each_pipe(dev_priv, pipe) {
975                 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
976
977                 I915_WRITE(VLV_DDL(pipe),
978                            (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
979                            (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
980                            (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
981                            (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
982         }
983
984         /*
985          * Zero the (unused) WM1 watermarks, and also clear all the
986          * high order bits so that there are no out of bounds values
987          * present in the registers during the reprogramming.
988          */
989         I915_WRITE(DSPHOWM, 0);
990         I915_WRITE(DSPHOWM1, 0);
991         I915_WRITE(DSPFW4, 0);
992         I915_WRITE(DSPFW5, 0);
993         I915_WRITE(DSPFW6, 0);
994
995         I915_WRITE(DSPFW1,
996                    FW_WM(wm->sr.plane, SR) |
997                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
998                    FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
999                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
1000         I915_WRITE(DSPFW2,
1001                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
1002                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
1003                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
1004         I915_WRITE(DSPFW3,
1005                    FW_WM(wm->sr.cursor, CURSOR_SR));
1006
1007         if (IS_CHERRYVIEW(dev_priv)) {
1008                 I915_WRITE(DSPFW7_CHV,
1009                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1010                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1011                 I915_WRITE(DSPFW8_CHV,
1012                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1013                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1014                 I915_WRITE(DSPFW9_CHV,
1015                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1016                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1017                 I915_WRITE(DSPHOWM,
1018                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1019                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1020                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1021                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1022                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1023                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1024                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1025                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1026                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1027                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1028         } else {
1029                 I915_WRITE(DSPFW7,
1030                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1031                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1032                 I915_WRITE(DSPHOWM,
1033                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1034                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1035                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1036                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1037                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1038                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1039                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1040         }
1041
1042         POSTING_READ(DSPFW1);
1043 }
1044
1045 #undef FW_WM_VLV
1046
1047 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1048 {
1049         /* all latencies in usec */
1050         dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1051         dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1052         dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1053
1054         dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1055 }
1056
1057 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1058 {
1059         /*
1060          * DSPCNTR[13] supposedly controls whether the
1061          * primary plane can use the FIFO space otherwise
1062          * reserved for the sprite plane. It's not 100% clear
1063          * what the actual FIFO size is, but it looks like we
1064          * can happily set both primary and sprite watermarks
1065          * up to 127 cachelines. So that would seem to mean
1066          * that either DSPCNTR[13] doesn't do anything, or that
1067          * the total FIFO is >= 256 cachelines in size. Either
1068          * way, we don't seem to have to worry about this
1069          * repartitioning as the maximum watermark value the
1070          * register can hold for each plane is lower than the
1071          * minimum FIFO size.
1072          */
1073         switch (plane_id) {
1074         case PLANE_CURSOR:
1075                 return 63;
1076         case PLANE_PRIMARY:
1077                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1078         case PLANE_SPRITE0:
1079                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1080         default:
1081                 MISSING_CASE(plane_id);
1082                 return 0;
1083         }
1084 }
1085
1086 static int g4x_fbc_fifo_size(int level)
1087 {
1088         switch (level) {
1089         case G4X_WM_LEVEL_SR:
1090                 return 7;
1091         case G4X_WM_LEVEL_HPLL:
1092                 return 15;
1093         default:
1094                 MISSING_CASE(level);
1095                 return 0;
1096         }
1097 }
1098
1099 static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1100                                const struct intel_plane_state *plane_state,
1101                                int level)
1102 {
1103         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1104         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1105         const struct drm_display_mode *adjusted_mode =
1106                 &crtc_state->base.adjusted_mode;
1107         unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1108         unsigned int clock, htotal, cpp, width, wm;
1109
1110         if (latency == 0)
1111                 return USHRT_MAX;
1112
1113         if (!intel_wm_plane_visible(crtc_state, plane_state))
1114                 return 0;
1115
1116         /*
1117          * Not 100% sure which way ELK should go here as the
1118          * spec only says CL/CTG should assume 32bpp and BW
1119          * doesn't need to. But as these things followed the
1120          * mobile vs. desktop lines on gen3 as well, let's
1121          * assume ELK doesn't need this.
1122          *
1123          * The spec also fails to list such a restriction for
1124          * the HPLL watermark, which seems a little strange.
1125          * Let's use 32bpp for the HPLL watermark as well.
1126          */
1127         if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1128             level != G4X_WM_LEVEL_NORMAL)
1129                 cpp = 4;
1130         else
1131                 cpp = plane_state->base.fb->format->cpp[0];
1132
1133         clock = adjusted_mode->crtc_clock;
1134         htotal = adjusted_mode->crtc_htotal;
1135
1136         if (plane->id == PLANE_CURSOR)
1137                 width = plane_state->base.crtc_w;
1138         else
1139                 width = drm_rect_width(&plane_state->base.dst);
1140
1141         if (plane->id == PLANE_CURSOR) {
1142                 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1143         } else if (plane->id == PLANE_PRIMARY &&
1144                    level == G4X_WM_LEVEL_NORMAL) {
1145                 wm = intel_wm_method1(clock, cpp, latency);
1146         } else {
1147                 unsigned int small, large;
1148
1149                 small = intel_wm_method1(clock, cpp, latency);
1150                 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1151
1152                 wm = min(small, large);
1153         }
1154
1155         wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1156                               width, cpp);
1157
1158         wm = DIV_ROUND_UP(wm, 64) + 2;
1159
1160         return min_t(unsigned int, wm, USHRT_MAX);
1161 }
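/*
 * Worked example (editorial addition, numbers purely illustrative):
 * for the SR level (12 usec -> latency == 120) with clock == 148500,
 * htotal == 2200, width == 1920 and cpp == 4:
 *
 *	method1: 148500 * 4 * 120 / 10000 = 7128 bytes
 *	method2: ((120 * 148500) / (2200 * 10000) + 1) * 1920 * 4 = 7680 bytes
 *	wm = min(7128, 7680) = 7128 bytes
 *
 * The 511 cacheline SR FIFO (32704 bytes) is smaller than the 61440
 * bytes needed for eight whole lines, so no TLB miss adjustment is
 * added, and the final value is DIV_ROUND_UP(7128, 64) + 2 = 114
 * cachelines, comfortably below the 511 cacheline limit.
 */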
1162
1163 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1164                                  int level, enum plane_id plane_id, u16 value)
1165 {
1166         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1167         bool dirty = false;
1168
1169         for (; level < intel_wm_num_levels(dev_priv); level++) {
1170                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1171
1172                 dirty |= raw->plane[plane_id] != value;
1173                 raw->plane[plane_id] = value;
1174         }
1175
1176         return dirty;
1177 }
1178
1179 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1180                                int level, u16 value)
1181 {
1182         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1183         bool dirty = false;
1184
1185         /* NORMAL level doesn't have an FBC watermark */
1186         level = max(level, G4X_WM_LEVEL_SR);
1187
1188         for (; level < intel_wm_num_levels(dev_priv); level++) {
1189                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1190
1191                 dirty |= raw->fbc != value;
1192                 raw->fbc = value;
1193         }
1194
1195         return dirty;
1196 }
1197
1198 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1199                                    const struct intel_plane_state *pstate,
1200                                    uint32_t pri_val);
1201
1202 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1203                                      const struct intel_plane_state *plane_state)
1204 {
1205         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1206         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1207         enum plane_id plane_id = plane->id;
1208         bool dirty = false;
1209         int level;
1210
1211         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1212                 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1213                 if (plane_id == PLANE_PRIMARY)
1214                         dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1215                 goto out;
1216         }
1217
1218         for (level = 0; level < num_levels; level++) {
1219                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1220                 int wm, max_wm;
1221
1222                 wm = g4x_compute_wm(crtc_state, plane_state, level);
1223                 max_wm = g4x_plane_fifo_size(plane_id, level);
1224
1225                 if (wm > max_wm)
1226                         break;
1227
1228                 dirty |= raw->plane[plane_id] != wm;
1229                 raw->plane[plane_id] = wm;
1230
1231                 if (plane_id != PLANE_PRIMARY ||
1232                     level == G4X_WM_LEVEL_NORMAL)
1233                         continue;
1234
1235                 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1236                                         raw->plane[plane_id]);
1237                 max_wm = g4x_fbc_fifo_size(level);
1238
1239                 /*
1240                  * FBC wm is not mandatory as we
1241                  * can always just disable its use.
1242                  */
1243                 if (wm > max_wm)
1244                         wm = USHRT_MAX;
1245
1246                 dirty |= raw->fbc != wm;
1247                 raw->fbc = wm;
1248         }
1249
1250         /* mark watermarks as invalid */
1251         dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1252
1253         if (plane_id == PLANE_PRIMARY)
1254                 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1255
1256  out:
1257         if (dirty) {
1258                 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1259                               plane->base.name,
1260                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1261                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1262                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1263
1264                 if (plane_id == PLANE_PRIMARY)
1265                         DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1266                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1267                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1268         }
1269
1270         return dirty;
1271 }
1272
1273 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1274                                       enum plane_id plane_id, int level)
1275 {
1276         const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1277
1278         return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1279 }
1280
1281 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1282                                      int level)
1283 {
1284         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1285
1286         if (level > dev_priv->wm.max_level)
1287                 return false;
1288
1289         return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1290                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1291                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1292 }
1293
1294 /* mark all levels starting from 'level' as invalid */
1295 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1296                                struct g4x_wm_state *wm_state, int level)
1297 {
1298         if (level <= G4X_WM_LEVEL_NORMAL) {
1299                 enum plane_id plane_id;
1300
1301                 for_each_plane_id_on_crtc(crtc, plane_id)
1302                         wm_state->wm.plane[plane_id] = USHRT_MAX;
1303         }
1304
1305         if (level <= G4X_WM_LEVEL_SR) {
1306                 wm_state->cxsr = false;
1307                 wm_state->sr.cursor = USHRT_MAX;
1308                 wm_state->sr.plane = USHRT_MAX;
1309                 wm_state->sr.fbc = USHRT_MAX;
1310         }
1311
1312         if (level <= G4X_WM_LEVEL_HPLL) {
1313                 wm_state->hpll_en = false;
1314                 wm_state->hpll.cursor = USHRT_MAX;
1315                 wm_state->hpll.plane = USHRT_MAX;
1316                 wm_state->hpll.fbc = USHRT_MAX;
1317         }
1318 }
1319
1320 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1321 {
1322         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1323         struct intel_atomic_state *state =
1324                 to_intel_atomic_state(crtc_state->base.state);
1325         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1326         int num_active_planes = hweight32(crtc_state->active_planes &
1327                                           ~BIT(PLANE_CURSOR));
1328         const struct g4x_pipe_wm *raw;
1329         const struct intel_plane_state *old_plane_state;
1330         const struct intel_plane_state *new_plane_state;
1331         struct intel_plane *plane;
1332         enum plane_id plane_id;
1333         int i, level;
1334         unsigned int dirty = 0;
1335
1336         for_each_oldnew_intel_plane_in_state(state, plane,
1337                                              old_plane_state,
1338                                              new_plane_state, i) {
1339                 if (new_plane_state->base.crtc != &crtc->base &&
1340                     old_plane_state->base.crtc != &crtc->base)
1341                         continue;
1342
1343                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1344                         dirty |= BIT(plane->id);
1345         }
1346
1347         if (!dirty)
1348                 return 0;
1349
1350         level = G4X_WM_LEVEL_NORMAL;
1351         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1352                 goto out;
1353
1354         raw = &crtc_state->wm.g4x.raw[level];
1355         for_each_plane_id_on_crtc(crtc, plane_id)
1356                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1357
1358         level = G4X_WM_LEVEL_SR;
1359
1360         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1361                 goto out;
1362
1363         raw = &crtc_state->wm.g4x.raw[level];
1364         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1365         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1366         wm_state->sr.fbc = raw->fbc;
1367
1368         wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1369
1370         level = G4X_WM_LEVEL_HPLL;
1371
1372         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1373                 goto out;
1374
1375         raw = &crtc_state->wm.g4x.raw[level];
1376         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1377         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1378         wm_state->hpll.fbc = raw->fbc;
1379
1380         wm_state->hpll_en = wm_state->cxsr;
1381
1382         level++;
1383
1384  out:
1385         if (level == G4X_WM_LEVEL_NORMAL)
1386                 return -EINVAL;
1387
1388         /* invalidate the higher levels */
1389         g4x_invalidate_wms(crtc, wm_state, level);
1390
1391         /*
1392          * Determine if the FBC watermark(s) can be used. If
1393          * this isn't the case we prefer to disable the FBC
1394          * watermark(s) rather than disable the SR/HPLL
1395          * level(s) entirely.
1396          */
1397         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1398
1399         if (level >= G4X_WM_LEVEL_SR &&
1400             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1401                 wm_state->fbc_en = false;
1402         else if (level >= G4X_WM_LEVEL_HPLL &&
1403                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1404                 wm_state->fbc_en = false;
1405
1406         return 0;
1407 }
1408
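/*
 * Compute intermediate watermarks that are safe with both the old
 * (currently active) and the new (optimal) plane configuration by
 * taking the maximum of the two for each value; cxsr/HPLL/FBC remain
 * enabled only if both states allow them.
 */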
1409 static int g4x_compute_intermediate_wm(struct drm_device *dev,
1410                                        struct intel_crtc *crtc,
1411                                        struct intel_crtc_state *new_crtc_state)
1412 {
1413         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1414         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1415         struct intel_atomic_state *intel_state =
1416                 to_intel_atomic_state(new_crtc_state->base.state);
1417         const struct intel_crtc_state *old_crtc_state =
1418                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1419         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1420         enum plane_id plane_id;
1421
1422         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1423                 *intermediate = *optimal;
1424
1425                 intermediate->cxsr = false;
1426                 intermediate->hpll_en = false;
1427                 goto out;
1428         }
1429
1430         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1431                 !new_crtc_state->disable_cxsr;
1432         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1433                 !new_crtc_state->disable_cxsr;
1434         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1435
1436         for_each_plane_id_on_crtc(crtc, plane_id) {
1437                 intermediate->wm.plane[plane_id] =
1438                         max(optimal->wm.plane[plane_id],
1439                             active->wm.plane[plane_id]);
1440
1441                 WARN_ON(intermediate->wm.plane[plane_id] >
1442                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1443         }
1444
1445         intermediate->sr.plane = max(optimal->sr.plane,
1446                                      active->sr.plane);
1447         intermediate->sr.cursor = max(optimal->sr.cursor,
1448                                       active->sr.cursor);
1449         intermediate->sr.fbc = max(optimal->sr.fbc,
1450                                    active->sr.fbc);
1451
1452         intermediate->hpll.plane = max(optimal->hpll.plane,
1453                                        active->hpll.plane);
1454         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1455                                         active->hpll.cursor);
1456         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1457                                      active->hpll.fbc);
1458
1459         WARN_ON((intermediate->sr.plane >
1460                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1461                  intermediate->sr.cursor >
1462                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1463                 intermediate->cxsr);
1464         WARN_ON((intermediate->sr.plane >
1465                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1466                  intermediate->sr.cursor >
1467                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1468                 intermediate->hpll_en);
1469
1470         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1471                 intermediate->fbc_en && intermediate->cxsr);
1472         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1473                 intermediate->fbc_en && intermediate->hpll_en);
1474
1475 out:
1476         /*
1477          * If our intermediate WM are identical to the final WM, then we can
1478          * omit the post-vblank programming; only update if it's different.
1479          */
1480         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1481                 new_crtc_state->wm.need_postvbl_update = true;
1482
1483         return 0;
1484 }
1485
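/*
 * Merge the active per-CRTC watermark states into one set of global
 * g4x watermark values. cxsr, HPLL and FBC are left enabled only when
 * exactly one CRTC is active and that CRTC allows them.
 */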
1486 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1487                          struct g4x_wm_values *wm)
1488 {
1489         struct intel_crtc *crtc;
1490         int num_active_crtcs = 0;
1491
1492         wm->cxsr = true;
1493         wm->hpll_en = true;
1494         wm->fbc_en = true;
1495
1496         for_each_intel_crtc(&dev_priv->drm, crtc) {
1497                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1498
1499                 if (!crtc->active)
1500                         continue;
1501
1502                 if (!wm_state->cxsr)
1503                         wm->cxsr = false;
1504                 if (!wm_state->hpll_en)
1505                         wm->hpll_en = false;
1506                 if (!wm_state->fbc_en)
1507                         wm->fbc_en = false;
1508
1509                 num_active_crtcs++;
1510         }
1511
1512         if (num_active_crtcs != 1) {
1513                 wm->cxsr = false;
1514                 wm->hpll_en = false;
1515                 wm->fbc_en = false;
1516         }
1517
1518         for_each_intel_crtc(&dev_priv->drm, crtc) {
1519                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1520                 enum pipe pipe = crtc->pipe;
1521
1522                 wm->pipe[pipe] = wm_state->wm;
1523                 if (crtc->active && wm->cxsr)
1524                         wm->sr = wm_state->sr;
1525                 if (crtc->active && wm->hpll_en)
1526                         wm->hpll = wm_state->hpll;
1527         }
1528 }
1529
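/*
 * Write the merged watermarks to the hardware. cxsr is turned off
 * before the new values are written when it is being disabled, and is
 * only (re)enabled afterwards, so the transition always happens with
 * safe watermarks in place.
 */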
1530 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1531 {
1532         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1533         struct g4x_wm_values new_wm = {};
1534
1535         g4x_merge_wm(dev_priv, &new_wm);
1536
1537         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1538                 return;
1539
1540         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1541                 _intel_set_memory_cxsr(dev_priv, false);
1542
1543         g4x_write_wm_values(dev_priv, &new_wm);
1544
1545         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1546                 _intel_set_memory_cxsr(dev_priv, true);
1547
1548         *old_wm = new_wm;
1549 }
1550
1551 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1552                                    struct intel_crtc_state *crtc_state)
1553 {
1554         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1555         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1556
1557         mutex_lock(&dev_priv->wm.wm_mutex);
1558         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1559         g4x_program_watermarks(dev_priv);
1560         mutex_unlock(&dev_priv->wm.wm_mutex);
1561 }
1562
1563 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1564                                     struct intel_crtc_state *crtc_state)
1565 {
1566         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1567         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1568
1569         if (!crtc_state->wm.need_postvbl_update)
1570                 return;
1571
1572         mutex_lock(&dev_priv->wm.wm_mutex);
1573         intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1574         g4x_program_watermarks(dev_priv);
1575         mutex_unlock(&dev_priv->wm.wm_mutex);
1576 }
1577
1578 /* latency must be in 0.1us units. */
1579 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1580                                    unsigned int htotal,
1581                                    unsigned int width,
1582                                    unsigned int cpp,
1583                                    unsigned int latency)
1584 {
1585         unsigned int ret;
1586
1587         ret = intel_wm_method2(pixel_rate, htotal,
1588                                width, cpp, latency);
1589         ret = DIV_ROUND_UP(ret, 64);
1590
1591         return ret;
1592 }
1593
1594 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1595 {
1596         /* all latencies in usec */
1597         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1598
1599         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1600
1601         if (IS_CHERRYVIEW(dev_priv)) {
1602                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1603                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1604
1605                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1606         }
1607 }
1608
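/*
 * Compute the raw watermark for one plane at the given latency level.
 * Returns USHRT_MAX if the level's latency is unknown (zero) and 0 for
 * invisible planes; the cursor value is hardcoded (see the FIXME below).
 */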
1609 static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1610                                      const struct intel_plane_state *plane_state,
1611                                      int level)
1612 {
1613         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1614         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1615         const struct drm_display_mode *adjusted_mode =
1616                 &crtc_state->base.adjusted_mode;
1617         unsigned int clock, htotal, cpp, width, wm;
1618
1619         if (dev_priv->wm.pri_latency[level] == 0)
1620                 return USHRT_MAX;
1621
1622         if (!intel_wm_plane_visible(crtc_state, plane_state))
1623                 return 0;
1624
1625         cpp = plane_state->base.fb->format->cpp[0];
1626         clock = adjusted_mode->crtc_clock;
1627         htotal = adjusted_mode->crtc_htotal;
1628         width = crtc_state->pipe_src_w;
1629
1630         if (plane->id == PLANE_CURSOR) {
1631                 /*
1632                  * FIXME the formula gives values that are
1633                  * too big for the cursor FIFO, and hence we
1634                  * would never be able to use cursors. For
1635                  * now just hardcode the watermark.
1636                  */
1637                 wm = 63;
1638         } else {
1639                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1640                                     dev_priv->wm.pri_latency[level] * 10);
1641         }
1642
1643         return min_t(unsigned int, wm, USHRT_MAX);
1644 }
1645
1646 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1647 {
1648         return (active_planes & (BIT(PLANE_SPRITE0) |
1649                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1650 }
1651
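/*
 * Split the 511 entry display FIFO between the active primary/sprite
 * planes in proportion to their level PM2 raw watermarks, spreading any
 * leftover entries evenly. The cursor keeps its fixed 63 entry FIFO.
 */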
1652 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1653 {
1654         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1655         const struct g4x_pipe_wm *raw =
1656                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1657         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1658         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1659         int num_active_planes = hweight32(active_planes);
1660         const int fifo_size = 511;
1661         int fifo_extra, fifo_left = fifo_size;
1662         int sprite0_fifo_extra = 0;
1663         unsigned int total_rate;
1664         enum plane_id plane_id;
1665
1666         /*
1667          * When enabling sprite0 after sprite1 has already been enabled
1668          * we tend to get an underrun unless sprite0 already has some
1669          * FIFO space allocated. Hence we always allocate at least one
1670          * cacheline for sprite0 whenever sprite1 is enabled.
1671          *
1672          * All other plane enable sequences appear immune to this problem.
1673          */
1674         if (vlv_need_sprite0_fifo_workaround(active_planes))
1675                 sprite0_fifo_extra = 1;
1676
1677         total_rate = raw->plane[PLANE_PRIMARY] +
1678                 raw->plane[PLANE_SPRITE0] +
1679                 raw->plane[PLANE_SPRITE1] +
1680                 sprite0_fifo_extra;
1681
1682         if (total_rate > fifo_size)
1683                 return -EINVAL;
1684
1685         if (total_rate == 0)
1686                 total_rate = 1;
1687
1688         for_each_plane_id_on_crtc(crtc, plane_id) {
1689                 unsigned int rate;
1690
1691                 if ((active_planes & BIT(plane_id)) == 0) {
1692                         fifo_state->plane[plane_id] = 0;
1693                         continue;
1694                 }
1695
1696                 rate = raw->plane[plane_id];
1697                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1698                 fifo_left -= fifo_state->plane[plane_id];
1699         }
1700
1701         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1702         fifo_left -= sprite0_fifo_extra;
1703
1704         fifo_state->plane[PLANE_CURSOR] = 63;
1705
1706         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1707
1708         /* spread the remainder evenly */
1709         for_each_plane_id_on_crtc(crtc, plane_id) {
1710                 int plane_extra;
1711
1712                 if (fifo_left == 0)
1713                         break;
1714
1715                 if ((active_planes & BIT(plane_id)) == 0)
1716                         continue;
1717
1718                 plane_extra = min(fifo_extra, fifo_left);
1719                 fifo_state->plane[plane_id] += plane_extra;
1720                 fifo_left -= plane_extra;
1721         }
1722
1723         WARN_ON(active_planes != 0 && fifo_left != 0);
1724
1725         /* give it all to the first plane if none are active */
1726         if (active_planes == 0) {
1727                 WARN_ON(fifo_left != fifo_size);
1728                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1729         }
1730
1731         return 0;
1732 }
1733
1734 /* mark all levels starting from 'level' as invalid */
1735 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1736                                struct vlv_wm_state *wm_state, int level)
1737 {
1738         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1739
1740         for (; level < intel_wm_num_levels(dev_priv); level++) {
1741                 enum plane_id plane_id;
1742
1743                 for_each_plane_id_on_crtc(crtc, plane_id)
1744                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1745
1746                 wm_state->sr[level].cursor = USHRT_MAX;
1747                 wm_state->sr[level].plane = USHRT_MAX;
1748         }
1749 }
1750
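/*
 * Invert a watermark against its FIFO size (fifo_size - wm); a value
 * that doesn't fit in the FIFO is reported as USHRT_MAX, i.e. the
 * level is unusable for that plane.
 */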
1751 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1752 {
1753         if (wm > fifo_size)
1754                 return USHRT_MAX;
1755         else
1756                 return fifo_size - wm;
1757 }
1758
1759 /*
1760  * Starting from 'level' set all higher
1761  * levels to 'value' in the "raw" watermarks.
1762  */
1763 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1764                                  int level, enum plane_id plane_id, u16 value)
1765 {
1766         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1767         int num_levels = intel_wm_num_levels(dev_priv);
1768         bool dirty = false;
1769
1770         for (; level < num_levels; level++) {
1771                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1772
1773                 dirty |= raw->plane[plane_id] != value;
1774                 raw->plane[plane_id] = value;
1775         }
1776
1777         return dirty;
1778 }
1779
1780 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1781                                      const struct intel_plane_state *plane_state)
1782 {
1783         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1784         enum plane_id plane_id = plane->id;
1785         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1786         int level;
1787         bool dirty = false;
1788
1789         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1790                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1791                 goto out;
1792         }
1793
1794         for (level = 0; level < num_levels; level++) {
1795                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1796                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1797                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1798
1799                 if (wm > max_wm)
1800                         break;
1801
1802                 dirty |= raw->plane[plane_id] != wm;
1803                 raw->plane[plane_id] = wm;
1804         }
1805
1806         /* mark all higher levels as invalid */
1807         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1808
1809 out:
1810         if (dirty)
1811                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1812                               plane->base.name,
1813                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1814                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1815                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1816
1817         return dirty;
1818 }
1819
1820 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1821                                       enum plane_id plane_id, int level)
1822 {
1823         const struct g4x_pipe_wm *raw =
1824                 &crtc_state->wm.vlv.raw[level];
1825         const struct vlv_fifo_state *fifo_state =
1826                 &crtc_state->wm.vlv.fifo_state;
1827
1828         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1829 }
1830
1831 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1832 {
1833         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1834                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1835                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1836                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1837 }
1838
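/*
 * Compute the optimal vlv/chv watermark state for this pipe: recompute
 * the per-plane raw watermarks, redistribute the DSPARB FIFO when any
 * non-cursor plane changed, and accept only the levels for which every
 * plane still fits its FIFO allocation.
 */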
1839 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1840 {
1841         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1842         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1843         struct intel_atomic_state *state =
1844                 to_intel_atomic_state(crtc_state->base.state);
1845         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1846         const struct vlv_fifo_state *fifo_state =
1847                 &crtc_state->wm.vlv.fifo_state;
1848         int num_active_planes = hweight32(crtc_state->active_planes &
1849                                           ~BIT(PLANE_CURSOR));
1850         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1851         const struct intel_plane_state *old_plane_state;
1852         const struct intel_plane_state *new_plane_state;
1853         struct intel_plane *plane;
1854         enum plane_id plane_id;
1855         int level, ret, i;
1856         unsigned int dirty = 0;
1857
1858         for_each_oldnew_intel_plane_in_state(state, plane,
1859                                              old_plane_state,
1860                                              new_plane_state, i) {
1861                 if (new_plane_state->base.crtc != &crtc->base &&
1862                     old_plane_state->base.crtc != &crtc->base)
1863                         continue;
1864
1865                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1866                         dirty |= BIT(plane->id);
1867         }
1868
1869         /*
1870          * DSPARB registers may have been reset due to the
1871          * power well being turned off. Make sure we restore
1872          * them to a consistent state even if no primary/sprite
1873          * planes are initially active.
1874          */
1875         if (needs_modeset)
1876                 crtc_state->fifo_changed = true;
1877
1878         if (!dirty)
1879                 return 0;
1880
1881         /* cursor changes don't warrant a FIFO recompute */
1882         if (dirty & ~BIT(PLANE_CURSOR)) {
1883                 const struct intel_crtc_state *old_crtc_state =
1884                         intel_atomic_get_old_crtc_state(state, crtc);
1885                 const struct vlv_fifo_state *old_fifo_state =
1886                         &old_crtc_state->wm.vlv.fifo_state;
1887
1888                 ret = vlv_compute_fifo(crtc_state);
1889                 if (ret)
1890                         return ret;
1891
1892                 if (needs_modeset ||
1893                     memcmp(old_fifo_state, fifo_state,
1894                            sizeof(*fifo_state)) != 0)
1895                         crtc_state->fifo_changed = true;
1896         }
1897
1898         /* initially allow all levels */
1899         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1900         /*
1901          * Note that enabling cxsr with no primary/sprite planes
1902          * enabled can wedge the pipe. Hence we only allow cxsr
1903          * with exactly one enabled primary/sprite plane.
1904          */
1905         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1906
1907         for (level = 0; level < wm_state->num_levels; level++) {
1908                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1909                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1910
1911                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1912                         break;
1913
1914                 for_each_plane_id_on_crtc(crtc, plane_id) {
1915                         wm_state->wm[level].plane[plane_id] =
1916                                 vlv_invert_wm_value(raw->plane[plane_id],
1917                                                     fifo_state->plane[plane_id]);
1918                 }
1919
1920                 wm_state->sr[level].plane =
1921                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1922                                                  raw->plane[PLANE_SPRITE0],
1923                                                  raw->plane[PLANE_SPRITE1]),
1924                                             sr_fifo_size);
1925
1926                 wm_state->sr[level].cursor =
1927                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1928                                             63);
1929         }
1930
1931         if (level == 0)
1932                 return -EINVAL;
1933
1934         /* limit to only levels we can actually handle */
1935         wm_state->num_levels = level;
1936
1937         /* invalidate the higher levels */
1938         vlv_invalidate_wms(crtc, wm_state, level);
1939
1940         return 0;
1941 }
1942
1943 #define VLV_FIFO(plane, value) \
1944         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1945
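/*
 * Program the per-pipe DSPARB FIFO split computed by vlv_compute_fifo().
 * This only touches the hardware when the split actually changed.
 */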
1946 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1947                                    struct intel_crtc_state *crtc_state)
1948 {
1949         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1950         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1951         const struct vlv_fifo_state *fifo_state =
1952                 &crtc_state->wm.vlv.fifo_state;
1953         int sprite0_start, sprite1_start, fifo_size;
1954
1955         if (!crtc_state->fifo_changed)
1956                 return;
1957
1958         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1959         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1960         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1961
1962         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1963         WARN_ON(fifo_size != 511);
1964
1965         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1966
1967         /*
1968          * uncore.lock serves a double purpose here. It allows us to
1969          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1970          * it protects the DSPARB registers from getting clobbered by
1971          * parallel updates from multiple pipes.
1972          *
1973          * intel_pipe_update_start() has already disabled interrupts
1974          * for us, so a plain spin_lock() is sufficient here.
1975          */
1976         spin_lock(&dev_priv->uncore.lock);
1977
1978         switch (crtc->pipe) {
1979                 uint32_t dsparb, dsparb2, dsparb3;
1980         case PIPE_A:
1981                 dsparb = I915_READ_FW(DSPARB);
1982                 dsparb2 = I915_READ_FW(DSPARB2);
1983
1984                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1985                             VLV_FIFO(SPRITEB, 0xff));
1986                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1987                            VLV_FIFO(SPRITEB, sprite1_start));
1988
1989                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1990                              VLV_FIFO(SPRITEB_HI, 0x1));
1991                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1992                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1993
1994                 I915_WRITE_FW(DSPARB, dsparb);
1995                 I915_WRITE_FW(DSPARB2, dsparb2);
1996                 break;
1997         case PIPE_B:
1998                 dsparb = I915_READ_FW(DSPARB);
1999                 dsparb2 = I915_READ_FW(DSPARB2);
2000
2001                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
2002                             VLV_FIFO(SPRITED, 0xff));
2003                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
2004                            VLV_FIFO(SPRITED, sprite1_start));
2005
2006                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2007                              VLV_FIFO(SPRITED_HI, 0xff));
2008                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2009                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2010
2011                 I915_WRITE_FW(DSPARB, dsparb);
2012                 I915_WRITE_FW(DSPARB2, dsparb2);
2013                 break;
2014         case PIPE_C:
2015                 dsparb3 = I915_READ_FW(DSPARB3);
2016                 dsparb2 = I915_READ_FW(DSPARB2);
2017
2018                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2019                              VLV_FIFO(SPRITEF, 0xff));
2020                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2021                             VLV_FIFO(SPRITEF, sprite1_start));
2022
2023                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2024                              VLV_FIFO(SPRITEF_HI, 0xff));
2025                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2026                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2027
2028                 I915_WRITE_FW(DSPARB3, dsparb3);
2029                 I915_WRITE_FW(DSPARB2, dsparb2);
2030                 break;
2031         default:
2032                 break;
2033         }
2034
2035         POSTING_READ_FW(DSPARB);
2036
2037         spin_unlock(&dev_priv->uncore.lock);
2038 }
2039
2040 #undef VLV_FIFO
2041
2042 static int vlv_compute_intermediate_wm(struct drm_device *dev,
2043                                        struct intel_crtc *crtc,
2044                                        struct intel_crtc_state *new_crtc_state)
2045 {
2046         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2047         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2048         struct intel_atomic_state *intel_state =
2049                 to_intel_atomic_state(new_crtc_state->base.state);
2050         const struct intel_crtc_state *old_crtc_state =
2051                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2052         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2053         int level;
2054
2055         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2056                 *intermediate = *optimal;
2057
2058                 intermediate->cxsr = false;
2059                 goto out;
2060         }
2061
2062         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2063         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2064                 !new_crtc_state->disable_cxsr;
2065
2066         for (level = 0; level < intermediate->num_levels; level++) {
2067                 enum plane_id plane_id;
2068
2069                 for_each_plane_id_on_crtc(crtc, plane_id) {
2070                         intermediate->wm[level].plane[plane_id] =
2071                                 min(optimal->wm[level].plane[plane_id],
2072                                     active->wm[level].plane[plane_id]);
2073                 }
2074
2075                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2076                                                     active->sr[level].plane);
2077                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2078                                                      active->sr[level].cursor);
2079         }
2080
2081         vlv_invalidate_wms(crtc, intermediate, level);
2082
2083 out:
2084         /*
2085          * If our intermediate WM are identical to the final WM, then we can
2086          * omit the post-vblank programming; only update if it's different.
2087          */
2088         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2089                 new_crtc_state->wm.need_postvbl_update = true;
2090
2091         return 0;
2092 }
2093
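/*
 * Merge the active per-CRTC watermark states into one set of global
 * vlv/chv watermark values. The global level is limited to the lowest
 * level supported by all active CRTCs, and cxsr is kept only when
 * exactly one CRTC is active.
 */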
2094 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2095                          struct vlv_wm_values *wm)
2096 {
2097         struct intel_crtc *crtc;
2098         int num_active_crtcs = 0;
2099
2100         wm->level = dev_priv->wm.max_level;
2101         wm->cxsr = true;
2102
2103         for_each_intel_crtc(&dev_priv->drm, crtc) {
2104                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2105
2106                 if (!crtc->active)
2107                         continue;
2108
2109                 if (!wm_state->cxsr)
2110                         wm->cxsr = false;
2111
2112                 num_active_crtcs++;
2113                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2114         }
2115
2116         if (num_active_crtcs != 1)
2117                 wm->cxsr = false;
2118
2119         if (num_active_crtcs > 1)
2120                 wm->level = VLV_WM_LEVEL_PM2;
2121
2122         for_each_intel_crtc(&dev_priv->drm, crtc) {
2123                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2124                 enum pipe pipe = crtc->pipe;
2125
2126                 wm->pipe[pipe] = wm_state->wm[wm->level];
2127                 if (crtc->active && wm->cxsr)
2128                         wm->sr = wm_state->sr[wm->level];
2129
2130                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2131                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2132                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2133                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2134         }
2135 }
2136
2137 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2138 {
2139         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2140         struct vlv_wm_values new_wm = {};
2141
2142         vlv_merge_wm(dev_priv, &new_wm);
2143
2144         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2145                 return;
2146
2147         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2148                 chv_set_memory_dvfs(dev_priv, false);
2149
2150         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2151                 chv_set_memory_pm5(dev_priv, false);
2152
2153         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2154                 _intel_set_memory_cxsr(dev_priv, false);
2155
2156         vlv_write_wm_values(dev_priv, &new_wm);
2157
2158         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2159                 _intel_set_memory_cxsr(dev_priv, true);
2160
2161         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2162                 chv_set_memory_pm5(dev_priv, true);
2163
2164         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2165                 chv_set_memory_dvfs(dev_priv, true);
2166
2167         *old_wm = new_wm;
2168 }
2169
2170 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2171                                    struct intel_crtc_state *crtc_state)
2172 {
2173         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2174         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2175
2176         mutex_lock(&dev_priv->wm.wm_mutex);
2177         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2178         vlv_program_watermarks(dev_priv);
2179         mutex_unlock(&dev_priv->wm.wm_mutex);
2180 }
2181
2182 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2183                                     struct intel_crtc_state *crtc_state)
2184 {
2185         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2186         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2187
2188         if (!crtc_state->wm.need_postvbl_update)
2189                 return;
2190
2191         mutex_lock(&dev_priv->wm.wm_mutex);
2192         intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2193         vlv_program_watermarks(dev_priv);
2194         mutex_unlock(&dev_priv->wm.wm_mutex);
2195 }
2196
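/*
 * Legacy (non-atomic) watermark update for i965: the per-plane FIFO
 * watermarks are fixed at 8, and the self-refresh watermarks are only
 * computed when a single CRTC is enabled; otherwise cxsr is disabled.
 */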
2197 static void i965_update_wm(struct intel_crtc *unused_crtc)
2198 {
2199         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2200         struct intel_crtc *crtc;
2201         int srwm = 1;
2202         int cursor_sr = 16;
2203         bool cxsr_enabled;
2204
2205         /* Calc sr entries for single plane configs */
2206         crtc = single_enabled_crtc(dev_priv);
2207         if (crtc) {
2208                 /* self-refresh has much higher latency */
2209                 static const int sr_latency_ns = 12000;
2210                 const struct drm_display_mode *adjusted_mode =
2211                         &crtc->config->base.adjusted_mode;
2212                 const struct drm_framebuffer *fb =
2213                         crtc->base.primary->state->fb;
2214                 int clock = adjusted_mode->crtc_clock;
2215                 int htotal = adjusted_mode->crtc_htotal;
2216                 int hdisplay = crtc->config->pipe_src_w;
2217                 int cpp = fb->format->cpp[0];
2218                 int entries;
2219
2220                 entries = intel_wm_method2(clock, htotal,
2221                                            hdisplay, cpp, sr_latency_ns / 100);
2222                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2223                 srwm = I965_FIFO_SIZE - entries;
2224                 if (srwm < 0)
2225                         srwm = 1;
2226                 srwm &= 0x1ff;
2227                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2228                               entries, srwm);
2229
2230                 entries = intel_wm_method2(clock, htotal,
2231                                            crtc->base.cursor->state->crtc_w, 4,
2232                                            sr_latency_ns / 100);
2233                 entries = DIV_ROUND_UP(entries,
2234                                        i965_cursor_wm_info.cacheline_size) +
2235                         i965_cursor_wm_info.guard_size;
2236
2237                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2238                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2239                         cursor_sr = i965_cursor_wm_info.max_wm;
2240
2241                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2242                               "cursor %d\n", srwm, cursor_sr);
2243
2244                 cxsr_enabled = true;
2245         } else {
2246                 cxsr_enabled = false;
2247                 /* Turn off self refresh if both pipes are enabled */
2248                 intel_set_memory_cxsr(dev_priv, false);
2249         }
2250
2251         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2252                       srwm);
2253
2254         /* 965 has limitations... */
2255         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2256                    FW_WM(8, CURSORB) |
2257                    FW_WM(8, PLANEB) |
2258                    FW_WM(8, PLANEA));
2259         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2260                    FW_WM(8, PLANEC_OLD));
2261         /* update cursor SR watermark */
2262         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2263
2264         if (cxsr_enabled)
2265                 intel_set_memory_cxsr(dev_priv, true);
2266 }
2267
2268 #undef FW_WM
2269
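/*
 * Legacy (non-atomic) watermark update for gen2/3: compute the plane
 * A/B FIFO watermarks, and the self-refresh watermark when exactly one
 * CRTC is enabled and the platform supports FW_BLC self-refresh.
 */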
2270 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2271 {
2272         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2273         const struct intel_watermark_params *wm_info;
2274         uint32_t fwater_lo;
2275         uint32_t fwater_hi;
2276         int cwm, srwm = 1;
2277         int fifo_size;
2278         int planea_wm, planeb_wm;
2279         struct intel_crtc *crtc, *enabled = NULL;
2280
2281         if (IS_I945GM(dev_priv))
2282                 wm_info = &i945_wm_info;
2283         else if (!IS_GEN2(dev_priv))
2284                 wm_info = &i915_wm_info;
2285         else
2286                 wm_info = &i830_a_wm_info;
2287
2288         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2289         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2290         if (intel_crtc_active(crtc)) {
2291                 const struct drm_display_mode *adjusted_mode =
2292                         &crtc->config->base.adjusted_mode;
2293                 const struct drm_framebuffer *fb =
2294                         crtc->base.primary->state->fb;
2295                 int cpp;
2296
2297                 if (IS_GEN2(dev_priv))
2298                         cpp = 4;
2299                 else
2300                         cpp = fb->format->cpp[0];
2301
2302                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2303                                                wm_info, fifo_size, cpp,
2304                                                pessimal_latency_ns);
2305                 enabled = crtc;
2306         } else {
2307                 planea_wm = fifo_size - wm_info->guard_size;
2308                 if (planea_wm > (long)wm_info->max_wm)
2309                         planea_wm = wm_info->max_wm;
2310         }
2311
2312         if (IS_GEN2(dev_priv))
2313                 wm_info = &i830_bc_wm_info;
2314
2315         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2316         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2317         if (intel_crtc_active(crtc)) {
2318                 const struct drm_display_mode *adjusted_mode =
2319                         &crtc->config->base.adjusted_mode;
2320                 const struct drm_framebuffer *fb =
2321                         crtc->base.primary->state->fb;
2322                 int cpp;
2323
2324                 if (IS_GEN2(dev_priv))
2325                         cpp = 4;
2326                 else
2327                         cpp = fb->format->cpp[0];
2328
2329                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2330                                                wm_info, fifo_size, cpp,
2331                                                pessimal_latency_ns);
2332                 if (enabled == NULL)
2333                         enabled = crtc;
2334                 else
2335                         enabled = NULL;
2336         } else {
2337                 planeb_wm = fifo_size - wm_info->guard_size;
2338                 if (planeb_wm > (long)wm_info->max_wm)
2339                         planeb_wm = wm_info->max_wm;
2340         }
2341
2342         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2343
2344         if (IS_I915GM(dev_priv) && enabled) {
2345                 struct drm_i915_gem_object *obj;
2346
2347                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2348
2349                 /* self-refresh seems busted with untiled */
2350                 if (!i915_gem_object_is_tiled(obj))
2351                         enabled = NULL;
2352         }
2353
2354         /*
2355          * Overlay gets an aggressive default since video jitter is bad.
2356          */
2357         cwm = 2;
2358
2359         /* Play safe and disable self-refresh before adjusting watermarks. */
2360         intel_set_memory_cxsr(dev_priv, false);
2361
2362         /* Calc sr entries for single plane configs */
2363         if (HAS_FW_BLC(dev_priv) && enabled) {
2364                 /* self-refresh has much higher latency */
2365                 static const int sr_latency_ns = 6000;
2366                 const struct drm_display_mode *adjusted_mode =
2367                         &enabled->config->base.adjusted_mode;
2368                 const struct drm_framebuffer *fb =
2369                         enabled->base.primary->state->fb;
2370                 int clock = adjusted_mode->crtc_clock;
2371                 int htotal = adjusted_mode->crtc_htotal;
2372                 int hdisplay = enabled->config->pipe_src_w;
2373                 int cpp;
2374                 int entries;
2375
2376                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2377                         cpp = 4;
2378                 else
2379                         cpp = fb->format->cpp[0];
2380
2381                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2382                                            sr_latency_ns / 100);
2383                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2384                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2385                 srwm = wm_info->fifo_size - entries;
2386                 if (srwm < 0)
2387                         srwm = 1;
2388
2389                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2390                         I915_WRITE(FW_BLC_SELF,
2391                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2392                 else
2393                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2394         }
2395
2396         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2397                       planea_wm, planeb_wm, cwm, srwm);
2398
2399         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2400         fwater_hi = (cwm & 0x1f);
2401
2402         /* Set request length to 8 cachelines per fetch */
2403         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2404         fwater_hi = fwater_hi | (1 << 8);
2405
2406         I915_WRITE(FW_BLC, fwater_lo);
2407         I915_WRITE(FW_BLC2, fwater_hi);
2408
2409         if (enabled)
2410                 intel_set_memory_cxsr(dev_priv, true);
2411 }
2412
2413 static void i845_update_wm(struct intel_crtc *unused_crtc)
2414 {
2415         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2416         struct intel_crtc *crtc;
2417         const struct drm_display_mode *adjusted_mode;
2418         uint32_t fwater_lo;
2419         int planea_wm;
2420
2421         crtc = single_enabled_crtc(dev_priv);
2422         if (crtc == NULL)
2423                 return;
2424
2425         adjusted_mode = &crtc->config->base.adjusted_mode;
2426         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2427                                        &i845_wm_info,
2428                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2429                                        4, pessimal_latency_ns);
2430         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2431         fwater_lo |= (3<<8) | planea_wm;
2432
2433         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2434
2435         I915_WRITE(FW_BLC, fwater_lo);
2436 }
2437
2438 /* latency must be in 0.1us units. */
2439 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2440                                    unsigned int cpp,
2441                                    unsigned int latency)
2442 {
2443         unsigned int ret;
2444
2445         ret = intel_wm_method1(pixel_rate, cpp, latency);
2446         ret = DIV_ROUND_UP(ret, 64) + 2;
2447
2448         return ret;
2449 }
2450
2451 /* latency must be in 0.1us units. */
2452 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2453                                    unsigned int htotal,
2454                                    unsigned int width,
2455                                    unsigned int cpp,
2456                                    unsigned int latency)
2457 {
2458         unsigned int ret;
2459
2460         ret = intel_wm_method2(pixel_rate, htotal,
2461                                width, cpp, latency);
2462         ret = DIV_ROUND_UP(ret, 64) + 2;
2463
2464         return ret;
2465 }
2466
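/*
 * Derive the FBC watermark from the primary watermark: convert the
 * primary value from 64 byte cachelines into lines of the plane, plus
 * a small guard.
 */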
2467 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2468                            uint8_t cpp)
2469 {
2470         /*
2471          * Neither of these should be possible since this function shouldn't be
2472          * called if the CRTC is off or the plane is invisible.  But let's be
2473          * extra paranoid to avoid a potential divide-by-zero if we screw up
2474          * elsewhere in the driver.
2475          */
2476         if (WARN_ON(!cpp))
2477                 return 0;
2478         if (WARN_ON(!horiz_pixels))
2479                 return 0;
2480
2481         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2482 }
2483
2484 struct ilk_wm_maximums {
2485         uint16_t pri;
2486         uint16_t spr;
2487         uint16_t cur;
2488         uint16_t fbc;
2489 };
2490
2491 /*
2492  * For both WM_PIPE and WM_LP.
2493  * mem_value must be in 0.1us units.
2494  */
2495 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2496                                    const struct intel_plane_state *pstate,
2497                                    uint32_t mem_value,
2498                                    bool is_lp)
2499 {
2500         uint32_t method1, method2;
2501         int cpp;
2502
2503         if (mem_value == 0)
2504                 return U32_MAX;
2505
2506         if (!intel_wm_plane_visible(cstate, pstate))
2507                 return 0;
2508
2509         cpp = pstate->base.fb->format->cpp[0];
2510
2511         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2512
2513         if (!is_lp)
2514                 return method1;
2515
2516         method2 = ilk_wm_method2(cstate->pixel_rate,
2517                                  cstate->base.adjusted_mode.crtc_htotal,
2518                                  drm_rect_width(&pstate->base.dst),
2519                                  cpp, mem_value);
2520
2521         return min(method1, method2);
2522 }
2523
2524 /*
2525  * For both WM_PIPE and WM_LP.
2526  * mem_value must be in 0.1us units.
2527  */
2528 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2529                                    const struct intel_plane_state *pstate,
2530                                    uint32_t mem_value)
2531 {
2532         uint32_t method1, method2;
2533         int cpp;
2534
2535         if (mem_value == 0)
2536                 return U32_MAX;
2537
2538         if (!intel_wm_plane_visible(cstate, pstate))
2539                 return 0;
2540
2541         cpp = pstate->base.fb->format->cpp[0];
2542
2543         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2544         method2 = ilk_wm_method2(cstate->pixel_rate,
2545                                  cstate->base.adjusted_mode.crtc_htotal,
2546                                  drm_rect_width(&pstate->base.dst),
2547                                  cpp, mem_value);
2548         return min(method1, method2);
2549 }
2550
2551 /*
2552  * For both WM_PIPE and WM_LP.
2553  * mem_value must be in 0.1us units.
2554  */
2555 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2556                                    const struct intel_plane_state *pstate,
2557                                    uint32_t mem_value)
2558 {
2559         int cpp;
2560
2561         if (mem_value == 0)
2562                 return U32_MAX;
2563
2564         if (!intel_wm_plane_visible(cstate, pstate))
2565                 return 0;
2566
2567         cpp = pstate->base.fb->format->cpp[0];
2568
2569         return ilk_wm_method2(cstate->pixel_rate,
2570                               cstate->base.adjusted_mode.crtc_htotal,
2571                               pstate->base.crtc_w, cpp, mem_value);
2572 }
2573
2574 /* Only for WM_LP. */
2575 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2576                                    const struct intel_plane_state *pstate,
2577                                    uint32_t pri_val)
2578 {
2579         int cpp;
2580
2581         if (!intel_wm_plane_visible(cstate, pstate))
2582                 return 0;
2583
2584         cpp = pstate->base.fb->format->cpp[0];
2585
2586         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2587 }
2588
2589 static unsigned int
2590 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2591 {
2592         if (INTEL_GEN(dev_priv) >= 8)
2593                 return 3072;
2594         else if (INTEL_GEN(dev_priv) >= 7)
2595                 return 768;
2596         else
2597                 return 512;
2598 }
2599
2600 static unsigned int
2601 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2602                      int level, bool is_sprite)
2603 {
2604         if (INTEL_GEN(dev_priv) >= 8)
2605                 /* BDW primary/sprite plane watermarks */
2606                 return level == 0 ? 255 : 2047;
2607         else if (INTEL_GEN(dev_priv) >= 7)
2608                 /* IVB/HSW primary/sprite plane watermarks */
2609                 return level == 0 ? 127 : 1023;
2610         else if (!is_sprite)
2611                 /* ILK/SNB primary plane watermarks */
2612                 return level == 0 ? 127 : 511;
2613         else
2614                 /* ILK/SNB sprite plane watermarks */
2615                 return level == 0 ? 63 : 255;
2616 }
2617
2618 static unsigned int
2619 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2620 {
2621         if (INTEL_GEN(dev_priv) >= 7)
2622                 return level == 0 ? 63 : 255;
2623         else
2624                 return level == 0 ? 31 : 63;
2625 }
2626
2627 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2628 {
2629         if (INTEL_GEN(dev_priv) >= 8)
2630                 return 31;
2631         else
2632                 return 15;
2633 }
2634
2635 /* Calculate the maximum primary/sprite plane watermark */
2636 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2637                                      int level,
2638                                      const struct intel_wm_config *config,
2639                                      enum intel_ddb_partitioning ddb_partitioning,
2640                                      bool is_sprite)
2641 {
2642         struct drm_i915_private *dev_priv = to_i915(dev);
2643         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2644
2645         /* if sprites aren't enabled, sprites get nothing */
2646         if (is_sprite && !config->sprites_enabled)
2647                 return 0;
2648
2649         /* HSW allows LP1+ watermarks even with multiple pipes */
2650         if (level == 0 || config->num_pipes_active > 1) {
2651                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2652
2653                 /*
2654                  * For some reason the non self refresh
2655                  * FIFO size is only half of the self
2656                  * refresh FIFO size on ILK/SNB.
2657                  */
2658                 if (INTEL_GEN(dev_priv) <= 6)
2659                         fifo_size /= 2;
2660         }
2661
2662         if (config->sprites_enabled) {
2663                 /* level 0 is always calculated with 1:1 split */
2664                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2665                         if (is_sprite)
2666                                 fifo_size *= 5;
2667                         fifo_size /= 6;
2668                 } else {
2669                         fifo_size /= 2;
2670                 }
2671         }
2672
2673         /* clamp to max that the registers can hold */
2674         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2675 }
2676
2677 /* Calculate the maximum cursor plane watermark */
2678 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2679                                       int level,
2680                                       const struct intel_wm_config *config)
2681 {
2682         /* HSW LP1+ watermarks w/ multiple pipes */
2683         if (level > 0 && config->num_pipes_active > 1)
2684                 return 64;
2685
2686         /* otherwise just report max that registers can hold */
2687         return ilk_cursor_wm_reg_max(to_i915(dev), level);
2688 }
2689
2690 static void ilk_compute_wm_maximums(const struct drm_device *dev,
2691                                     int level,
2692                                     const struct intel_wm_config *config,
2693                                     enum intel_ddb_partitioning ddb_partitioning,
2694                                     struct ilk_wm_maximums *max)
2695 {
2696         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2697         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2698         max->cur = ilk_cursor_wm_max(dev, level, config);
2699         max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2700 }
2701
2702 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2703                                         int level,
2704                                         struct ilk_wm_maximums *max)
2705 {
2706         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2707         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2708         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2709         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2710 }
2711
2712 static bool ilk_validate_wm_level(int level,
2713                                   const struct ilk_wm_maximums *max,
2714                                   struct intel_wm_level *result)
2715 {
2716         bool ret;
2717
2718         /* already determined to be invalid? */
2719         if (!result->enable)
2720                 return false;
2721
2722         result->enable = result->pri_val <= max->pri &&
2723                          result->spr_val <= max->spr &&
2724                          result->cur_val <= max->cur;
2725
2726         ret = result->enable;
2727
2728         /*
2729          * HACK until we can pre-compute everything,
2730          * and thus fail gracefully if LP0 watermarks
2731          * are exceeded...
2732          */
2733         if (level == 0 && !result->enable) {
2734                 if (result->pri_val > max->pri)
2735                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2736                                       level, result->pri_val, max->pri);
2737                 if (result->spr_val > max->spr)
2738                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2739                                       level, result->spr_val, max->spr);
2740                 if (result->cur_val > max->cur)
2741                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2742                                       level, result->cur_val, max->cur);
2743
2744                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2745                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2746                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2747                 result->enable = true;
2748         }
2749
2750         return ret;
2751 }
2752
2753 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2754                                  const struct intel_crtc *intel_crtc,
2755                                  int level,
2756                                  struct intel_crtc_state *cstate,
2757                                  const struct intel_plane_state *pristate,
2758                                  const struct intel_plane_state *sprstate,
2759                                  const struct intel_plane_state *curstate,
2760                                  struct intel_wm_level *result)
2761 {
2762         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2763         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2764         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2765
2766         /* WM1+ latency values stored in 0.5us units */
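        /*
         * Multiplying by 5 below presumably brings them into the same 0.1us
         * units already used for level 0 (cf. intel_print_wm_latency()).
         */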
2767         if (level > 0) {
2768                 pri_latency *= 5;
2769                 spr_latency *= 5;
2770                 cur_latency *= 5;
2771         }
2772
2773         if (pristate) {
2774                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2775                                                      pri_latency, level);
2776                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2777         }
2778
2779         if (sprstate)
2780                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2781
2782         if (curstate)
2783                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2784
2785         result->enable = true;
2786 }
2787
2788 static uint32_t
2789 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2790 {
2791         const struct intel_atomic_state *intel_state =
2792                 to_intel_atomic_state(cstate->base.state);
2793         const struct drm_display_mode *adjusted_mode =
2794                 &cstate->base.adjusted_mode;
2795         u32 linetime, ips_linetime;
2796
2797         if (!cstate->base.active)
2798                 return 0;
2799         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2800                 return 0;
2801         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2802                 return 0;
2803
2804         /* The WMs are computed based on how long it takes to fill a
2805          * single row at the given clock rate, multiplied by 8.
2806          */
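        /*
         * Illustrative example: a 1080p60 mode with crtc_htotal 2200 and a
         * 148500 kHz crtc_clock gives DIV_ROUND_CLOSEST(2200 * 1000 * 8,
         * 148500) ~= 119, i.e. roughly 14.8us expressed in 1/8 us units.
         */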
2807         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2808                                      adjusted_mode->crtc_clock);
2809         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2810                                          intel_state->cdclk.logical.cdclk);
2811
2812         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2813                PIPE_WM_LINETIME_TIME(linetime);
2814 }
2815
2816 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2817                                   uint16_t wm[8])
2818 {
2819         if (INTEL_GEN(dev_priv) >= 9) {
2820                 uint32_t val;
2821                 int ret, i;
2822                 int level, max_level = ilk_wm_max_level(dev_priv);
2823
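                /*
                 * Each GEN9_PCODE_READ_MEM_LATENCY read below returns four
                 * 8-bit latency values packed into one dword, apparently in
                 * whole microseconds (cf. intel_print_wm_latency()); data0=0
                 * selects levels 0-3 and data0=1 selects levels 4-7.
                 */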
2824                 /* read the first set of memory latencies[0:3] */
2825                 val = 0; /* data0 to be programmed to 0 for first set */
2826                 mutex_lock(&dev_priv->pcu_lock);
2827                 ret = sandybridge_pcode_read(dev_priv,
2828                                              GEN9_PCODE_READ_MEM_LATENCY,
2829                                              &val);
2830                 mutex_unlock(&dev_priv->pcu_lock);
2831
2832                 if (ret) {
2833                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2834                         return;
2835                 }
2836
2837                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2838                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2839                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2840                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2841                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2842                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2843                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2844
2845                 /* read the second set of memory latencies[4:7] */
2846                 val = 1; /* data0 to be programmed to 1 for second set */
2847                 mutex_lock(&dev_priv->pcu_lock);
2848                 ret = sandybridge_pcode_read(dev_priv,
2849                                              GEN9_PCODE_READ_MEM_LATENCY,
2850                                              &val);
2851                 mutex_unlock(&dev_priv->pcu_lock);
2852                 if (ret) {
2853                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2854                         return;
2855                 }
2856
2857                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2858                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2859                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2860                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2861                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2862                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2863                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2864
2865                 /*
2866                  * If a level n (n >= 1) has a 0us latency, all levels m (m >= n)
2867                  * need to be disabled. We make sure to sanitize the values out
2868                  * of the punit to satisfy this requirement.
2869                  */
2870                 for (level = 1; level <= max_level; level++) {
2871                         if (wm[level] == 0) {
2872                                 for (i = level + 1; i <= max_level; i++)
2873                                         wm[i] = 0;
2874                                 break;
2875                         }
2876                 }
2877
2878                 /*
2879                  * WaWmMemoryReadLatency:skl+,glk
2880                  *
2881                  * punit doesn't take into account the read latency, so we need
2882                  * to add 2us to the various latency levels we retrieve from the
2883                  * punit when the level 0 response data is 0us.
2884                  */
2885                 if (wm[0] == 0) {
2886                         wm[0] += 2;
2887                         for (level = 1; level <= max_level; level++) {
2888                                 if (wm[level] == 0)
2889                                         break;
2890                                 wm[level] += 2;
2891                         }
2892                 }
2893
2894         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2895                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2896
2897                 wm[0] = (sskpd >> 56) & 0xFF;
2898                 if (wm[0] == 0)
2899                         wm[0] = sskpd & 0xF;
2900                 wm[1] = (sskpd >> 4) & 0xFF;
2901                 wm[2] = (sskpd >> 12) & 0xFF;
2902                 wm[3] = (sskpd >> 20) & 0x1FF;
2903                 wm[4] = (sskpd >> 32) & 0x1FF;
2904         } else if (INTEL_GEN(dev_priv) >= 6) {
2905                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2906
2907                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2908                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2909                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2910                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2911         } else if (INTEL_GEN(dev_priv) >= 5) {
2912                 uint32_t mltr = I915_READ(MLTR_ILK);
2913
2914                 /* ILK primary LP0 latency is 700 ns */
2915                 wm[0] = 7;
2916                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2917                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2918         } else {
2919                 MISSING_CASE(INTEL_DEVID(dev_priv));
2920         }
2921 }
2922
2923 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2924                                        uint16_t wm[5])
2925 {
2926         /* ILK sprite LP0 latency is 1300 ns */
2927         if (IS_GEN5(dev_priv))
2928                 wm[0] = 13;
2929 }
2930
2931 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2932                                        uint16_t wm[5])
2933 {
2934         /* ILK cursor LP0 latency is 1300 ns */
2935         if (IS_GEN5(dev_priv))
2936                 wm[0] = 13;
2937 }
2938
2939 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2940 {
2941         /* how many WM levels are we expecting */
2942         if (INTEL_GEN(dev_priv) >= 9)
2943                 return 7;
2944         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2945                 return 4;
2946         else if (INTEL_GEN(dev_priv) >= 6)
2947                 return 3;
2948         else
2949                 return 2;
2950 }
2951
2952 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2953                                    const char *name,
2954                                    const uint16_t wm[])
2955 {
2956         int level, max_level = ilk_wm_max_level(dev_priv);
2957
2958         for (level = 0; level <= max_level; level++) {
2959                 unsigned int latency = wm[level];
2960
2961                 if (latency == 0) {
2962                         DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2963                                       name, level);
2964                         continue;
2965                 }
2966
2967                 /*
2968                  * - latencies are in us on gen9.
2969                  * - before then, WM1+ latency values are in 0.5us units
2970                  */
2971                 if (INTEL_GEN(dev_priv) >= 9)
2972                         latency *= 10;
2973                 else if (level > 0)
2974                         latency *= 5;
2975
2976                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2977                               name, level, wm[level],
2978                               latency / 10, latency % 10);
2979         }
2980 }
2981
2982 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2983                                     uint16_t wm[5], uint16_t min)
2984 {
2985         int level, max_level = ilk_wm_max_level(dev_priv);
2986
2987         if (wm[0] >= min)
2988                 return false;
2989
2990         wm[0] = max(wm[0], min);
2991         for (level = 1; level <= max_level; level++)
2992                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2993
2994         return true;
2995 }
2996
2997 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
2998 {
2999         bool changed;
3000
3001         /*
3002          * The BIOS provided WM memory latency values are often
3003          * inadequate for high resolution displays. Adjust them.
3004          */
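        /*
         * Units note: 12 is in the 0.1us steps used for WM0, i.e. a 1.2us
         * floor; the WM1+ floors then become DIV_ROUND_UP(12, 5) = 3 in
         * 0.5us units, i.e. 1.5us (see ilk_increase_wm_latency()).
         */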
3005         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12);
3006         changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12);
3007         changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3008
3009         if (!changed)
3010                 return;
3011
3012         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3013         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3014         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3015         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3016 }
3017
3018 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3019 {
3020         /*
3021          * On some SNB machines (Thinkpad X220 Tablet at least)
3022          * LP3 usage can cause vblank interrupts to be lost.
3023          * The DEIIR bit will go high but it looks like the CPU
3024          * never gets interrupted.
3025          *
3026          * It's not clear whether other interrupt sources could
3027          * be affected or if this is somehow limited to vblank
3028          * interrupts only. To play it safe we disable LP3
3029          * watermarks entirely.
3030          */
3031         if (dev_priv->wm.pri_latency[3] == 0 &&
3032             dev_priv->wm.spr_latency[3] == 0 &&
3033             dev_priv->wm.cur_latency[3] == 0)
3034                 return;
3035
3036         dev_priv->wm.pri_latency[3] = 0;
3037         dev_priv->wm.spr_latency[3] = 0;
3038         dev_priv->wm.cur_latency[3] = 0;
3039
3040         DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3041         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3042         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3043         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3044 }
3045
3046 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3047 {
3048         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3049
3050         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3051                sizeof(dev_priv->wm.pri_latency));
3052         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3053                sizeof(dev_priv->wm.pri_latency));
3054
3055         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3056         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3057
3058         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3059         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3060         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3061
3062         if (IS_GEN6(dev_priv)) {
3063                 snb_wm_latency_quirk(dev_priv);
3064                 snb_wm_lp3_irq_quirk(dev_priv);
3065         }
3066 }
3067
3068 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3069 {
3070         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3071         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3072 }
3073
3074 static bool ilk_validate_pipe_wm(struct drm_device *dev,
3075                                  struct intel_pipe_wm *pipe_wm)
3076 {
3077         /* LP0 watermark maximums depend on this pipe alone */
3078         const struct intel_wm_config config = {
3079                 .num_pipes_active = 1,
3080                 .sprites_enabled = pipe_wm->sprites_enabled,
3081                 .sprites_scaled = pipe_wm->sprites_scaled,
3082         };
3083         struct ilk_wm_maximums max;
3084
3085         /* LP0 watermarks always use 1/2 DDB partitioning */
3086         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
3087
3088         /* At least LP0 must be valid */
3089         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3090                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3091                 return false;
3092         }
3093
3094         return true;
3095 }
3096
3097 /* Compute new watermarks for the pipe */
3098 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3099 {
3100         struct drm_atomic_state *state = cstate->base.state;
3101         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3102         struct intel_pipe_wm *pipe_wm;
3103         struct drm_device *dev = state->dev;
3104         const struct drm_i915_private *dev_priv = to_i915(dev);
3105         struct drm_plane *plane;
3106         const struct drm_plane_state *plane_state;
3107         const struct intel_plane_state *pristate = NULL;
3108         const struct intel_plane_state *sprstate = NULL;
3109         const struct intel_plane_state *curstate = NULL;
3110         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3111         struct ilk_wm_maximums max;
3112
3113         pipe_wm = &cstate->wm.ilk.optimal;
3114
3115         drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3116                 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3117
3118                 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3119                         pristate = ps;
3120                 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3121                         sprstate = ps;
3122                 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3123                         curstate = ps;
3124         }
3125
3126         pipe_wm->pipe_enabled = cstate->base.active;
3127         if (sprstate) {
3128                 pipe_wm->sprites_enabled = sprstate->base.visible;
3129                 pipe_wm->sprites_scaled = sprstate->base.visible &&
3130                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3131                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3132         }
3133
3134         usable_level = max_level;
3135
3136         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3137         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3138                 usable_level = 1;
3139
3140         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3141         if (pipe_wm->sprites_scaled)
3142                 usable_level = 0;
3143
3144         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3145         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3146                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3147
3148         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3149                 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3150
3151         if (!ilk_validate_pipe_wm(dev, pipe_wm))
3152                 return -EINVAL;
3153
3154         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3155
3156         for (level = 1; level <= usable_level; level++) {
3157                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3158
3159                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3160                                      pristate, sprstate, curstate, wm);
3161
3162                 /*
3163                  * Disable any watermark level that exceeds the
3164                  * register maximums since such watermarks are
3165                  * always invalid.
3166                  */
3167                 if (!ilk_validate_wm_level(level, &max, wm)) {
3168                         memset(wm, 0, sizeof(*wm));
3169                         break;
3170                 }
3171         }
3172
3173         return 0;
3174 }
3175
3176 /*
3177  * Build a set of 'intermediate' watermark values that satisfy both the old
3178  * state and the new state.  These can be programmed to the hardware
3179  * immediately.
3180  */
3181 static int ilk_compute_intermediate_wm(struct drm_device *dev,
3182                                        struct intel_crtc *intel_crtc,
3183                                        struct intel_crtc_state *newstate)
3184 {
3185         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3186         struct intel_atomic_state *intel_state =
3187                 to_intel_atomic_state(newstate->base.state);
3188         const struct intel_crtc_state *oldstate =
3189                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3190         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3191         int level, max_level = ilk_wm_max_level(to_i915(dev));
3192
3193         /*
3194          * Start with the final, target watermarks, then combine with the
3195          * currently active watermarks to get values that are safe both before
3196          * and after the vblank.
3197          */
3198         *a = newstate->wm.ilk.optimal;
3199         if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
3200                 return 0;
3201
3202         a->pipe_enabled |= b->pipe_enabled;
3203         a->sprites_enabled |= b->sprites_enabled;
3204         a->sprites_scaled |= b->sprites_scaled;
3205
3206         for (level = 0; level <= max_level; level++) {
3207                 struct intel_wm_level *a_wm = &a->wm[level];
3208                 const struct intel_wm_level *b_wm = &b->wm[level];
3209
3210                 a_wm->enable &= b_wm->enable;
3211                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3212                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3213                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3214                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3215         }
3216
3217         /*
3218          * We need to make sure that these merged watermark values are
3219          * actually a valid configuration themselves.  If they're not,
3220          * there's no safe way to transition from the old state to
3221          * the new state, so we need to fail the atomic transaction.
3222          */
3223         if (!ilk_validate_pipe_wm(dev, a))
3224                 return -EINVAL;
3225
3226         /*
3227          * If our intermediate WMs are identical to the final WMs, then we can
3228          * omit the post-vblank programming; only update if it's different.
3229          */
3230         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3231                 newstate->wm.need_postvbl_update = true;
3232
3233         return 0;
3234 }
3235
3236 /*
3237  * Merge the watermarks from all active pipes for a specific level.
3238  */
3239 static void ilk_merge_wm_level(struct drm_device *dev,
3240                                int level,
3241                                struct intel_wm_level *ret_wm)
3242 {
3243         const struct intel_crtc *intel_crtc;
3244
3245         ret_wm->enable = true;
3246
3247         for_each_intel_crtc(dev, intel_crtc) {
3248                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3249                 const struct intel_wm_level *wm = &active->wm[level];
3250
3251                 if (!active->pipe_enabled)
3252                         continue;
3253
3254                 /*
3255                  * The watermark values may have been used in the past,
3256                  * so we must maintain them in the registers for some
3257                  * time even if the level is now disabled.
3258                  */
3259                 if (!wm->enable)
3260                         ret_wm->enable = false;
3261
3262                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3263                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3264                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3265                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3266         }
3267 }
3268
3269 /*
3270  * Merge all low power watermarks for all active pipes.
3271  */
3272 static void ilk_wm_merge(struct drm_device *dev,
3273                          const struct intel_wm_config *config,
3274                          const struct ilk_wm_maximums *max,
3275                          struct intel_pipe_wm *merged)
3276 {
3277         struct drm_i915_private *dev_priv = to_i915(dev);
3278         int level, max_level = ilk_wm_max_level(dev_priv);
3279         int last_enabled_level = max_level;
3280
3281         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3282         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3283             config->num_pipes_active > 1)
3284                 last_enabled_level = 0;
3285
3286         /* ILK: FBC WM must be disabled always */
3287         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3288
3289         /* merge each WM1+ level */
3290         for (level = 1; level <= max_level; level++) {
3291                 struct intel_wm_level *wm = &merged->wm[level];
3292
3293                 ilk_merge_wm_level(dev, level, wm);
3294
3295                 if (level > last_enabled_level)
3296                         wm->enable = false;
3297                 else if (!ilk_validate_wm_level(level, max, wm))
3298                         /* make sure all following levels get disabled */
3299                         last_enabled_level = level - 1;
3300
3301                 /*
3302                  * The spec says it is preferred to disable
3303                  * FBC WMs instead of disabling a WM level.
3304                  */
3305                 if (wm->fbc_val > max->fbc) {
3306                         if (wm->enable)
3307                                 merged->fbc_wm_enabled = false;
3308                         wm->fbc_val = 0;
3309                 }
3310         }
3311
3312         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3313         /*
3314          * FIXME this is racy. FBC might get enabled later.
3315          * What we should check here is whether FBC can be
3316          * enabled sometime later.
3317          */
3318         if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
3319             intel_fbc_is_active(dev_priv)) {
3320                 for (level = 2; level <= max_level; level++) {
3321                         struct intel_wm_level *wm = &merged->wm[level];
3322
3323                         wm->enable = false;
3324                 }
3325         }
3326 }
3327
3328 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3329 {
3330         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
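        /*
         * With all five HSW/BDW levels in use (wm[4] enabled), LP2/LP3 skip
         * over level 2 and map to levels 3/4; otherwise the mapping is 1:1.
         */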
3331         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3332 }
3333
3334 /* The value we need to program into the WM_LPx latency field */
3335 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
3336 {
3337         struct drm_i915_private *dev_priv = to_i915(dev);
3338
3339         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3340                 return 2 * level;
3341         else
3342                 return dev_priv->wm.pri_latency[level];
3343 }
3344
3345 static void ilk_compute_wm_results(struct drm_device *dev,
3346                                    const struct intel_pipe_wm *merged,
3347                                    enum intel_ddb_partitioning partitioning,
3348                                    struct ilk_wm_values *results)
3349 {
3350         struct drm_i915_private *dev_priv = to_i915(dev);
3351         struct intel_crtc *intel_crtc;
3352         int level, wm_lp;
3353
3354         results->enable_fbc_wm = merged->fbc_wm_enabled;
3355         results->partitioning = partitioning;
3356
3357         /* LP1+ register values */
3358         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3359                 const struct intel_wm_level *r;
3360
3361                 level = ilk_wm_lp_to_level(wm_lp, merged);
3362
3363                 r = &merged->wm[level];
3364
3365                 /*
3366                  * Maintain the watermark values even if the level is
3367                  * disabled. Doing otherwise could cause underruns.
3368                  */
3369                 results->wm_lp[wm_lp - 1] =
3370                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
3371                         (r->pri_val << WM1_LP_SR_SHIFT) |
3372                         r->cur_val;
3373
3374                 if (r->enable)
3375                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3376
3377                 if (INTEL_GEN(dev_priv) >= 8)
3378                         results->wm_lp[wm_lp - 1] |=
3379                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3380                 else
3381                         results->wm_lp[wm_lp - 1] |=
3382                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3383
3384                 /*
3385                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3386                  * level is disabled. Doing otherwise could cause underruns.
3387                  */
3388                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3389                         WARN_ON(wm_lp != 1);
3390                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3391                 } else
3392                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3393         }
3394
3395         /* LP0 register values */
3396         for_each_intel_crtc(dev, intel_crtc) {
3397                 enum pipe pipe = intel_crtc->pipe;
3398                 const struct intel_wm_level *r =
3399                         &intel_crtc->wm.active.ilk.wm[0];
3400
3401                 if (WARN_ON(!r->enable))
3402                         continue;
3403
3404                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3405
3406                 results->wm_pipe[pipe] =
3407                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3408                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3409                         r->cur_val;
3410         }
3411 }
3412
3413 /* Find the result with the highest level enabled. If both are at the same
3414  * level, prefer the one with enable_fbc_wm set, and r1 if that's equal too. */
3415 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
3416                                                   struct intel_pipe_wm *r1,
3417                                                   struct intel_pipe_wm *r2)
3418 {
3419         int level, max_level = ilk_wm_max_level(to_i915(dev));
3420         int level1 = 0, level2 = 0;
3421
3422         for (level = 1; level <= max_level; level++) {
3423                 if (r1->wm[level].enable)
3424                         level1 = level;
3425                 if (r2->wm[level].enable)
3426                         level2 = level;
3427         }
3428
3429         if (level1 == level2) {
3430                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3431                         return r2;
3432                 else
3433                         return r1;
3434         } else if (level1 > level2) {
3435                 return r1;
3436         } else {
3437                 return r2;
3438         }
3439 }
3440
3441 /* dirty bits used to track which watermarks need changes */
3442 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3443 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3444 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3445 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3446 #define WM_DIRTY_FBC (1 << 24)
3447 #define WM_DIRTY_DDB (1 << 25)
3448
3449 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3450                                          const struct ilk_wm_values *old,
3451                                          const struct ilk_wm_values *new)
3452 {
3453         unsigned int dirty = 0;
3454         enum pipe pipe;
3455         int wm_lp;
3456
3457         for_each_pipe(dev_priv, pipe) {
3458                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3459                         dirty |= WM_DIRTY_LINETIME(pipe);
3460                         /* Must disable LP1+ watermarks too */
3461                         dirty |= WM_DIRTY_LP_ALL;
3462                 }
3463
3464                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3465                         dirty |= WM_DIRTY_PIPE(pipe);
3466                         /* Must disable LP1+ watermarks too */
3467                         dirty |= WM_DIRTY_LP_ALL;
3468                 }
3469         }
3470
3471         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3472                 dirty |= WM_DIRTY_FBC;
3473                 /* Must disable LP1+ watermarks too */
3474                 dirty |= WM_DIRTY_LP_ALL;
3475         }
3476
3477         if (old->partitioning != new->partitioning) {
3478                 dirty |= WM_DIRTY_DDB;
3479                 /* Must disable LP1+ watermarks too */
3480                 dirty |= WM_DIRTY_LP_ALL;
3481         }
3482
3483         /* LP1+ watermarks already deemed dirty, no need to continue */
3484         if (dirty & WM_DIRTY_LP_ALL)
3485                 return dirty;
3486
3487         /* Find the lowest numbered LP1+ watermark in need of an update... */
3488         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3489                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3490                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3491                         break;
3492         }
3493
3494         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3495         for (; wm_lp <= 3; wm_lp++)
3496                 dirty |= WM_DIRTY_LP(wm_lp);
3497
3498         return dirty;
3499 }
3500
3501 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3502                                unsigned int dirty)
3503 {
3504         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3505         bool changed = false;
3506
3507         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3508                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3509                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3510                 changed = true;
3511         }
3512         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3513                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3514                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3515                 changed = true;
3516         }
3517         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3518                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3519                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3520                 changed = true;
3521         }
3522
3523         /*
3524          * Don't touch WM1S_LP_EN here.
3525          * Doing so could cause underruns.
3526          */
3527
3528         return changed;
3529 }
3530
3531 /*
3532  * The spec says we shouldn't write when we don't need to, because every write
3533  * causes WMs to be re-evaluated, expending some power.
3534  */
3535 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3536                                 struct ilk_wm_values *results)
3537 {
3538         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3539         unsigned int dirty;
3540         uint32_t val;
3541
3542         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3543         if (!dirty)
3544                 return;
3545
3546         _ilk_disable_lp_wm(dev_priv, dirty);
3547
3548         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3549                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3550         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3551                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3552         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3553                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3554
3555         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3556                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3557         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3558                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3559         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3560                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3561
3562         if (dirty & WM_DIRTY_DDB) {
3563                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3564                         val = I915_READ(WM_MISC);
3565                         if (results->partitioning == INTEL_DDB_PART_1_2)
3566                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3567                         else
3568                                 val |= WM_MISC_DATA_PARTITION_5_6;
3569                         I915_WRITE(WM_MISC, val);
3570                 } else {
3571                         val = I915_READ(DISP_ARB_CTL2);
3572                         if (results->partitioning == INTEL_DDB_PART_1_2)
3573                                 val &= ~DISP_DATA_PARTITION_5_6;
3574                         else
3575                                 val |= DISP_DATA_PARTITION_5_6;
3576                         I915_WRITE(DISP_ARB_CTL2, val);
3577                 }
3578         }
3579
3580         if (dirty & WM_DIRTY_FBC) {
3581                 val = I915_READ(DISP_ARB_CTL);
3582                 if (results->enable_fbc_wm)
3583                         val &= ~DISP_FBC_WM_DIS;
3584                 else
3585                         val |= DISP_FBC_WM_DIS;
3586                 I915_WRITE(DISP_ARB_CTL, val);
3587         }
3588
3589         if (dirty & WM_DIRTY_LP(1) &&
3590             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3591                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3592
3593         if (INTEL_GEN(dev_priv) >= 7) {
3594                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3595                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3596                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3597                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3598         }
3599
3600         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3601                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3602         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3603                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3604         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3605                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3606
3607         dev_priv->wm.hw = *results;
3608 }
3609
3610 bool ilk_disable_lp_wm(struct drm_device *dev)
3611 {
3612         struct drm_i915_private *dev_priv = to_i915(dev);
3613
3614         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3615 }
3616
3617 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3618 {
3619         u8 enabled_slices;
3620
3621         /* Slice 1 will always be enabled */
3622         enabled_slices = 1;
3623
3624         /* Gens prior to GEN11 have only one DBuf slice */
3625         if (INTEL_GEN(dev_priv) < 11)
3626                 return enabled_slices;
3627
3628         if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3629                 enabled_slices++;
3630
3631         return enabled_slices;
3632 }
3633
3634 /*
3635  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3636  * so assume we'll always need it in order to avoid underruns.
3637  */
3638 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3639 {
3640         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3641
3642         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3643                 return true;
3644
3645         return false;
3646 }
3647
3648 static bool
3649 intel_has_sagv(struct drm_i915_private *dev_priv)
3650 {
3651         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
3652             IS_CANNONLAKE(dev_priv))
3653                 return true;
3654
3655         if (IS_SKYLAKE(dev_priv) &&
3656             dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3657                 return true;
3658
3659         return false;
3660 }
3661
3662 /*
3663  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3664  * depending on power and performance requirements. The display engine access
3665  * to system memory is blocked during the adjustment time. Because of the
3666  * blocking time, having this enabled can cause full system hangs and/or pipe
3667  * underruns if we don't meet all of the following requirements:
3668  *
3669  *  - <= 1 pipe enabled
3670  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3671  *  - We're not using an interlaced display configuration
3672  */
3673 int
3674 intel_enable_sagv(struct drm_i915_private *dev_priv)
3675 {
3676         int ret;
3677
3678         if (!intel_has_sagv(dev_priv))
3679                 return 0;
3680
3681         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3682                 return 0;
3683
3684         DRM_DEBUG_KMS("Enabling the SAGV\n");
3685         mutex_lock(&dev_priv->pcu_lock);
3686
3687         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3688                                       GEN9_SAGV_ENABLE);
3689
3690         /* We don't need to wait for the SAGV when enabling */
3691         mutex_unlock(&dev_priv->pcu_lock);
3692
3693         /*
3694          * Some skl systems, pre-release machines in particular,
3695          * don't actually have an SAGV.
3696          */
3697         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3698                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3699                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3700                 return 0;
3701         } else if (ret < 0) {
3702                 DRM_ERROR("Failed to enable the SAGV\n");
3703                 return ret;
3704         }
3705
3706         dev_priv->sagv_status = I915_SAGV_ENABLED;
3707         return 0;
3708 }
3709
3710 int
3711 intel_disable_sagv(struct drm_i915_private *dev_priv)
3712 {
3713         int ret;
3714
3715         if (!intel_has_sagv(dev_priv))
3716                 return 0;
3717
3718         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3719                 return 0;
3720
3721         DRM_DEBUG_KMS("Disabling the SAGV\n");
3722         mutex_lock(&dev_priv->pcu_lock);
3723
3724         /* bspec says to keep retrying for at least 1 ms */
3725         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3726                                 GEN9_SAGV_DISABLE,
3727                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3728                                 1);
3729         mutex_unlock(&dev_priv->pcu_lock);
3730
3731         /*
3732          * Some skl systems, pre-release machines in particular,
3733          * don't actually have an SAGV.
3734          */
3735         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3736                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3737                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3738                 return 0;
3739         } else if (ret < 0) {
3740                 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3741                 return ret;
3742         }
3743
3744         dev_priv->sagv_status = I915_SAGV_DISABLED;
3745         return 0;
3746 }
3747
3748 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3749 {
3750         struct drm_device *dev = state->dev;
3751         struct drm_i915_private *dev_priv = to_i915(dev);
3752         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3753         struct intel_crtc *crtc;
3754         struct intel_plane *plane;
3755         struct intel_crtc_state *cstate;
3756         enum pipe pipe;
3757         int level, latency;
3758         int sagv_block_time_us;
3759
3760         if (!intel_has_sagv(dev_priv))
3761                 return false;
3762
3763         if (IS_GEN9(dev_priv))
3764                 sagv_block_time_us = 30;
3765         else if (IS_GEN10(dev_priv))
3766                 sagv_block_time_us = 20;
3767         else
3768                 sagv_block_time_us = 10;
3769
3770         /*
3771          * SKL+ workaround: bspec recommends we disable the SAGV when we have
3772          * more than one pipe enabled
3773          *
3774          * If there are no active CRTCs, no additional checks need be performed
3775          */
3776         if (hweight32(intel_state->active_crtcs) == 0)
3777                 return true;
3778         else if (hweight32(intel_state->active_crtcs) > 1)
3779                 return false;
3780
3781         /* Since we're now guaranteed to only have one active CRTC... */
3782         pipe = ffs(intel_state->active_crtcs) - 1;
3783         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3784         cstate = to_intel_crtc_state(crtc->base.state);
3785
3786         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3787                 return false;
3788
3789         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3790                 struct skl_plane_wm *wm =
3791                         &cstate->wm.skl.optimal.planes[plane->id];
3792
3793                 /* Skip this plane if it's not enabled */
3794                 if (!wm->wm[0].plane_en)
3795                         continue;
3796
3797                 /* Find the highest enabled wm level for this plane */
3798                 for (level = ilk_wm_max_level(dev_priv);
3799                      !wm->wm[level].plane_en; --level)
3800                      { }
3801
3802                 latency = dev_priv->wm.skl_latency[level];
3803
3804                 if (skl_needs_memory_bw_wa(intel_state) &&
3805                     plane->base.state->fb->modifier ==
3806                     I915_FORMAT_MOD_X_TILED)
3807                         latency += 15;
3808
3809                 /*
3810                  * If any of the planes on this pipe don't enable wm levels that
3811                  * incur memory latencies higher than sagv_block_time_us, we
3812                  * can't enable the SAGV.
3813                  */
3814                 if (latency < sagv_block_time_us)
3815                         return false;
3816         }
3817
3818         return true;
3819 }
3820
3821 static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv,
3822                                        const struct intel_crtc_state *cstate,
3823                                        const unsigned int total_data_rate,
3824                                        const int num_active,
3825                                        struct skl_ddb_allocation *ddb)
3826 {
3827         const struct drm_display_mode *adjusted_mode;
3828         u64 total_data_bw;
3829         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3830
3831         WARN_ON(ddb_size == 0);
3832
3833         if (INTEL_GEN(dev_priv) < 11)
3834                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3835
3836         adjusted_mode = &cstate->base.adjusted_mode;
3837         total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
3838
3839         /*
3840          * 12GB/s is the maximum BW supported by a single DBuf slice.
3841          */
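        /*
         * In other words: with more than one active pipe, or a single pipe
         * pushing 12GB/s or more, both slices are powered and the full DDB
         * is usable; otherwise only slice 1 is used and half the DDB.
         */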
3842         if (total_data_bw >= GBps(12) || num_active > 1) {
3843                 ddb->enabled_slices = 2;
3844         } else {
3845                 ddb->enabled_slices = 1;
3846                 ddb_size /= 2;
3847         }
3848
3849         return ddb_size;
3850 }
3851
3852 static void
3853 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3854                                    const struct intel_crtc_state *cstate,
3855                                    const unsigned int total_data_rate,
3856                                    struct skl_ddb_allocation *ddb,
3857                                    struct skl_ddb_entry *alloc, /* out */
3858                                    int *num_active /* out */)
3859 {
3860         struct drm_atomic_state *state = cstate->base.state;
3861         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3862         struct drm_i915_private *dev_priv = to_i915(dev);
3863         struct drm_crtc *for_crtc = cstate->base.crtc;
3864         unsigned int pipe_size, ddb_size;
3865         int nth_active_pipe;
3866
3867         if (WARN_ON(!state) || !cstate->base.active) {
3868                 alloc->start = 0;
3869                 alloc->end = 0;
3870                 *num_active = hweight32(dev_priv->active_crtcs);
3871                 return;
3872         }
3873
3874         if (intel_state->active_pipe_changes)
3875                 *num_active = hweight32(intel_state->active_crtcs);
3876         else
3877                 *num_active = hweight32(dev_priv->active_crtcs);
3878
3879         ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3880                                       *num_active, ddb);
3881
3882         /*
3883          * If the state doesn't change the active CRTCs, then there's
3884          * no need to recalculate; the existing pipe allocation limits
3885          * should remain unchanged.  Note that we're safe from racing
3886          * commits since any racing commit that changes the active CRTC
3887          * list would need to grab _all_ crtc locks, including the one
3888          * we currently hold.
3889          */
3890         if (!intel_state->active_pipe_changes) {
3891                 /*
3892                  * alloc may be cleared by clear_intel_crtc_state,
3893                  * copy from old state to be sure
3894                  */
3895                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3896                 return;
3897         }
3898
3899         nth_active_pipe = hweight32(intel_state->active_crtcs &
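        /*
         * Hedged example with made-up numbers: given an 892 block DDB and
         * two active pipes, each pipe gets 446 blocks; the second active
         * pipe's allocation would span [446, 892).
         */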
3900                                     (drm_crtc_mask(for_crtc) - 1));
3901         pipe_size = ddb_size / hweight32(intel_state->active_crtcs);
3902         alloc->start = nth_active_pipe * ddb_size / *num_active;
3903         alloc->end = alloc->start + pipe_size;
3904 }
3905
3906 static unsigned int skl_cursor_allocation(int num_active)
3907 {
3908         if (num_active == 1)
3909                 return 32;
3910
3911         return 8;
3912 }
3913
3914 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3915                                        struct skl_ddb_entry *entry, u32 reg)
3916 {
3917         u16 mask;
3918
3919         if (INTEL_GEN(dev_priv) >= 11)
3920                 mask = ICL_DDB_ENTRY_MASK;
3921         else
3922                 mask = SKL_DDB_ENTRY_MASK;
3923         entry->start = reg & mask;
3924         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
3925
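        /*
         * The +1 appears to convert an inclusive hardware end block into the
         * driver's exclusive representation; a zero register reads back as an
         * empty entry either way.
         */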
3926         if (entry->end)
3927                 entry->end += 1;
3928 }
3929
3930 static void
3931 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3932                            const enum pipe pipe,
3933                            const enum plane_id plane_id,
3934                            struct skl_ddb_allocation *ddb /* out */)
3935 {
3936         u32 val, val2 = 0;
3937         int fourcc, pixel_format;
3938
3939         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3940         if (plane_id == PLANE_CURSOR) {
3941                 val = I915_READ(CUR_BUF_CFG(pipe));
3942                 skl_ddb_entry_init_from_hw(dev_priv,
3943                                            &ddb->plane[pipe][plane_id], val);
3944                 return;
3945         }
3946
3947         val = I915_READ(PLANE_CTL(pipe, plane_id));
3948
3949         /* No DDB allocated for disabled planes */
3950         if (!(val & PLANE_CTL_ENABLE))
3951                 return;
3952
3953         pixel_format = val & PLANE_CTL_FORMAT_MASK;
3954         fourcc = skl_format_to_fourcc(pixel_format,
3955                                       val & PLANE_CTL_ORDER_RGBX,
3956                                       val & PLANE_CTL_ALPHA_MASK);
3957
3958         val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3959         val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
3960
3961         if (fourcc == DRM_FORMAT_NV12) {
3962                 skl_ddb_entry_init_from_hw(dev_priv,
3963                                            &ddb->plane[pipe][plane_id], val2);
3964                 skl_ddb_entry_init_from_hw(dev_priv,
3965                                            &ddb->uv_plane[pipe][plane_id], val);
3966         } else {
3967                 skl_ddb_entry_init_from_hw(dev_priv,
3968                                            &ddb->plane[pipe][plane_id], val);
3969         }
3970 }
3971
3972 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
3973                           struct skl_ddb_allocation *ddb /* out */)
3974 {
3975         struct intel_crtc *crtc;
3976
3977         memset(ddb, 0, sizeof(*ddb));
3978
3979         ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
3980
3981         for_each_intel_crtc(&dev_priv->drm, crtc) {
3982                 enum intel_display_power_domain power_domain;
3983                 enum plane_id plane_id;
3984                 enum pipe pipe = crtc->pipe;
3985
3986                 power_domain = POWER_DOMAIN_PIPE(pipe);
3987                 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
3988                         continue;
3989
3990                 for_each_plane_id_on_crtc(crtc, plane_id)
3991                         skl_ddb_get_hw_plane_state(dev_priv, pipe,
3992                                                    plane_id, ddb);
3993
3994                 intel_display_power_put(dev_priv, power_domain);
3995         }
3996 }
3997
3998 /*
3999  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4000  * The bspec defines downscale amount as:
4001  *
4002  * """
4003  * Horizontal down scale amount = maximum[1, Horizontal source size /
4004  *                                           Horizontal destination size]
4005  * Vertical down scale amount = maximum[1, Vertical source size /
4006  *                                         Vertical destination size]
4007  * Total down scale amount = Horizontal down scale amount *
4008  *                           Vertical down scale amount
4009  * """
4010  *
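 * As a hedged illustration: a 3840x2160 source shrunk to a 1920x1080
 * destination yields 2.0 x 2.0 = 4.0 total downscale, i.e. 0x40000 in
 * 16.16 fixed point.
 *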
4011  * Return value is provided in 16.16 fixed point form to retain fractional part.
4012  * Caller should take care of dividing & rounding off the value.
4013  */
4014 static uint_fixed_16_16_t
4015 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4016                            const struct intel_plane_state *pstate)
4017 {
4018         struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4019         uint32_t src_w, src_h, dst_w, dst_h;
4020         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4021         uint_fixed_16_16_t downscale_h, downscale_w;
4022
4023         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4024                 return u32_to_fixed16(0);
4025
4026         /* n.b., src is 16.16 fixed point, dst is whole integer */
4027         if (plane->id == PLANE_CURSOR) {
4028                 /*
4029                  * Cursors only support 0/180 degree rotation,
4030                  * hence no need to account for rotation here.
4031                  */
4032                 src_w = pstate->base.src_w >> 16;
4033                 src_h = pstate->base.src_h >> 16;
4034                 dst_w = pstate->base.crtc_w;
4035                 dst_h = pstate->base.crtc_h;
4036         } else {
4037                 /*
4038                  * Src coordinates are already rotated by 270 degrees for
4039                  * the 90/270 degree plane rotation cases (to match the
4040                  * GTT mapping), hence no need to account for rotation here.
4041                  */
4042                 src_w = drm_rect_width(&pstate->base.src) >> 16;
4043                 src_h = drm_rect_height(&pstate->base.src) >> 16;
4044                 dst_w = drm_rect_width(&pstate->base.dst);
4045                 dst_h = drm_rect_height(&pstate->base.dst);
4046         }
4047
4048         fp_w_ratio = div_fixed16(src_w, dst_w);
4049         fp_h_ratio = div_fixed16(src_h, dst_h);
4050         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4051         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4052
4053         return mul_fixed16(downscale_w, downscale_h);
4054 }
4055
4056 static uint_fixed_16_16_t
4057 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4058 {
4059         uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4060
4061         if (!crtc_state->base.enable)
4062                 return pipe_downscale;
4063
4064         if (crtc_state->pch_pfit.enabled) {
4065                 uint32_t src_w, src_h, dst_w, dst_h;
4066                 uint32_t pfit_size = crtc_state->pch_pfit.size;
4067                 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4068                 uint_fixed_16_16_t downscale_h, downscale_w;
4069
4070                 src_w = crtc_state->pipe_src_w;
4071                 src_h = crtc_state->pipe_src_h;
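                     /*
                      * pch_pfit.size packs the panel fitter output window:
                      * width in the upper 16 bits, height in the lower 16.
                      */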
4072                 dst_w = pfit_size >> 16;
4073                 dst_h = pfit_size & 0xffff;
4074
4075                 if (!dst_w || !dst_h)
4076                         return pipe_downscale;
4077
4078                 fp_w_ratio = div_fixed16(src_w, dst_w);
4079                 fp_h_ratio = div_fixed16(src_h, dst_h);
4080                 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4081                 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4082
4083                 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4084         }
4085
4086         return pipe_downscale;
4087 }
4088
4089 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4090                                   struct intel_crtc_state *cstate)
4091 {
4092         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4093         struct drm_crtc_state *crtc_state = &cstate->base;
4094         struct drm_atomic_state *state = crtc_state->state;
4095         struct drm_plane *plane;
4096         const struct drm_plane_state *pstate;
4097         struct intel_plane_state *intel_pstate;
4098         int crtc_clock, dotclk;
4099         uint32_t pipe_max_pixel_rate;
4100         uint_fixed_16_16_t pipe_downscale;
4101         uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4102
4103         if (!cstate->base.enable)
4104                 return 0;
4105
4106         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4107                 uint_fixed_16_16_t plane_downscale;
4108                 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4109                 int bpp;
4110
4111                 if (!intel_wm_plane_visible(cstate,
4112                                             to_intel_plane_state(pstate)))
4113                         continue;
4114
4115                 if (WARN_ON(!pstate->fb))
4116                         return -EINVAL;
4117
4118                 intel_pstate = to_intel_plane_state(pstate);
4119                 plane_downscale = skl_plane_downscale_amount(cstate,
4120                                                              intel_pstate);
4121                 bpp = pstate->fb->format->cpp[0] * 8;
4122                 if (bpp == 64)
4123                         plane_downscale = mul_fixed16(plane_downscale,
4124                                                       fp_9_div_8);
4125
4126                 max_downscale = max_fixed16(plane_downscale, max_downscale);
4127         }
4128         pipe_downscale = skl_pipe_downscale_amount(cstate);
4129
4130         pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4131
4132         crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4133         dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4134
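             /*
              * GLK and gen10+ can output two pixels per cdclk cycle, so the
              * effective pixel rate limit here is 2 * cdclk.
              */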
4135         if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4136                 dotclk *= 2;
4137
4138         pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4139
4140         if (pipe_max_pixel_rate < crtc_clock) {
4141                 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4142                 return -EINVAL;
4143         }
4144
4145         return 0;
4146 }
4147
4148 static unsigned int
4149 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4150                              const struct drm_plane_state *pstate,
4151                              const int plane)
4152 {
4153         struct intel_plane *intel_plane = to_intel_plane(pstate->plane);
4154         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4155         uint32_t data_rate;
4156         uint32_t width = 0, height = 0;
4157         struct drm_framebuffer *fb;
4158         u32 format;
4159         uint_fixed_16_16_t down_scale_amount;
4160
4161         if (!intel_pstate->base.visible)
4162                 return 0;
4163
4164         fb = pstate->fb;
4165         format = fb->format->format;
4166
4167         if (intel_plane->id == PLANE_CURSOR)
4168                 return 0;
4169         if (plane == 1 && format != DRM_FORMAT_NV12)
4170                 return 0;
4171
4172         /*
4173          * Src coordinates are already rotated by 270 degrees for
4174          * the 90/270 degree plane rotation cases (to match the
4175          * GTT mapping), hence no need to account for rotation here.
4176          */
4177         width = drm_rect_width(&intel_pstate->base.src) >> 16;
4178         height = drm_rect_height(&intel_pstate->base.src) >> 16;
4179
4180         /* UV plane does 1/2 pixel sub-sampling */
4181         if (plane == 1 && format == DRM_FORMAT_NV12) {
4182                 width /= 2;
4183                 height /= 2;
4184         }
4185
4186         data_rate = width * height * fb->format->cpp[plane];
4187
4188         down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4189
4190         return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4191 }
4192
4193 /*
4194  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
4195  * an 8192x4096@32bpp framebuffer:
4196  *   3 * 4096 * 8192  * 4 < 2^32
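      *   (3 * 4096 * 8192 * 4 = 402,653,184, comfortably below 2^32 = 4,294,967,296)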
4197  */
4198 static unsigned int
4199 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4200                                  unsigned int *plane_data_rate,
4201                                  unsigned int *uv_plane_data_rate)
4202 {
4203         struct drm_crtc_state *cstate = &intel_cstate->base;
4204         struct drm_atomic_state *state = cstate->state;
4205         struct drm_plane *plane;
4206         const struct drm_plane_state *pstate;
4207         unsigned int total_data_rate = 0;
4208
4209         if (WARN_ON(!state))
4210                 return 0;
4211
4212         /* Calculate and cache data rate for each plane */
4213         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4214                 enum plane_id plane_id = to_intel_plane(plane)->id;
4215                 unsigned int rate;
4216
4217                 /* packed/y */
4218                 rate = skl_plane_relative_data_rate(intel_cstate,
4219                                                     pstate, 0);
4220                 plane_data_rate[plane_id] = rate;
4221
4222                 total_data_rate += rate;
4223
4224                 /* uv-plane */
4225                 rate = skl_plane_relative_data_rate(intel_cstate,
4226                                                     pstate, 1);
4227                 uv_plane_data_rate[plane_id] = rate;
4228
4229                 total_data_rate += rate;
4230         }
4231
4232         return total_data_rate;
4233 }
4234
4235 static uint16_t
4236 skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane)
4237 {
4238         struct drm_framebuffer *fb = pstate->fb;
4239         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4240         uint32_t src_w, src_h;
4241         uint32_t min_scanlines = 8;
4242         uint8_t plane_bpp;
4243
4244         if (WARN_ON(!fb))
4245                 return 0;
4246
4247         /* Packed formats have no uv-plane, so return 0 for it */
4248         if (plane == 1 && fb->format->format != DRM_FORMAT_NV12)
4249                 return 0;
4250
4251         /* For non Y-tiled formats, return 8 blocks */
4252         if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
4253             fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
4254             fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
4255             fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
4256                 return 8;
4257
4258         /*
4259          * Src coordinates are already rotated by 270 degrees for
4260          * the 90/270 degree plane rotation cases (to match the
4261          * GTT mapping), hence no need to account for rotation here.
4262          */
4263         src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
4264         src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
4265
4266         /* Halve UV plane width and height for NV12 */
4267         if (plane == 1) {
4268                 src_w /= 2;
4269                 src_h /= 2;
4270         }
4271
4272         plane_bpp = fb->format->cpp[plane];
4273
4274         if (drm_rotation_90_or_270(pstate->rotation)) {
4275                 switch (plane_bpp) {
4276                 case 1:
4277                         min_scanlines = 32;
4278                         break;
4279                 case 2:
4280                         min_scanlines = 16;
4281                         break;
4282                 case 4:
4283                         min_scanlines = 8;
4284                         break;
4285                 case 8:
4286                         min_scanlines = 4;
4287                         break;
4288                 default:
4289                         WARN(1, "Unsupported pixel depth %u for rotation",
4290                              plane_bpp);
4291                         min_scanlines = 32;
4292                 }
4293         }
4294
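             /*
              * Rough minimum for Y-tiled surfaces: DDB blocks needed for 4
              * rows of the plane (the 512 divisor below is the block size in
              * bytes), scaled to min_scanlines rows, plus a fixed 3-block pad.
              */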
4295         return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
4296 }
4297
4298 static void
4299 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
4300                  uint16_t *minimum, uint16_t *uv_minimum)
4301 {
4302         const struct drm_plane_state *pstate;
4303         struct drm_plane *plane;
4304
4305         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
4306                 enum plane_id plane_id = to_intel_plane(plane)->id;
4307
4308                 if (plane_id == PLANE_CURSOR)
4309                         continue;
4310
4311                 if (!pstate->visible)
4312                         continue;
4313
4314                 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
4315                 uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
4316         }
4317
4318         minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4319 }
4320
4321 static int
4322 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4323                       struct skl_ddb_allocation *ddb /* out */)
4324 {
4325         struct drm_atomic_state *state = cstate->base.state;
4326         struct drm_crtc *crtc = cstate->base.crtc;
4327         struct drm_device *dev = crtc->dev;
4328         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4329         enum pipe pipe = intel_crtc->pipe;
4330         struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4331         uint16_t alloc_size, start;
4332         uint16_t minimum[I915_MAX_PLANES] = {};
4333         uint16_t uv_minimum[I915_MAX_PLANES] = {};
4334         unsigned int total_data_rate;
4335         enum plane_id plane_id;
4336         int num_active;
4337         unsigned int plane_data_rate[I915_MAX_PLANES] = {};
4338         unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {};
4339         uint16_t total_min_blocks = 0;
4340
4341         /* Clear the partitioning for disabled planes. */
4342         memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
4343         memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe]));
4344
4345         if (WARN_ON(!state))
4346                 return 0;
4347
4348         if (!cstate->base.active) {
4349                 alloc->start = alloc->end = 0;
4350                 return 0;
4351         }
4352
4353         total_data_rate = skl_get_total_relative_data_rate(cstate,
4354                                                            plane_data_rate,
4355                                                            uv_plane_data_rate);
4356         skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
4357                                            alloc, &num_active);
4358         alloc_size = skl_ddb_entry_size(alloc);
4359         if (alloc_size == 0)
4360                 return 0;
4361
4362         skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum);
4363
4364         /*
4365          * 1. Allocate the minimum required blocks for each active plane
4366          * and for the cursor; the cursor doesn't require extra allocation
4367          * proportional to the data rate.
4368          */
4369
4370         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4371                 total_min_blocks += minimum[plane_id];
4372                 total_min_blocks += uv_minimum[plane_id];
4373         }
4374
4375         if (total_min_blocks > alloc_size) {
4376                 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4377                 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4378                                                         alloc_size);
4379                 return -EINVAL;
4380         }
4381
4382         alloc_size -= total_min_blocks;
4383         ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
4384         ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
4385
4386         /*
4387          * 2. Distribute the remaining space in proportion to the amount of
4388          * data each plane needs to fetch from memory.
4389          *
4390          * FIXME: we may not allocate every single block here.
4391          */
4392         if (total_data_rate == 0)
4393                 return 0;
4394
4395         start = alloc->start;
4396         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4397                 unsigned int data_rate, uv_data_rate;
4398                 uint16_t plane_blocks, uv_plane_blocks;
4399
4400                 if (plane_id == PLANE_CURSOR)
4401                         continue;
4402
4403                 data_rate = plane_data_rate[plane_id];
4404
4405                 /*
4406                  * allocation for packed formats or the Y plane of a planar format:
4407                  * promote the expression to 64 bits to avoid overflowing, the
4408                  * result is < available as data_rate / total_data_rate < 1
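                      * (e.g. a plane carrying 25% of total_data_rate, with 400
                      * spare blocks, gets its minimum plus 100 extra blocks)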
4409                  */
4410                 plane_blocks = minimum[plane_id];
4411                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
4412                                         total_data_rate);
4413
4414                 /* Leave disabled planes at (0,0) */
4415                 if (data_rate) {
4416                         ddb->plane[pipe][plane_id].start = start;
4417                         ddb->plane[pipe][plane_id].end = start + plane_blocks;
4418                 }
4419
4420                 start += plane_blocks;
4421
4422                 /* Allocate DDB for UV plane for planar format/NV12 */
4423                 uv_data_rate = uv_plane_data_rate[plane_id];
4424
4425                 uv_plane_blocks = uv_minimum[plane_id];
4426                 uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate,
4427                                            total_data_rate);
4428
4429                 if (uv_data_rate) {
4430                         ddb->uv_plane[pipe][plane_id].start = start;
4431                         ddb->uv_plane[pipe][plane_id].end =
4432                                 start + uv_plane_blocks;
4433                 }
4434
4435                 start += uv_plane_blocks;
4436         }
4437
4438         return 0;
4439 }
4440
4441 /*
4442  * The max latency should be 257 (max the punit can code is 255 and we add 2us
4443  * for the read latency) and cpp should always be <= 8, so that
4444  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4445  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4446  */
4447 static uint_fixed_16_16_t
4448 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
4449                uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
4450 {
4451         uint32_t wm_intermediate_val;
4452         uint_fixed_16_16_t ret;
4453
4454         if (latency == 0)
4455                 return FP_16_16_MAX;
4456
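             /*
              * Method 1: bytes fetched during the latency window, expressed
              * in DBuf blocks.  latency is in usecs and pixel_rate in kHz,
              * hence the extra division by 1000.
              */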
4457         wm_intermediate_val = latency * pixel_rate * cpp;
4458         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4459
4460         if (INTEL_GEN(dev_priv) >= 10)
4461                 ret = add_fixed16_u32(ret, 1);
4462
4463         return ret;
4464 }
4465
4466 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4467                         uint32_t pipe_htotal,
4468                         uint32_t latency,
4469                         uint_fixed_16_16_t plane_blocks_per_line)
4470 {
4471         uint32_t wm_intermediate_val;
4472         uint_fixed_16_16_t ret;
4473
4474         if (latency == 0)
4475                 return FP_16_16_MAX;
4476
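             /*
              * Method 2: lines scanned out during the latency window
              * (latency in usecs, pixel_rate in kHz, htotal in pixels),
              * multiplied by the DBuf blocks needed per line.
              */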
4477         wm_intermediate_val = latency * pixel_rate;
4478         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4479                                            pipe_htotal * 1000);
4480         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4481         return ret;
4482 }
4483
4484 static uint_fixed_16_16_t
4485 intel_get_linetime_us(struct intel_crtc_state *cstate)
4486 {
4487         uint32_t pixel_rate;
4488         uint32_t crtc_htotal;
4489         uint_fixed_16_16_t linetime_us;
4490
4491         if (!cstate->base.active)
4492                 return u32_to_fixed16(0);
4493
4494         pixel_rate = cstate->pixel_rate;
4495
4496         if (WARN_ON(pixel_rate == 0))
4497                 return u32_to_fixed16(0);
4498
4499         crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
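             /* pixel_rate is in kHz, so htotal * 1000 / pixel_rate yields usecs */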
4500         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4501
4502         return linetime_us;
4503 }
4504
4505 static uint32_t
4506 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4507                               const struct intel_plane_state *pstate)
4508 {
4509         uint64_t adjusted_pixel_rate;
4510         uint_fixed_16_16_t downscale_amount;
4511
4512         /* Shouldn't reach here on disabled planes... */
4513         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4514                 return 0;
4515
4516         /*
4517          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4518          * with additional adjustments for plane-specific scaling.
4519          */
4520         adjusted_pixel_rate = cstate->pixel_rate;
4521         downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4522
4523         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4524                                             downscale_amount);
4525 }
4526
4527 static int
4528 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4529                             struct intel_crtc_state *cstate,
4530                             const struct intel_plane_state *intel_pstate,
4531                             struct skl_wm_params *wp, int plane_id)
4532 {
4533         struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4534         const struct drm_plane_state *pstate = &intel_pstate->base;
4535         const struct drm_framebuffer *fb = pstate->fb;
4536         uint32_t interm_pbpl;
4537         struct intel_atomic_state *state =
4538                 to_intel_atomic_state(cstate->base.state);
4539         bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4540
4541         if (!intel_wm_plane_visible(cstate, intel_pstate))
4542                 return 0;
4543
4544         /* only NV12 format has two planes */
4545         if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) {
4546                 DRM_DEBUG_KMS("Non NV12 format have single plane\n");
4547                 return -EINVAL;
4548         }
4549
4550         wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4551                       fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4552                       fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4553                       fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4554         wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4555         wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4556                          fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4557         wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
4558
4559         if (plane->id == PLANE_CURSOR) {
4560                 wp->width = intel_pstate->base.crtc_w;
4561         } else {
4562                 /*
4563                  * Src coordinates are already rotated by 270 degrees for
4564                  * the 90/270 degree plane rotation cases (to match the
4565                  * GTT mapping), hence no need to account for rotation here.
4566                  */
4567                 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4568         }
4569
4570         if (plane_id == 1 && wp->is_planar)
4571                 wp->width /= 2;
4572
4573         wp->cpp = fb->format->cpp[plane_id];
4574         wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4575                                                              intel_pstate);
4576
4577         if (INTEL_GEN(dev_priv) >= 11 &&
4578             fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
4579                 wp->dbuf_block_size = 256;
4580         else
4581                 wp->dbuf_block_size = 512;
4582
4583         if (drm_rotation_90_or_270(pstate->rotation)) {
4584
4585                 switch (wp->cpp) {
4586                 case 1:
4587                         wp->y_min_scanlines = 16;
4588                         break;
4589                 case 2:
4590                         wp->y_min_scanlines = 8;
4591                         break;
4592                 case 4:
4593                         wp->y_min_scanlines = 4;
4594                         break;
4595                 default:
4596                         MISSING_CASE(wp->cpp);
4597                         return -EINVAL;
4598                 }
4599         } else {
4600                 wp->y_min_scanlines = 4;
4601         }
4602
4603         if (apply_memory_bw_wa)
4604                 wp->y_min_scanlines *= 2;
4605
4606         wp->plane_bytes_per_line = wp->width * wp->cpp;
4607         if (wp->y_tiled) {
4608                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4609                                            wp->y_min_scanlines,
4610                                            wp->dbuf_block_size);
4611
4612                 if (INTEL_GEN(dev_priv) >= 10)
4613                         interm_pbpl++;
4614
4615                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4616                                                         wp->y_min_scanlines);
4617         } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4618                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4619                                            wp->dbuf_block_size);
4620                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4621         } else {
4622                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4623                                            wp->dbuf_block_size) + 1;
4624                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4625         }
4626
4627         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4628                                              wp->plane_blocks_per_line);
4629         wp->linetime_us = fixed16_to_u32_round_up(
4630                                         intel_get_linetime_us(cstate));
4631
4632         return 0;
4633 }
4634
4635 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4636                                 struct intel_crtc_state *cstate,
4637                                 const struct intel_plane_state *intel_pstate,
4638                                 uint16_t ddb_allocation,
4639                                 int level,
4640                                 const struct skl_wm_params *wp,
4641                                 const struct skl_wm_level *result_prev,
4642                                 struct skl_wm_level *result /* out */)
4643 {
4644         const struct drm_plane_state *pstate = &intel_pstate->base;
4645         uint32_t latency = dev_priv->wm.skl_latency[level];
4646         uint_fixed_16_16_t method1, method2;
4647         uint_fixed_16_16_t selected_result;
4648         uint32_t res_blocks, res_lines;
4649         struct intel_atomic_state *state =
4650                 to_intel_atomic_state(cstate->base.state);
4651         bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4652         uint32_t min_disp_buf_needed;
4653
4654         if (latency == 0 ||
4655             !intel_wm_plane_visible(cstate, intel_pstate)) {
4656                 result->plane_en = false;
4657                 return 0;
4658         }
4659
4660         /* Display WA #1141: kbl,cfl */
4661         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4662             IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4663             dev_priv->ipc_enabled)
4664                 latency += 4;
4665
4666         if (apply_memory_bw_wa && wp->x_tiled)
4667                 latency += 15;
4668
4669         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4670                                  wp->cpp, latency, wp->dbuf_block_size);
4671         method2 = skl_wm_method2(wp->plane_pixel_rate,
4672                                  cstate->base.adjusted_mode.crtc_htotal,
4673                                  latency,
4674                                  wp->plane_blocks_per_line);
4675
4676         if (wp->y_tiled) {
4677                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4678         } else {
4679                 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4680                      wp->dbuf_block_size < 1) &&
4681                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1))
4682                         selected_result = method2;
4683                 else if (ddb_allocation >=
4684                          fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4685                         selected_result = min_fixed16(method1, method2);
4686                 else if (latency >= wp->linetime_us)
4687                         selected_result = min_fixed16(method1, method2);
4688                 else
4689                         selected_result = method1;
4690         }
4691
4692         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4693         res_lines = div_round_up_fixed16(selected_result,
4694                                          wp->plane_blocks_per_line);
4695
4696         /* Display WA #1125: skl,bxt,kbl,glk */
4697         if (level == 0 && wp->rc_surface)
4698                 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4699
4700         /* Display WA #1126: skl,bxt,kbl,glk */
4701         if (level >= 1 && level <= 7) {
4702                 if (wp->y_tiled) {
4703                         res_blocks += fixed16_to_u32_round_up(
4704                                                         wp->y_tile_minimum);
4705                         res_lines += wp->y_min_scanlines;
4706                 } else {
4707                         res_blocks++;
4708                 }
4709
4710                 /*
4711                  * Make sure result blocks for higher latency levels are at least
4712                  * as high as the level below the current level.
4713                  * Assumption in DDB algorithm optimization for special cases.
4714                  * Also covers Display WA #1125 for RC.
4715                  */
4716                 if (result_prev->plane_res_b > res_blocks)
4717                         res_blocks = result_prev->plane_res_b;
4718         }
4719
4720         if (INTEL_GEN(dev_priv) >= 11) {
4721                 if (wp->y_tiled) {
4722                         uint32_t extra_lines;
4723                         uint_fixed_16_16_t fp_min_disp_buf_needed;
4724
4725                         if (res_lines % wp->y_min_scanlines == 0)
4726                                 extra_lines = wp->y_min_scanlines;
4727                         else
4728                                 extra_lines = wp->y_min_scanlines * 2 -
4729                                               res_lines % wp->y_min_scanlines;
4730
4731                         fp_min_disp_buf_needed = mul_u32_fixed16(res_lines +
4732                                                 extra_lines,
4733                                                 wp->plane_blocks_per_line);
4734                         min_disp_buf_needed = fixed16_to_u32_round_up(
4735                                                 fp_min_disp_buf_needed);
4736                 } else {
4737                         min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10);
4738                 }
4739         } else {
4740                 min_disp_buf_needed = res_blocks;
4741         }
4742
4743         if ((level > 0 && res_lines > 31) ||
4744             res_blocks >= ddb_allocation ||
4745             min_disp_buf_needed >= ddb_allocation) {
4746                 result->plane_en = false;
4747
4748                 /*
4749                  * If there are no valid level 0 watermarks, then we can't
4750                  * support this display configuration.
4751                  */
4752                 if (level) {
4753                         return 0;
4754                 } else {
4755                         struct drm_plane *plane = pstate->plane;
4756
4757                         DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4758                         DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4759                                       plane->base.id, plane->name,
4760                                       res_blocks, ddb_allocation, res_lines);
4761                         return -EINVAL;
4762                 }
4763         }
4764
4765         /*
4766          * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A)
4767          * disable wm level 1-7 on NV12 planes
4768          */
4769         if (wp->is_planar && level >= 1 &&
4770             (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
4771              IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) {
4772                 result->plane_en = false;
4773                 return 0;
4774         }
4775
4776         /* The number of lines is ignored for the level 0 watermark. */
4777         result->plane_res_b = res_blocks;
4778         result->plane_res_l = res_lines;
4779         result->plane_en = true;
4780
4781         return 0;
4782 }
4783
4784 static int
4785 skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4786                       struct skl_ddb_allocation *ddb,
4787                       struct intel_crtc_state *cstate,
4788                       const struct intel_plane_state *intel_pstate,
4789                       const struct skl_wm_params *wm_params,
4790                       struct skl_plane_wm *wm,
4791                       int plane_id)
4792 {
4793         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4794         struct drm_plane *plane = intel_pstate->base.plane;
4795         struct intel_plane *intel_plane = to_intel_plane(plane);
4796         uint16_t ddb_blocks;
4797         enum pipe pipe = intel_crtc->pipe;
4798         int level, max_level = ilk_wm_max_level(dev_priv);
4799         enum plane_id intel_plane_id = intel_plane->id;
4800         int ret;
4801
4802         if (WARN_ON(!intel_pstate->base.fb))
4803                 return -EINVAL;
4804
4805         ddb_blocks = plane_id ?
4806                      skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) :
4807                      skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]);
4808
4809         for (level = 0; level <= max_level; level++) {
4810                 struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] :
4811                                                           &wm->wm[level];
4812                 struct skl_wm_level *result_prev;
4813
4814                 if (level)
4815                         result_prev = plane_id ? &wm->uv_wm[level - 1] :
4816                                                   &wm->wm[level - 1];
4817                 else
4818                         result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0];
4819
4820                 ret = skl_compute_plane_wm(dev_priv,
4821                                            cstate,
4822                                            intel_pstate,
4823                                            ddb_blocks,
4824                                            level,
4825                                            wm_params,
4826                                            result_prev,
4827                                            result);
4828                 if (ret)
4829                         return ret;
4830         }
4831
4832         if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12)
4833                 wm->is_planar = true;
4834
4835         return 0;
4836 }
4837
4838 static uint32_t
4839 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4840 {
4841         struct drm_atomic_state *state = cstate->base.state;
4842         struct drm_i915_private *dev_priv = to_i915(state->dev);
4843         uint_fixed_16_16_t linetime_us;
4844         uint32_t linetime_wm;
4845
4846         linetime_us = intel_get_linetime_us(cstate);
4847
4848         if (is_fixed16_zero(linetime_us))
4849                 return 0;
4850
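             /*
              * The line time watermark is programmed in 1/8 usec units,
              * hence the multiply by 8 before rounding up.
              */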
4851         linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4852
4853         /* Display WA #1135: bxt:ALL GLK:ALL */
4854         if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4855             dev_priv->ipc_enabled)
4856                 linetime_wm /= 2;
4857
4858         return linetime_wm;
4859 }
4860
4861 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4862                                       struct skl_wm_params *wp,
4863                                       struct skl_wm_level *wm_l0,
4864                                       uint16_t ddb_allocation,
4865                                       struct skl_wm_level *trans_wm /* out */)
4866 {
4867         struct drm_device *dev = cstate->base.crtc->dev;
4868         const struct drm_i915_private *dev_priv = to_i915(dev);
4869         uint16_t trans_min, trans_y_tile_min;
4870         const uint16_t trans_amount = 10; /* This is a configurable amount */
4871         uint16_t trans_offset_b, res_blocks;
4872
4873         if (!cstate->base.active)
4874                 goto exit;
4875
4876         /* Transition WMs are not recommended by the HW team for GEN9 */
4877         if (INTEL_GEN(dev_priv) <= 9)
4878                 goto exit;
4879
4880         /* Transition WMs don't make any sense if IPC is disabled */
4881         if (!dev_priv->ipc_enabled)
4882                 goto exit;
4883
4884         trans_min = 0;
4885         if (INTEL_GEN(dev_priv) >= 10)
4886                 trans_min = 4;
4887
4888         trans_offset_b = trans_min + trans_amount;
4889
4890         if (wp->y_tiled) {
4891                 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4892                                                         wp->y_tile_minimum);
4893                 res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
4894                                 trans_offset_b;
4895         } else {
4896                 res_blocks = wm_l0->plane_res_b + trans_offset_b;
4897
4898                 /* WA BUG:1938466 add one block for non y-tile planes */
4899                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4900                         res_blocks += 1;
4901
4902         }
4903
4904         res_blocks += 1;
4905
4906         if (res_blocks < ddb_allocation) {
4907                 trans_wm->plane_res_b = res_blocks;
4908                 trans_wm->plane_en = true;
4909                 return;
4910         }
4911
4912 exit:
4913         trans_wm->plane_en = false;
4914 }
4915
4916 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4917                              struct skl_ddb_allocation *ddb,
4918                              struct skl_pipe_wm *pipe_wm)
4919 {
4920         struct drm_device *dev = cstate->base.crtc->dev;
4921         struct drm_crtc_state *crtc_state = &cstate->base;
4922         const struct drm_i915_private *dev_priv = to_i915(dev);
4923         struct drm_plane *plane;
4924         const struct drm_plane_state *pstate;
4925         struct skl_plane_wm *wm;
4926         int ret;
4927
4928         /*
4929          * We'll only calculate watermarks for planes that are actually
4930          * enabled, so make sure all other planes are set as disabled.
4931          */
4932         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4933
4934         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4935                 const struct intel_plane_state *intel_pstate =
4936                                                 to_intel_plane_state(pstate);
4937                 enum plane_id plane_id = to_intel_plane(plane)->id;
4938                 struct skl_wm_params wm_params;
4939                 enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
4940                 uint16_t ddb_blocks;
4941
4942                 wm = &pipe_wm->planes[plane_id];
4943                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
4944
4945                 ret = skl_compute_plane_wm_params(dev_priv, cstate,
4946                                                   intel_pstate, &wm_params, 0);
4947                 if (ret)
4948                         return ret;
4949
4950                 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4951                                             intel_pstate, &wm_params, wm, 0);
4952                 if (ret)
4953                         return ret;
4954
4955                 skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
4956                                           ddb_blocks, &wm->trans_wm);
4957
4958                 /* uv plane watermarks must also be validated for NV12/Planar */
4959                 if (wm_params.is_planar) {
4960                         memset(&wm_params, 0, sizeof(struct skl_wm_params));
4961                         wm->is_planar = true;
4962
4963                         ret = skl_compute_plane_wm_params(dev_priv, cstate,
4964                                                           intel_pstate,
4965                                                           &wm_params, 1);
4966                         if (ret)
4967                                 return ret;
4968
4969                         ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4970                                                     intel_pstate, &wm_params,
4971                                                     wm, 1);
4972                         if (ret)
4973                                 return ret;
4974                 }
4975         }
4976
4977         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
4978
4979         return 0;
4980 }
4981
4982 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
4983                                 i915_reg_t reg,
4984                                 const struct skl_ddb_entry *entry)
4985 {
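             /*
              * The register packs an inclusive block range: the start block
              * in the low 16 bits and the last block (end - 1) in the high
              * bits.  An all-zero value means no allocation.
              */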
4986         if (entry->end)
4987                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
4988         else
4989                 I915_WRITE(reg, 0);
4990 }
4991
4992 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
4993                                i915_reg_t reg,
4994                                const struct skl_wm_level *level)
4995 {
4996         uint32_t val = 0;
4997
4998         if (level->plane_en) {
4999                 val |= PLANE_WM_EN;
5000                 val |= level->plane_res_b;
5001                 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5002         }
5003
5004         I915_WRITE(reg, val);
5005 }
5006
5007 static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
5008                                const struct skl_plane_wm *wm,
5009                                const struct skl_ddb_allocation *ddb,
5010                                enum plane_id plane_id)
5011 {
5012         struct drm_crtc *crtc = &intel_crtc->base;
5013         struct drm_device *dev = crtc->dev;
5014         struct drm_i915_private *dev_priv = to_i915(dev);
5015         int level, max_level = ilk_wm_max_level(dev_priv);
5016         enum pipe pipe = intel_crtc->pipe;
5017
5018         for (level = 0; level <= max_level; level++) {
5019                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5020                                    &wm->wm[level]);
5021         }
5022         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5023                            &wm->trans_wm);
5024
5025         /* FIXME: add proper NV12 support for ICL. */
5027         if (INTEL_GEN(dev_priv) >= 11)
5028                 return skl_ddb_entry_write(dev_priv,
5029                                            PLANE_BUF_CFG(pipe, plane_id),
5030                                            &ddb->plane[pipe][plane_id]);
5031         if (wm->is_planar) {
5032                 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5033                                     &ddb->uv_plane[pipe][plane_id]);
5034                 skl_ddb_entry_write(dev_priv,
5035                                     PLANE_NV12_BUF_CFG(pipe, plane_id),
5036                                     &ddb->plane[pipe][plane_id]);
5037         } else {
5038                 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5039                                     &ddb->plane[pipe][plane_id]);
5040                 I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0);
5041         }
5042 }
5043
5044 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
5045                                 const struct skl_plane_wm *wm,
5046                                 const struct skl_ddb_allocation *ddb)
5047 {
5048         struct drm_crtc *crtc = &intel_crtc->base;
5049         struct drm_device *dev = crtc->dev;
5050         struct drm_i915_private *dev_priv = to_i915(dev);
5051         int level, max_level = ilk_wm_max_level(dev_priv);
5052         enum pipe pipe = intel_crtc->pipe;
5053
5054         for (level = 0; level <= max_level; level++) {
5055                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5056                                    &wm->wm[level]);
5057         }
5058         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5059
5060         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
5061                             &ddb->plane[pipe][PLANE_CURSOR]);
5062 }
5063
5064 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5065                          const struct skl_wm_level *l2)
5066 {
5067         if (l1->plane_en != l2->plane_en)
5068                 return false;
5069
5070         /* If the plane isn't enabled in either one, the rest shouldn't matter */
5071         if (!l1->plane_en)
5072                 return true;
5073
5074         return (l1->plane_res_l == l2->plane_res_l &&
5075                 l1->plane_res_b == l2->plane_res_b);
5076 }
5077
5078 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5079                                            const struct skl_ddb_entry *b)
5080 {
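             /* Two [start, end) ranges overlap iff each starts before the other ends. */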
5081         return a->start < b->end && b->start < a->end;
5082 }
5083
5084 bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
5085                                  const struct skl_ddb_entry **entries,
5086                                  const struct skl_ddb_entry *ddb,
5087                                  int ignore)
5088 {
5089         enum pipe pipe;
5090
5091         for_each_pipe(dev_priv, pipe) {
5092                 if (pipe != ignore && entries[pipe] &&
5093                     skl_ddb_entries_overlap(ddb, entries[pipe]))
5094                         return true;
5095         }
5096
5097         return false;
5098 }
5099
5100 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
5101                               const struct skl_pipe_wm *old_pipe_wm,
5102                               struct skl_pipe_wm *pipe_wm, /* out */
5103                               struct skl_ddb_allocation *ddb, /* out */
5104                               bool *changed /* out */)
5105 {
5106         struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
5107         int ret;
5108
5109         ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
5110         if (ret)
5111                 return ret;
5112
5113         if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
5114                 *changed = false;
5115         else
5116                 *changed = true;
5117
5118         return 0;
5119 }
5120
5121 static uint32_t
5122 pipes_modified(struct drm_atomic_state *state)
5123 {
5124         struct drm_crtc *crtc;
5125         struct drm_crtc_state *cstate;
5126         uint32_t i, ret = 0;
5127
5128         for_each_new_crtc_in_state(state, crtc, cstate, i)
5129                 ret |= drm_crtc_mask(crtc);
5130
5131         return ret;
5132 }
5133
5134 static int
5135 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
5136 {
5137         struct drm_atomic_state *state = cstate->base.state;
5138         struct drm_device *dev = state->dev;
5139         struct drm_crtc *crtc = cstate->base.crtc;
5140         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5141         struct drm_i915_private *dev_priv = to_i915(dev);
5142         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5143         struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5144         struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
5145         struct drm_plane_state *plane_state;
5146         struct drm_plane *plane;
5147         enum pipe pipe = intel_crtc->pipe;
5148
5149         drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
5150                 enum plane_id plane_id = to_intel_plane(plane)->id;
5151
5152                 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
5153                                         &new_ddb->plane[pipe][plane_id]) &&
5154                     skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id],
5155                                         &new_ddb->uv_plane[pipe][plane_id]))
5156                         continue;
5157
5158                 plane_state = drm_atomic_get_plane_state(state, plane);
5159                 if (IS_ERR(plane_state))
5160                         return PTR_ERR(plane_state);
5161         }
5162
5163         return 0;
5164 }
5165
5166 static int
5167 skl_compute_ddb(struct drm_atomic_state *state)
5168 {
5169         const struct drm_i915_private *dev_priv = to_i915(state->dev);
5170         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5171         struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
5172         struct intel_crtc *crtc;
5173         struct intel_crtc_state *cstate;
5174         int ret, i;
5175
5176         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5177
5178         for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) {
5179                 ret = skl_allocate_pipe_ddb(cstate, ddb);
5180                 if (ret)
5181                         return ret;
5182
5183                 ret = skl_ddb_add_affected_planes(cstate);
5184                 if (ret)
5185                         return ret;
5186         }
5187
5188         return 0;
5189 }
5190
5191 static void
5192 skl_copy_ddb_for_pipe(struct skl_ddb_values *dst,
5193                       struct skl_ddb_values *src,
5194                       enum pipe pipe)
5195 {
5196         memcpy(dst->ddb.uv_plane[pipe], src->ddb.uv_plane[pipe],
5197                sizeof(dst->ddb.uv_plane[pipe]));
5198         memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
5199                sizeof(dst->ddb.plane[pipe]));
5200 }
5201
5202 static void
5203 skl_print_wm_changes(const struct drm_atomic_state *state)
5204 {
5205         const struct drm_device *dev = state->dev;
5206         const struct drm_i915_private *dev_priv = to_i915(dev);
5207         const struct intel_atomic_state *intel_state =
5208                 to_intel_atomic_state(state);
5209         const struct drm_crtc *crtc;
5210         const struct drm_crtc_state *cstate;
5211         const struct intel_plane *intel_plane;
5212         const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
5213         const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5214         int i;
5215
5216         for_each_new_crtc_in_state(state, crtc, cstate, i) {
5217                 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5218                 enum pipe pipe = intel_crtc->pipe;
5219
5220                 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
5221                         enum plane_id plane_id = intel_plane->id;
5222                         const struct skl_ddb_entry *old, *new;
5223
5224                         old = &old_ddb->plane[pipe][plane_id];
5225                         new = &new_ddb->plane[pipe][plane_id];
5226
5227                         if (skl_ddb_entry_equal(old, new))
5228                                 continue;
5229
5230                         DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5231                                          intel_plane->base.base.id,
5232                                          intel_plane->base.name,
5233                                          old->start, old->end,
5234                                          new->start, new->end);
5235                 }
5236         }
5237 }
5238
5239 static int
5240 skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed)
5241 {
5242         struct drm_device *dev = state->dev;
5243         const struct drm_i915_private *dev_priv = to_i915(dev);
5244         const struct drm_crtc *crtc;
5245         const struct drm_crtc_state *cstate;
5246         struct intel_crtc *intel_crtc;
5247         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5248         uint32_t realloc_pipes = pipes_modified(state);
5249         int ret, i;
5250
5251         /*
5252          * When we distrust bios wm we always need to recompute to set the
5253          * expected DDB allocations for each CRTC.
5254          */
5255         if (dev_priv->wm.distrust_bios_wm)
5256                 (*changed) = true;
5257
5258         /*
5259          * If this transaction isn't actually touching any CRTC's, don't
5260          * bother with watermark calculation.  Note that if we pass this
5261          * test, we're guaranteed to hold at least one CRTC state mutex,
5262          * which means we can safely use values like dev_priv->active_crtcs
5263          * since any racing commits that want to update them would need to
5264          * hold _all_ CRTC state mutexes.
5265          */
5266         for_each_new_crtc_in_state(state, crtc, cstate, i)
5267                 (*changed) = true;
5268
5269         if (!*changed)
5270                 return 0;
5271
5272         /*
5273          * If this is our first atomic update following hardware readout,
5274          * we can't trust the DDB that the BIOS programmed for us.  Let's
5275          * pretend that all pipes switched active status so that we'll
5276          * ensure a full DDB recompute.
5277          */
5278         if (dev_priv->wm.distrust_bios_wm) {
5279                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5280                                        state->acquire_ctx);
5281                 if (ret)
5282                         return ret;
5283
5284                 intel_state->active_pipe_changes = ~0;
5285
5286                 /*
5287                  * We usually only initialize intel_state->active_crtcs if we're
5288                  * doing a modeset; make sure this field is always
5289                  * initialized during the sanitization process that happens
5290                  * on the first commit too.
5291                  */
5292                 if (!intel_state->modeset)
5293                         intel_state->active_crtcs = dev_priv->active_crtcs;
5294         }
5295
5296         /*
5297          * If the modeset changes which CRTC's are active, we need to
5298          * recompute the DDB allocation for *all* active pipes, even
5299          * those that weren't otherwise being modified in any way by this
5300          * atomic commit.  Due to the shrinking of the per-pipe allocations
5301          * when new active CRTC's are added, it's possible for a pipe that
5302          * we were already using and aren't changing at all here to suddenly
5303          * become invalid if its DDB needs exceeds its new allocation.
5304          *
5305          * Note that if we wind up doing a full DDB recompute, we can't let
5306          * any other display updates race with this transaction, so we need
5307          * to grab the lock on *all* CRTC's.
5308          */
5309         if (intel_state->active_pipe_changes) {
5310                 realloc_pipes = ~0;
5311                 intel_state->wm_results.dirty_pipes = ~0;
5312         }
5313
5314         /*
5315          * We're not recomputing for the pipes not included in the commit, so
5316          * make sure we start with the current state.
5317          */
5318         for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
5319                 struct intel_crtc_state *cstate;
5320
5321                 cstate = intel_atomic_get_crtc_state(state, intel_crtc);
5322                 if (IS_ERR(cstate))
5323                         return PTR_ERR(cstate);
5324         }
5325
5326         return 0;
5327 }
5328
5329 static int
5330 skl_compute_wm(struct drm_atomic_state *state)
5331 {
5332         struct drm_crtc *crtc;
5333         struct drm_crtc_state *cstate;
5334         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5335         struct skl_ddb_values *results = &intel_state->wm_results;
5336         struct skl_pipe_wm *pipe_wm;
5337         bool changed = false;
5338         int ret, i;
5339
5340         /* Clear all dirty flags */
5341         results->dirty_pipes = 0;
5342
5343         ret = skl_ddb_add_affected_pipes(state, &changed);
5344         if (ret || !changed)
5345                 return ret;
5346
5347         ret = skl_compute_ddb(state);
5348         if (ret)
5349                 return ret;
5350
5351         /*
5352          * Calculate WM's for all pipes that are part of this transaction.
5353          * Note that the DDB allocation above may have added more CRTC's that
5354          * weren't otherwise being modified (and set bits in dirty_pipes) if
5355          * pipe allocations had to change.
5356          *
5357          * FIXME:  Now that we're doing this in the atomic check phase, we
5358          * should allow skl_update_pipe_wm() to return failure in cases where
5359          * no suitable watermark values can be found.
5360          */
5361         for_each_new_crtc_in_state(state, crtc, cstate, i) {
5362                 struct intel_crtc_state *intel_cstate =
5363                         to_intel_crtc_state(cstate);
5364                 const struct skl_pipe_wm *old_pipe_wm =
5365                         &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
5366
5367                 pipe_wm = &intel_cstate->wm.skl.optimal;
5368                 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
5369                                          &results->ddb, &changed);
5370                 if (ret)
5371                         return ret;
5372
5373                 if (changed)
5374                         results->dirty_pipes |= drm_crtc_mask(crtc);
5375
5376                 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
5377                         /* This pipe's WMs did not change */
5378                         continue;
5379
5380                 intel_cstate->update_wm_pre = true;
5381         }
5382
5383         skl_print_wm_changes(state);
5384
5385         return 0;
5386 }
5387
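/*
 * Write the newly computed watermarks for one CRTC to the hardware: the
 * pipe linetime plus the per-plane and cursor watermark registers. Pipes
 * whose watermarks were not marked dirty by this commit are skipped.
 */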
5388 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5389                                       struct intel_crtc_state *cstate)
5390 {
5391         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5392         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5393         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5394         const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5395         enum pipe pipe = crtc->pipe;
5396         enum plane_id plane_id;
5397
5398         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5399                 return;
5400
5401         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5402
5403         for_each_plane_id_on_crtc(crtc, plane_id) {
5404                 if (plane_id != PLANE_CURSOR)
5405                         skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
5406                                            ddb, plane_id);
5407                 else
5408                         skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
5409                                             ddb);
5410         }
5411 }
5412
5413 static void skl_initial_wm(struct intel_atomic_state *state,
5414                            struct intel_crtc_state *cstate)
5415 {
5416         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5417         struct drm_device *dev = intel_crtc->base.dev;
5418         struct drm_i915_private *dev_priv = to_i915(dev);
5419         struct skl_ddb_values *results = &state->wm_results;
5420         struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw;
5421         enum pipe pipe = intel_crtc->pipe;
5422
5423         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5424                 return;
5425
5426         mutex_lock(&dev_priv->wm.wm_mutex);
5427
5428         if (cstate->base.active_changed)
5429                 skl_atomic_update_crtc_wm(state, cstate);
5430
5431         skl_copy_ddb_for_pipe(hw_vals, results, pipe);
5432
5433         mutex_unlock(&dev_priv->wm.wm_mutex);
5434 }
5435
5436 static void ilk_compute_wm_config(struct drm_device *dev,
5437                                   struct intel_wm_config *config)
5438 {
5439         struct intel_crtc *crtc;
5440
5441         /* Compute the currently _active_ config */
5442         for_each_intel_crtc(dev, crtc) {
5443                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5444
5445                 if (!wm->pipe_enabled)
5446                         continue;
5447
5448                 config->sprites_enabled |= wm->sprites_enabled;
5449                 config->sprites_scaled |= wm->sprites_scaled;
5450                 config->num_pipes_active++;
5451         }
5452 }
5453
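/*
 * Merge the per-pipe watermarks of all active CRTCs into LP1+ values,
 * pick between the 1/2 and 5/6 DDB partitioning (the latter considered
 * only for single-pipe configs with sprites enabled on gen7+), and write
 * the winning results out to the hardware.
 */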
5454 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5455 {
5456         struct drm_device *dev = &dev_priv->drm;
5457         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5458         struct ilk_wm_maximums max;
5459         struct intel_wm_config config = {};
5460         struct ilk_wm_values results = {};
5461         enum intel_ddb_partitioning partitioning;
5462
5463         ilk_compute_wm_config(dev, &config);
5464
5465         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
5466         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
5467
5468         /* 5/6 split only in single pipe config on IVB+ */
5469         if (INTEL_GEN(dev_priv) >= 7 &&
5470             config.num_pipes_active == 1 && config.sprites_enabled) {
5471                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
5472                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
5473
5474                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
5475         } else {
5476                 best_lp_wm = &lp_wm_1_2;
5477         }
5478
5479         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5480                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5481
5482         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
5483
5484         ilk_write_wm_values(dev_priv, &results);
5485 }
5486
5487 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5488                                    struct intel_crtc_state *cstate)
5489 {
5490         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5491         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5492
5493         mutex_lock(&dev_priv->wm.wm_mutex);
5494         intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5495         ilk_program_watermarks(dev_priv);
5496         mutex_unlock(&dev_priv->wm.wm_mutex);
5497 }
5498
5499 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5500                                     struct intel_crtc_state *cstate)
5501 {
5502         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5503         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5504
5505         mutex_lock(&dev_priv->wm.wm_mutex);
5506         if (cstate->wm.need_postvbl_update) {
5507                 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5508                 ilk_program_watermarks(dev_priv);
5509         }
5510         mutex_unlock(&dev_priv->wm.wm_mutex);
5511 }
5512
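/*
 * Decode a hardware watermark register value (PLANE_WM/CUR_WM) into its
 * enable bit, block count and line count fields.
 */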
5513 static inline void skl_wm_level_from_reg_val(uint32_t val,
5514                                              struct skl_wm_level *level)
5515 {
5516         level->plane_en = val & PLANE_WM_EN;
5517         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5518         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5519                 PLANE_WM_LINES_MASK;
5520 }
5521
5522 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
5523                               struct skl_pipe_wm *out)
5524 {
5525         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
5526         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5527         enum pipe pipe = intel_crtc->pipe;
5528         int level, max_level;
5529         enum plane_id plane_id;
5530         uint32_t val;
5531
5532         max_level = ilk_wm_max_level(dev_priv);
5533
5534         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5535                 struct skl_plane_wm *wm = &out->planes[plane_id];
5536
5537                 for (level = 0; level <= max_level; level++) {
5538                         if (plane_id != PLANE_CURSOR)
5539                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5540                         else
5541                                 val = I915_READ(CUR_WM(pipe, level));
5542
5543                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5544                 }
5545
5546                 if (plane_id != PLANE_CURSOR)
5547                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5548                 else
5549                         val = I915_READ(CUR_WM_TRANS(pipe));
5550
5551                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5552         }
5553
5554         if (!intel_crtc->active)
5555                 return;
5556
5557         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5558 }
5559
5560 void skl_wm_get_hw_state(struct drm_device *dev)
5561 {
5562         struct drm_i915_private *dev_priv = to_i915(dev);
5563         struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5564         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5565         struct drm_crtc *crtc;
5566         struct intel_crtc *intel_crtc;
5567         struct intel_crtc_state *cstate;
5568
5569         skl_ddb_get_hw_state(dev_priv, ddb);
5570         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
5571                 intel_crtc = to_intel_crtc(crtc);
5572                 cstate = to_intel_crtc_state(crtc->state);
5573
5574                 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5575
5576                 if (intel_crtc->active)
5577                         hw->dirty_pipes |= drm_crtc_mask(crtc);
5578         }
5579
5580         if (dev_priv->active_crtcs) {
5581                 /* Fully recompute DDB on first atomic commit */
5582                 dev_priv->wm.distrust_bios_wm = true;
5583         } else {
5584                 /*
5585                  * Easy/common case; just sanitize the DDB now if everything is
5586                  * off. Keep the dbuf slice info intact.
5587                  */
5588                 memset(ddb->plane, 0, sizeof(ddb->plane));
5589                 memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane));
5590         }
5591 }
5592
5593 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
5594 {
5595         struct drm_device *dev = crtc->dev;
5596         struct drm_i915_private *dev_priv = to_i915(dev);
5597         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5598         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5599         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
5600         struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5601         enum pipe pipe = intel_crtc->pipe;
5602         static const i915_reg_t wm0_pipe_reg[] = {
5603                 [PIPE_A] = WM0_PIPEA_ILK,
5604                 [PIPE_B] = WM0_PIPEB_ILK,
5605                 [PIPE_C] = WM0_PIPEC_IVB,
5606         };
5607
5608         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5609         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5610                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5611
5612         memset(active, 0, sizeof(*active));
5613
5614         active->pipe_enabled = intel_crtc->active;
5615
5616         if (active->pipe_enabled) {
5617                 u32 tmp = hw->wm_pipe[pipe];
5618
5619                 /*
5620                  * For active pipes the LP0 watermark is marked as
5621                  * enabled, and LP1+ watermarks as disabled since
5622                  * we can't really reverse compute them in case
5623                  * multiple pipes are active.
5624                  */
5625                 active->wm[0].enable = true;
5626                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5627                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5628                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5629                 active->linetime = hw->wm_linetime[pipe];
5630         } else {
5631                 int level, max_level = ilk_wm_max_level(dev_priv);
5632
5633                 /*
5634                  * For inactive pipes, all watermark levels
5635                  * should be marked as enabled but zeroed,
5636                  * which is what we'd compute them to.
5637                  */
5638                 for (level = 0; level <= max_level; level++)
5639                         active->wm[level].enable = true;
5640         }
5641
5642         intel_crtc->wm.active.ilk = *active;
5643 }
5644
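/*
 * Helpers to extract a single watermark field from a DSPFW* register
 * value; the _VLV variant uses the VLV/CHV-specific field masks.
 */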
5645 #define _FW_WM(value, plane) \
5646         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5647 #define _FW_WM_VLV(value, plane) \
5648         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5649
5650 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5651                                struct g4x_wm_values *wm)
5652 {
5653         uint32_t tmp;
5654
5655         tmp = I915_READ(DSPFW1);
5656         wm->sr.plane = _FW_WM(tmp, SR);
5657         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5658         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5659         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5660
5661         tmp = I915_READ(DSPFW2);
5662         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5663         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5664         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5665         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5666         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5667         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5668
5669         tmp = I915_READ(DSPFW3);
5670         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5671         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5672         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5673         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5674 }
5675
5676 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5677                                struct vlv_wm_values *wm)
5678 {
5679         enum pipe pipe;
5680         uint32_t tmp;
5681
5682         for_each_pipe(dev_priv, pipe) {
5683                 tmp = I915_READ(VLV_DDL(pipe));
5684
5685                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5686                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5687                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5688                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5689                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5690                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5691                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5692                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5693         }
5694
5695         tmp = I915_READ(DSPFW1);
5696         wm->sr.plane = _FW_WM(tmp, SR);
5697         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5698         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5699         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5700
5701         tmp = I915_READ(DSPFW2);
5702         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5703         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5704         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5705
5706         tmp = I915_READ(DSPFW3);
5707         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5708
5709         if (IS_CHERRYVIEW(dev_priv)) {
5710                 tmp = I915_READ(DSPFW7_CHV);
5711                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5712                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5713
5714                 tmp = I915_READ(DSPFW8_CHV);
5715                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5716                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5717
5718                 tmp = I915_READ(DSPFW9_CHV);
5719                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5720                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5721
5722                 tmp = I915_READ(DSPHOWM);
5723                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5724                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5725                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5726                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5727                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5728                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5729                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5730                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5731                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5732                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5733         } else {
5734                 tmp = I915_READ(DSPFW7);
5735                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5736                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5737
5738                 tmp = I915_READ(DSPHOWM);
5739                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5740                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5741                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5742                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5743                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5744                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5745                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5746         }
5747 }
5748
5749 #undef _FW_WM
5750 #undef _FW_WM_VLV
5751
5752 void g4x_wm_get_hw_state(struct drm_device *dev)
5753 {
5754         struct drm_i915_private *dev_priv = to_i915(dev);
5755         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5756         struct intel_crtc *crtc;
5757
5758         g4x_read_wm_values(dev_priv, wm);
5759
5760         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5761
5762         for_each_intel_crtc(dev, crtc) {
5763                 struct intel_crtc_state *crtc_state =
5764                         to_intel_crtc_state(crtc->base.state);
5765                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5766                 struct g4x_pipe_wm *raw;
5767                 enum pipe pipe = crtc->pipe;
5768                 enum plane_id plane_id;
5769                 int level, max_level;
5770
5771                 active->cxsr = wm->cxsr;
5772                 active->hpll_en = wm->hpll_en;
5773                 active->fbc_en = wm->fbc_en;
5774
5775                 active->sr = wm->sr;
5776                 active->hpll = wm->hpll;
5777
5778                 for_each_plane_id_on_crtc(crtc, plane_id) {
5779                         active->wm.plane[plane_id] =
5780                                 wm->pipe[pipe].plane[plane_id];
5781                 }
5782
5783                 if (wm->cxsr && wm->hpll_en)
5784                         max_level = G4X_WM_LEVEL_HPLL;
5785                 else if (wm->cxsr)
5786                         max_level = G4X_WM_LEVEL_SR;
5787                 else
5788                         max_level = G4X_WM_LEVEL_NORMAL;
5789
5790                 level = G4X_WM_LEVEL_NORMAL;
5791                 raw = &crtc_state->wm.g4x.raw[level];
5792                 for_each_plane_id_on_crtc(crtc, plane_id)
5793                         raw->plane[plane_id] = active->wm.plane[plane_id];
5794
5795                 if (++level > max_level)
5796                         goto out;
5797
5798                 raw = &crtc_state->wm.g4x.raw[level];
5799                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5800                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5801                 raw->plane[PLANE_SPRITE0] = 0;
5802                 raw->fbc = active->sr.fbc;
5803
5804                 if (++level > max_level)
5805                         goto out;
5806
5807                 raw = &crtc_state->wm.g4x.raw[level];
5808                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5809                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5810                 raw->plane[PLANE_SPRITE0] = 0;
5811                 raw->fbc = active->hpll.fbc;
5812
5813         out:
5814                 for_each_plane_id_on_crtc(crtc, plane_id)
5815                         g4x_raw_plane_wm_set(crtc_state, level,
5816                                              plane_id, USHRT_MAX);
5817                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5818
5819                 crtc_state->wm.g4x.optimal = *active;
5820                 crtc_state->wm.g4x.intermediate = *active;
5821
5822                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5823                               pipe_name(pipe),
5824                               wm->pipe[pipe].plane[PLANE_PRIMARY],
5825                               wm->pipe[pipe].plane[PLANE_CURSOR],
5826                               wm->pipe[pipe].plane[PLANE_SPRITE0]);
5827         }
5828
5829         DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5830                       wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5831         DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5832                       wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5833         DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5834                       yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5835 }
5836
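/*
 * Bring the watermark state read out from hardware in line with what the
 * driver would compute itself: planes that are not visible get their raw
 * and optimal watermarks zeroed (including the FBC watermarks when the
 * primary plane is involved) before the hardware is reprogrammed.
 */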
5837 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5838 {
5839         struct intel_plane *plane;
5840         struct intel_crtc *crtc;
5841
5842         mutex_lock(&dev_priv->wm.wm_mutex);
5843
5844         for_each_intel_plane(&dev_priv->drm, plane) {
5845                 struct intel_crtc *crtc =
5846                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5847                 struct intel_crtc_state *crtc_state =
5848                         to_intel_crtc_state(crtc->base.state);
5849                 struct intel_plane_state *plane_state =
5850                         to_intel_plane_state(plane->base.state);
5851                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5852                 enum plane_id plane_id = plane->id;
5853                 int level;
5854
5855                 if (plane_state->base.visible)
5856                         continue;
5857
5858                 for (level = 0; level < 3; level++) {
5859                         struct g4x_pipe_wm *raw =
5860                                 &crtc_state->wm.g4x.raw[level];
5861
5862                         raw->plane[plane_id] = 0;
5863                         wm_state->wm.plane[plane_id] = 0;
5864                 }
5865
5866                 if (plane_id == PLANE_PRIMARY) {
5867                         for (level = 0; level < 3; level++) {
5868                                 struct g4x_pipe_wm *raw =
5869                                         &crtc_state->wm.g4x.raw[level];
5870                                 raw->fbc = 0;
5871                         }
5872
5873                         wm_state->sr.fbc = 0;
5874                         wm_state->hpll.fbc = 0;
5875                         wm_state->fbc_en = false;
5876                 }
5877         }
5878
5879         for_each_intel_crtc(&dev_priv->drm, crtc) {
5880                 struct intel_crtc_state *crtc_state =
5881                         to_intel_crtc_state(crtc->base.state);
5882
5883                 crtc_state->wm.g4x.intermediate =
5884                         crtc_state->wm.g4x.optimal;
5885                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5886         }
5887
5888         g4x_program_watermarks(dev_priv);
5889
5890         mutex_unlock(&dev_priv->wm.wm_mutex);
5891 }
5892
5893 void vlv_wm_get_hw_state(struct drm_device *dev)
5894 {
5895         struct drm_i915_private *dev_priv = to_i915(dev);
5896         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
5897         struct intel_crtc *crtc;
5898         u32 val;
5899
5900         vlv_read_wm_values(dev_priv, wm);
5901
5902         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5903         wm->level = VLV_WM_LEVEL_PM2;
5904
5905         if (IS_CHERRYVIEW(dev_priv)) {
5906                 mutex_lock(&dev_priv->pcu_lock);
5907
5908                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5909                 if (val & DSP_MAXFIFO_PM5_ENABLE)
5910                         wm->level = VLV_WM_LEVEL_PM5;
5911
5912                 /*
5913                  * If DDR DVFS is disabled in the BIOS, Punit
5914                  * will never ack the request. So if that happens
5915                  * assume we don't have to enable/disable DDR DVFS
5916                  * dynamically. To test that just set the REQ_ACK
5917                  * bit to poke the Punit, but don't change the
5918                  * HIGH/LOW bits so that we don't actually change
5919                  * the current state.
5920                  */
5921                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5922                 val |= FORCE_DDR_FREQ_REQ_ACK;
5923                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5924
5925                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5926                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5927                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5928                                       "assuming DDR DVFS is disabled\n");
5929                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5930                 } else {
5931                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5932                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5933                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
5934                 }
5935
5936                 mutex_unlock(&dev_priv->pcu_lock);
5937         }
5938
5939         for_each_intel_crtc(dev, crtc) {
5940                 struct intel_crtc_state *crtc_state =
5941                         to_intel_crtc_state(crtc->base.state);
5942                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
5943                 const struct vlv_fifo_state *fifo_state =
5944                         &crtc_state->wm.vlv.fifo_state;
5945                 enum pipe pipe = crtc->pipe;
5946                 enum plane_id plane_id;
5947                 int level;
5948
5949                 vlv_get_fifo_size(crtc_state);
5950
5951                 active->num_levels = wm->level + 1;
5952                 active->cxsr = wm->cxsr;
5953
5954                 for (level = 0; level < active->num_levels; level++) {
5955                         struct g4x_pipe_wm *raw =
5956                                 &crtc_state->wm.vlv.raw[level];
5957
5958                         active->sr[level].plane = wm->sr.plane;
5959                         active->sr[level].cursor = wm->sr.cursor;
5960
5961                         for_each_plane_id_on_crtc(crtc, plane_id) {
5962                                 active->wm[level].plane[plane_id] =
5963                                         wm->pipe[pipe].plane[plane_id];
5964
5965                                 raw->plane[plane_id] =
5966                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
5967                                                             fifo_state->plane[plane_id]);
5968                         }
5969                 }
5970
5971                 for_each_plane_id_on_crtc(crtc, plane_id)
5972                         vlv_raw_plane_wm_set(crtc_state, level,
5973                                              plane_id, USHRT_MAX);
5974                 vlv_invalidate_wms(crtc, active, level);
5975
5976                 crtc_state->wm.vlv.optimal = *active;
5977                 crtc_state->wm.vlv.intermediate = *active;
5978
5979                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
5980                               pipe_name(pipe),
5981                               wm->pipe[pipe].plane[PLANE_PRIMARY],
5982                               wm->pipe[pipe].plane[PLANE_CURSOR],
5983                               wm->pipe[pipe].plane[PLANE_SPRITE0],
5984                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
5985         }
5986
5987         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
5988                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
5989 }
5990
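/*
 * Same idea as g4x_wm_sanitize(): zero out the watermarks of invisible
 * planes (re-inverting them against the current FIFO split) and then
 * reprogram the hardware from the sanitized state.
 */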
5991 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
5992 {
5993         struct intel_plane *plane;
5994         struct intel_crtc *crtc;
5995
5996         mutex_lock(&dev_priv->wm.wm_mutex);
5997
5998         for_each_intel_plane(&dev_priv->drm, plane) {
5999                 struct intel_crtc *crtc =
6000                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6001                 struct intel_crtc_state *crtc_state =
6002                         to_intel_crtc_state(crtc->base.state);
6003                 struct intel_plane_state *plane_state =
6004                         to_intel_plane_state(plane->base.state);
6005                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6006                 const struct vlv_fifo_state *fifo_state =
6007                         &crtc_state->wm.vlv.fifo_state;
6008                 enum plane_id plane_id = plane->id;
6009                 int level;
6010
6011                 if (plane_state->base.visible)
6012                         continue;
6013
6014                 for (level = 0; level < wm_state->num_levels; level++) {
6015                         struct g4x_pipe_wm *raw =
6016                                 &crtc_state->wm.vlv.raw[level];
6017
6018                         raw->plane[plane_id] = 0;
6019
6020                         wm_state->wm[level].plane[plane_id] =
6021                                 vlv_invert_wm_value(raw->plane[plane_id],
6022                                                     fifo_state->plane[plane_id]);
6023                 }
6024         }
6025
6026         for_each_intel_crtc(&dev_priv->drm, crtc) {
6027                 struct intel_crtc_state *crtc_state =
6028                         to_intel_crtc_state(crtc->base.state);
6029
6030                 crtc_state->wm.vlv.intermediate =
6031                         crtc_state->wm.vlv.optimal;
6032                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6033         }
6034
6035         vlv_program_watermarks(dev_priv);
6036
6037         mutex_unlock(&dev_priv->wm.wm_mutex);
6038 }
6039
6040 /*
6041  * FIXME should probably kill this and improve
6042  * the real watermark readout/sanitation instead
6043  */
6044 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6045 {
6046         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6047         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6048         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6049
6050         /*
6051          * Don't touch WM1S_LP_EN here.
6052          * Doing so could cause underruns.
6053          */
6054 }
6055
6056 void ilk_wm_get_hw_state(struct drm_device *dev)
6057 {
6058         struct drm_i915_private *dev_priv = to_i915(dev);
6059         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6060         struct drm_crtc *crtc;
6061
6062         ilk_init_lp_watermarks(dev_priv);
6063
6064         for_each_crtc(dev, crtc)
6065                 ilk_pipe_wm_get_hw_state(crtc);
6066
6067         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6068         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6069         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6070
6071         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6072         if (INTEL_GEN(dev_priv) >= 7) {
6073                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6074                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6075         }
6076
6077         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6078                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6079                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6080         else if (IS_IVYBRIDGE(dev_priv))
6081                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6082                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6083
6084         hw->enable_fbc_wm =
6085                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6086 }
6087
6088 /**
6089  * intel_update_watermarks - update FIFO watermark values based on current modes
6090  * @crtc: the #intel_crtc on which to compute the WM
6091  *
6092  * Calculate watermark values for the various WM regs based on current mode
6093  * and plane configuration.
6094  *
6095  * There are several cases to deal with here:
6096  *   - normal (i.e. non-self-refresh)
6097  *   - self-refresh (SR) mode
6098  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6099  *   - lines are small relative to FIFO size (buffer can hold more than 2
6100  *     lines), so need to account for TLB latency
6101  *
6102  *   The normal calculation is:
6103  *     watermark = dotclock * bytes per pixel * latency
6104  *   where latency is platform & configuration dependent (we assume pessimal
6105  *   values here).
6106  *
6107  *   The SR calculation is:
6108  *     watermark = (trunc(latency/line time)+1) * surface width *
6109  *       bytes per pixel
6110  *   where
6111  *     line time = htotal / dotclock
6112  *     surface width = hdisplay for normal plane and 64 for cursor
6113  *   and latency is assumed to be high, as above.
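 *
 *   As a purely illustrative example (made-up numbers, not from any real
 *   platform): a 100 MHz dot clock at 4 bytes per pixel with 10 us of
 *   latency gives 100e6 * 4 * 10e-6 = 4000 bytes fetched during the
 *   latency window, which is the FIFO occupancy the normal watermark
 *   has to cover.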
6114  *
6115  * The final value programmed to the register should always be rounded up,
6116  * and include an extra 2 entries to account for clock crossings.
6117  *
6118  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6119  * to set the non-SR watermarks to 8.
6120  */
6121 void intel_update_watermarks(struct intel_crtc *crtc)
6122 {
6123         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6124
6125         if (dev_priv->display.update_wm)
6126                 dev_priv->display.update_wm(crtc);
6127 }
6128
6129 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6130 {
6131         u32 val;
6132
6133         /* Display WA #0477 WaDisableIPC: skl */
6134         if (IS_SKYLAKE(dev_priv)) {
6135                 dev_priv->ipc_enabled = false;
6136                 return;
6137         }
6138
6139         val = I915_READ(DISP_ARB_CTL2);
6140
6141         if (dev_priv->ipc_enabled)
6142                 val |= DISP_IPC_ENABLE;
6143         else
6144                 val &= ~DISP_IPC_ENABLE;
6145
6146         I915_WRITE(DISP_ARB_CTL2, val);
6147 }
6148
6149 void intel_init_ipc(struct drm_i915_private *dev_priv)
6150 {
6151         dev_priv->ipc_enabled = false;
6152         if (!HAS_IPC(dev_priv))
6153                 return;
6154
6155         dev_priv->ipc_enabled = true;
6156         intel_enable_ipc(dev_priv);
6157 }
6158
6159 /*
6160  * Lock protecting IPS related data structures
6161  */
6162 DEFINE_SPINLOCK(mchdev_lock);
6163
6164 /* Global for IPS driver to get at the current i915 device. Protected by
6165  * mchdev_lock. */
6166 static struct drm_i915_private *i915_mch_dev;
6167
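/*
 * Request a new DRPS frequency from the hardware. Returns false without
 * doing anything if the previous command is still pending (MEMCTL_CMD_STS
 * set), true once the new request has been written.
 */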
6168 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6169 {
6170         u16 rgvswctl;
6171
6172         lockdep_assert_held(&mchdev_lock);
6173
6174         rgvswctl = I915_READ16(MEMSWCTL);
6175         if (rgvswctl & MEMCTL_CMD_STS) {
6176                 DRM_DEBUG("gpu busy, RCS change rejected\n");
6177                 return false; /* still busy with another command */
6178         }
6179
6180         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6181                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6182         I915_WRITE16(MEMSWCTL, rgvswctl);
6183         POSTING_READ16(MEMSWCTL);
6184
6185         rgvswctl |= MEMCTL_CMD_STS;
6186         I915_WRITE16(MEMSWCTL, rgvswctl);
6187
6188         return true;
6189 }
6190
6191 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6192 {
6193         u32 rgvmodectl;
6194         u8 fmax, fmin, fstart, vstart;
6195
6196         spin_lock_irq(&mchdev_lock);
6197
6198         rgvmodectl = I915_READ(MEMMODECTL);
6199
6200         /* Enable temp reporting */
6201         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6202         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6203
6204         /* 100ms RC evaluation intervals */
6205         I915_WRITE(RCUPEI, 100000);
6206         I915_WRITE(RCDNEI, 100000);
6207
6208         /* Set max/min thresholds to 90ms and 80ms respectively */
6209         I915_WRITE(RCBMAXAVG, 90000);
6210         I915_WRITE(RCBMINAVG, 80000);
6211
6212         I915_WRITE(MEMIHYST, 1);
6213
6214         /* Set up min, max, and cur for interrupt handling */
6215         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6216         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6217         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6218                 MEMMODE_FSTART_SHIFT;
6219
6220         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6221                 PXVFREQ_PX_SHIFT;
6222
6223         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6224         dev_priv->ips.fstart = fstart;
6225
6226         dev_priv->ips.max_delay = fstart;
6227         dev_priv->ips.min_delay = fmin;
6228         dev_priv->ips.cur_delay = fstart;
6229
6230         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6231                          fmax, fmin, fstart);
6232
6233         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6234
6235         /*
6236          * Interrupts will be enabled in ironlake_irq_postinstall
6237          */
6238
6239         I915_WRITE(VIDSTART, vstart);
6240         POSTING_READ(VIDSTART);
6241
6242         rgvmodectl |= MEMMODE_SWMODE_EN;
6243         I915_WRITE(MEMMODECTL, rgvmodectl);
6244
6245         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6246                 DRM_ERROR("stuck trying to change perf mode\n");
6247         mdelay(1);
6248
6249         ironlake_set_drps(dev_priv, fstart);
6250
6251         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6252                 I915_READ(DDREC) + I915_READ(CSIEC);
6253         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6254         dev_priv->ips.last_count2 = I915_READ(GFXEC);
6255         dev_priv->ips.last_time2 = ktime_get_raw_ns();
6256
6257         spin_unlock_irq(&mchdev_lock);
6258 }
6259
6260 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6261 {
6262         u16 rgvswctl;
6263
6264         spin_lock_irq(&mchdev_lock);
6265
6266         rgvswctl = I915_READ16(MEMSWCTL);
6267
6268         /* Ack interrupts, disable EFC interrupt */
6269         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6270         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6271         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6272         I915_WRITE(DEIIR, DE_PCU_EVENT);
6273         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6274
6275         /* Go back to the starting frequency */
6276         ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6277         mdelay(1);
6278         rgvswctl |= MEMCTL_CMD_STS;
6279         I915_WRITE(MEMSWCTL, rgvswctl);
6280         mdelay(1);
6281
6282         spin_unlock_irq(&mchdev_lock);
6283 }
6284
6285 /* There's a funny hw issue where the hw returns all 0 when reading from
6286  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6287  * ourselves, instead of doing a rmw cycle (which might result in us clearing
6288  * all limits and leaving the gpu stuck at whatever frequency it currently runs at).
6289  */
6290 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6291 {
6292         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6293         u32 limits;
6294
6295         /* Only set the down limit when we've reached the lowest level to avoid
6296          * getting more interrupts, otherwise leave this clear. This prevents a
6297          * race in the hw when coming out of rc6: There's a tiny window where
6298          * the hw runs at the minimal clock before selecting the desired
6299          * frequency; if the down threshold expires in that window we will not
6300          * receive a down interrupt. */
6301         if (INTEL_GEN(dev_priv) >= 9) {
6302                 limits = (rps->max_freq_softlimit) << 23;
6303                 if (val <= rps->min_freq_softlimit)
6304                         limits |= (rps->min_freq_softlimit) << 14;
6305         } else {
6306                 limits = rps->max_freq_softlimit << 24;
6307                 if (val <= rps->min_freq_softlimit)
6308                         limits |= rps->min_freq_softlimit << 16;
6309         }
6310
6311         return limits;
6312 }
6313
6314 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6315 {
6316         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6317         u32 threshold_up = 0, threshold_down = 0; /* in % */
6318         u32 ei_up = 0, ei_down = 0;
6319
6320         lockdep_assert_held(&rps->power.mutex);
6321
6322         if (new_power == rps->power.mode)
6323                 return;
6324
6325         /* Note the units here are not exactly 1us, but 1280ns. */
6326         switch (new_power) {
6327         case LOW_POWER:
6328                 /* Upclock if more than 95% busy over 16ms */
6329                 ei_up = 16000;
6330                 threshold_up = 95;
6331
6332                 /* Downclock if less than 85% busy over 32ms */
6333                 ei_down = 32000;
6334                 threshold_down = 85;
6335                 break;
6336
6337         case BETWEEN:
6338                 /* Upclock if more than 90% busy over 13ms */
6339                 ei_up = 13000;
6340                 threshold_up = 90;
6341
6342                 /* Downclock if less than 75% busy over 32ms */
6343                 ei_down = 32000;
6344                 threshold_down = 75;
6345                 break;
6346
6347         case HIGH_POWER:
6348                 /* Upclock if more than 85% busy over 10ms */
6349                 ei_up = 10000;
6350                 threshold_up = 85;
6351
6352                 /* Downclock if less than 60% busy over 32ms */
6353                 ei_down = 32000;
6354                 threshold_down = 60;
6355                 break;
6356         }
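
        /*
         * Illustration using the LOW_POWER values above: ei_up = 16000 and
         * threshold_up = 95 program the up threshold with 16000 * 95 / 100 =
         * 15200 us worth of evaluation-interval units, i.e. the GPU upclocks
         * once it has been busy for roughly 15.2 ms of a 16 ms window.
         */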
6357
6358         /* When BYT can survive dynamic sw freq adjustments without
6359          * hanging the system, this restriction can be lifted.
6360          */
6361         if (IS_VALLEYVIEW(dev_priv))
6362                 goto skip_hw_write;
6363
6364         I915_WRITE(GEN6_RP_UP_EI,
6365                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
6366         I915_WRITE(GEN6_RP_UP_THRESHOLD,
6367                    GT_INTERVAL_FROM_US(dev_priv,
6368                                        ei_up * threshold_up / 100));
6369
6370         I915_WRITE(GEN6_RP_DOWN_EI,
6371                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
6372         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6373                    GT_INTERVAL_FROM_US(dev_priv,
6374                                        ei_down * threshold_down / 100));
6375
6376         I915_WRITE(GEN6_RP_CONTROL,
6377                    GEN6_RP_MEDIA_TURBO |
6378                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
6379                    GEN6_RP_MEDIA_IS_GFX |
6380                    GEN6_RP_ENABLE |
6381                    GEN6_RP_UP_BUSY_AVG |
6382                    GEN6_RP_DOWN_IDLE_AVG);
6383
6384 skip_hw_write:
6385         rps->power.mode = new_power;
6386         rps->power.up_threshold = threshold_up;
6387         rps->power.down_threshold = threshold_down;
6388 }
6389
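/*
 * Pick the RPS power mode (LOW_POWER/BETWEEN/HIGH_POWER) that matches the
 * requested frequency relative to the efficient/RP0/RP1 frequencies and
 * the softlimits, force HIGH_POWER while an interactive client is
 * registered, and apply the corresponding thresholds.
 */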
6390 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6391 {
6392         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6393         int new_power;
6394
6395         new_power = rps->power.mode;
6396         switch (rps->power.mode) {
6397         case LOW_POWER:
6398                 if (val > rps->efficient_freq + 1 &&
6399                     val > rps->cur_freq)
6400                         new_power = BETWEEN;
6401                 break;
6402
6403         case BETWEEN:
6404                 if (val <= rps->efficient_freq &&
6405                     val < rps->cur_freq)
6406                         new_power = LOW_POWER;
6407                 else if (val >= rps->rp0_freq &&
6408                          val > rps->cur_freq)
6409                         new_power = HIGH_POWER;
6410                 break;
6411
6412         case HIGH_POWER:
6413                 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6414                     val < rps->cur_freq)
6415                         new_power = BETWEEN;
6416                 break;
6417         }
6418         /* Max/min bins are special */
6419         if (val <= rps->min_freq_softlimit)
6420                 new_power = LOW_POWER;
6421         if (val >= rps->max_freq_softlimit)
6422                 new_power = HIGH_POWER;
6423
6424         mutex_lock(&rps->power.mutex);
6425         if (rps->power.interactive)
6426                 new_power = HIGH_POWER;
6427         rps_set_power(dev_priv, new_power);
6428         mutex_unlock(&rps->power.mutex);
6429         rps->last_adj = 0;
6430 }
6431
6432 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6433 {
6434         struct intel_rps *rps = &i915->gt_pm.rps;
6435
6436         if (INTEL_GEN(i915) < 6)
6437                 return;
6438
6439         mutex_lock(&rps->power.mutex);
6440         if (interactive) {
6441                 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6442                         rps_set_power(i915, HIGH_POWER);
6443         } else {
6444                 GEM_BUG_ON(!rps->power.interactive);
6445                 rps->power.interactive--;
6446         }
6447         mutex_unlock(&rps->power.mutex);
6448 }
6449
6450 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6451 {
6452         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6453         u32 mask = 0;
6454
6455         /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6456         if (val > rps->min_freq_softlimit)
6457                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6458         if (val < rps->max_freq_softlimit)
6459                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6460
6461         mask &= dev_priv->pm_rps_events;
6462
6463         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6464 }
6465
6466 /* gen6_set_rps is called to update the frequency request, but should also be
6467  * called when the range (min_delay and max_delay) is modified so that we can
6468  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6469 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6470 {
6471         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6472
6473         /* min/max delay may still have been modified so be sure to
6474          * write the limits value.
6475          */
6476         if (val != rps->cur_freq) {
6477                 gen6_set_rps_thresholds(dev_priv, val);
6478
6479                 if (INTEL_GEN(dev_priv) >= 9)
6480                         I915_WRITE(GEN6_RPNSWREQ,
6481                                    GEN9_FREQUENCY(val));
6482                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6483                         I915_WRITE(GEN6_RPNSWREQ,
6484                                    HSW_FREQUENCY(val));
6485                 else
6486                         I915_WRITE(GEN6_RPNSWREQ,
6487                                    GEN6_FREQUENCY(val) |
6488                                    GEN6_OFFSET(0) |
6489                                    GEN6_AGGRESSIVE_TURBO);
6490         }
6491
6492         /* Make sure we continue to get interrupts
6493          * until we hit the minimum or maximum frequencies.
6494          */
6495         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6496         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6497
6498         rps->cur_freq = val;
6499         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6500
6501         return 0;
6502 }
6503
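/* VLV/CHV frequency requests go through the Punit; CHV only accepts even values. */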
6504 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6505 {
6506         int err;
6507
6508         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6509                       "Odd GPU freq value\n"))
6510                 val &= ~1;
6511
6512         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6513
6514         if (val != dev_priv->gt_pm.rps.cur_freq) {
6515                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6516                 if (err)
6517                         return err;
6518
6519                 gen6_set_rps_thresholds(dev_priv, val);
6520         }
6521
6522         dev_priv->gt_pm.rps.cur_freq = val;
6523         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6524
6525         return 0;
6526 }
6527
6528 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6529  *
6530  * If Gfx is Idle, then
6531  * 1. Forcewake Media well.
6532  * 2. Request idle freq.
6533  * 3. Release Forcewake of Media well.
6534  */
6535 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6536 {
6537         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6538         u32 val = rps->idle_freq;
6539         int err;
6540
6541         if (rps->cur_freq <= val)
6542                 return;
6543
6544         /* The punit delays the write of the frequency and voltage until it
6545          * determines the GPU is awake. During normal usage we don't want to
6546          * waste power changing the frequency if the GPU is sleeping (rc6).
6547          * However, the GPU and driver are now idle and we do not want to delay
6548          * switching to minimum voltage (reducing power whilst idle) as we do
6549          * not expect to be woken in the near future and so must flush the
6550          * change by waking the device.
6551          *
6552          * We choose to take the media powerwell (either would do to trick the
6553          * punit into committing the voltage change) as that takes a lot less
6554          * power than the render powerwell.
6555          */
6556         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6557         err = valleyview_set_rps(dev_priv, val);
6558         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6559
6560         if (err)
6561                 DRM_ERROR("Failed to set RPS for idle\n");
6562 }
6563
6564 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6565 {
6566         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6567
6568         mutex_lock(&dev_priv->pcu_lock);
6569         if (rps->enabled) {
6570                 u8 freq;
6571
6572                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6573                         gen6_rps_reset_ei(dev_priv);
6574                 I915_WRITE(GEN6_PMINTRMSK,
6575                            gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6576
6577                 gen6_enable_rps_interrupts(dev_priv);
6578
6579                 /* Use the user's desired frequency as a guide, but for better
6580                  * performance, jump directly to RPe as our starting frequency.
6581                  */
6582                 freq = max(rps->cur_freq,
6583                            rps->efficient_freq);
6584
6585                 if (intel_set_rps(dev_priv,
6586                                   clamp(freq,
6587                                         rps->min_freq_softlimit,
6588                                         rps->max_freq_softlimit)))
6589                         DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6590         }
6591         mutex_unlock(&dev_priv->pcu_lock);
6592 }
6593
6594 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6595 {
6596         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6597
6598         /* Flush our bottom-half so that it does not race with us
6599          * setting the idle frequency and so that it is bounded by
6600          * our rpm wakeref. And then disable the interrupts to stop any
6601          * further RPS reclocking whilst we are asleep.
6602          */
6603         gen6_disable_rps_interrupts(dev_priv);
6604
6605         mutex_lock(&dev_priv->pcu_lock);
6606         if (rps->enabled) {
6607                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6608                         vlv_set_rps_idle(dev_priv);
6609                 else
6610                         gen6_set_rps(dev_priv, rps->idle_freq);
6611                 rps->last_adj = 0;
6612                 I915_WRITE(GEN6_PMINTRMSK,
6613                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6614         }
6615         mutex_unlock(&dev_priv->pcu_lock);
6616 }
6617
6618 void gen6_rps_boost(struct i915_request *rq,
6619                     struct intel_rps_client *rps_client)
6620 {
6621         struct intel_rps *rps = &rq->i915->gt_pm.rps;
6622         unsigned long flags;
6623         bool boost;
6624
6625         /* This is intentionally racy! We peek at the state here, then
6626          * validate inside the RPS worker.
6627          */
6628         if (!rps->enabled)
6629                 return;
6630
6631         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
6632                 return;
6633
6634         /* Serializes with i915_request_retire() */
6635         boost = false;
6636         spin_lock_irqsave(&rq->lock, flags);
6637         if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6638                 boost = !atomic_fetch_inc(&rps->num_waiters);
6639                 rq->waitboost = true;
6640         }
6641         spin_unlock_irqrestore(&rq->lock, flags);
6642         if (!boost)
6643                 return;
6644
6645         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6646                 schedule_work(&rps->work);
6647
6648         atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
6649 }
6650
6651 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6652 {
6653         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6654         int err;
6655
6656         lockdep_assert_held(&dev_priv->pcu_lock);
6657         GEM_BUG_ON(val > rps->max_freq);
6658         GEM_BUG_ON(val < rps->min_freq);
6659
6660         if (!rps->enabled) {
6661                 rps->cur_freq = val;
6662                 return 0;
6663         }
6664
6665         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6666                 err = valleyview_set_rps(dev_priv, val);
6667         else
6668                 err = gen6_set_rps(dev_priv, val);
6669
6670         return err;
6671 }
6672
6673 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6674 {
6675         I915_WRITE(GEN6_RC_CONTROL, 0);
6676         I915_WRITE(GEN9_PG_ENABLE, 0);
6677 }
6678
6679 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6680 {
6681         I915_WRITE(GEN6_RP_CONTROL, 0);
6682 }
6683
6684 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6685 {
6686         I915_WRITE(GEN6_RC_CONTROL, 0);
6687 }
6688
6689 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6690 {
6691         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6692         I915_WRITE(GEN6_RP_CONTROL, 0);
6693 }
6694
6695 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6696 {
6697         I915_WRITE(GEN6_RC_CONTROL, 0);
6698 }
6699
6700 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6701 {
6702         I915_WRITE(GEN6_RP_CONTROL, 0);
6703 }
6704
6705 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6706 {
6707         /* We're doing forcewake before disabling RC6,
6708          * which is what the BIOS expects when going into suspend */
6709         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6710
6711         I915_WRITE(GEN6_RC_CONTROL, 0);
6712
6713         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6714 }
6715
6716 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6717 {
6718         I915_WRITE(GEN6_RP_CONTROL, 0);
6719 }
6720
6721 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6722 {
6723         bool enable_rc6 = true;
6724         unsigned long rc6_ctx_base;
6725         u32 rc_ctl;
6726         int rc_sw_target;
6727
6728         rc_ctl = I915_READ(GEN6_RC_CONTROL);
6729         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6730                        RC_SW_TARGET_STATE_SHIFT;
6731         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6732                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6733                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6734                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6735                          rc_sw_target);
6736
6737         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6738                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6739                 enable_rc6 = false;
6740         }
6741
6742         /*
6743          * The exact context size is not known for BXT, so assume a page size
6744          * for this check.
6745          */
6746         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6747         if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6748               (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6749                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6750                 enable_rc6 = false;
6751         }
6752
6753         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6754               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6755               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6756               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6757                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6758                 enable_rc6 = false;
6759         }
6760
6761         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6762             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6763             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6764                 DRM_DEBUG_DRIVER("Pushbus not set up properly.\n");
6765                 enable_rc6 = false;
6766         }
6767
6768         if (!I915_READ(GEN6_GFXPAUSE)) {
6769                 DRM_DEBUG_DRIVER("GFX pause not set up properly.\n");
6770                 enable_rc6 = false;
6771         }
6772
6773         if (!I915_READ(GEN8_MISC_CTRL0)) {
6774                 DRM_DEBUG_DRIVER("GPM control not set up properly.\n");
6775                 enable_rc6 = false;
6776         }
6777
6778         return enable_rc6;
6779 }
6780
6781 static bool sanitize_rc6(struct drm_i915_private *i915)
6782 {
6783         struct intel_device_info *info = mkwrite_device_info(i915);
6784
6785         /* Powersaving is controlled by the host when inside a VM */
6786         if (intel_vgpu_active(i915))
6787                 info->has_rc6 = 0;
6788
6789         if (info->has_rc6 &&
6790             IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
6791                 DRM_INFO("RC6 disabled by BIOS\n");
6792                 info->has_rc6 = 0;
6793         }
6794
6795         /*
6796          * We assume that we do not have any deep rc6 levels if we don't
6797          * have the previous rc6 level supported, i.e. we use HAS_RC6()
6798          * as the initial coarse check for rc6 in general, moving on to
6799          * progressively finer/deeper levels.
6800          */
6801         if (!info->has_rc6 && info->has_rc6p)
6802                 info->has_rc6p = 0;
6803
6804         return info->has_rc6;
6805 }
6806
6807 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6808 {
6809         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6810
6811         /* All of these values are in units of 50MHz */
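        /*
         * For example, a raw RP0 field of 0x16 (22) corresponds to
         * 22 * 50 MHz = 1100 MHz.
         */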
6812
6813         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6814         if (IS_GEN9_LP(dev_priv)) {
6815                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6816                 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6817                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6818                 rps->min_freq = (rp_state_cap >>  0) & 0xff;
6819         } else {
6820                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6821                 rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
6822                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6823                 rps->min_freq = (rp_state_cap >> 16) & 0xff;
6824         }
6825         /* hw_max = RP0 until we check for overclocking */
6826         rps->max_freq = rps->rp0_freq;
6827
6828         rps->efficient_freq = rps->rp1_freq;
6829         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
6830             IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6831                 u32 ddcc_status = 0;
6832
6833                 if (sandybridge_pcode_read(dev_priv,
6834                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6835                                            &ddcc_status) == 0)
6836                         rps->efficient_freq =
6837                                 clamp_t(u8,
6838                                         ((ddcc_status >> 8) & 0xff),
6839                                         rps->min_freq,
6840                                         rps->max_freq);
6841         }
6842
6843         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6844                 /* Store the frequency values in 16.66 MHz units, which is
6845                  * the natural hardware unit for SKL.
6846                  */
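                /*
                 * For example, assuming GEN9_FREQ_SCALER is 3, an RP0 of 22
                 * (1100 MHz in 50 MHz units) becomes 66 in 16.66 MHz units.
                 */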
6847                 rps->rp0_freq *= GEN9_FREQ_SCALER;
6848                 rps->rp1_freq *= GEN9_FREQ_SCALER;
6849                 rps->min_freq *= GEN9_FREQ_SCALER;
6850                 rps->max_freq *= GEN9_FREQ_SCALER;
6851                 rps->efficient_freq *= GEN9_FREQ_SCALER;
6852         }
6853 }
6854
6855 static void reset_rps(struct drm_i915_private *dev_priv,
6856                       int (*set)(struct drm_i915_private *, u8))
6857 {
6858         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6859         u8 freq = rps->cur_freq;
6860
6861         /* force a reset */
6862         rps->power.mode = -1;
6863         rps->cur_freq = -1;
6864
6865         if (set(dev_priv, freq))
6866                 DRM_ERROR("Failed to reset RPS to initial values\n");
6867 }
6868
6869 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
6870 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
6871 {
6872         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6873
6874         /* Program defaults and thresholds for RPS */
6875         if (IS_GEN9(dev_priv))
6876                 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6877                         GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
6878
6879         /* 1 second timeout */
6880         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6881                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
6882
6883         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
6884
6885         /* Leaning on the below call to gen6_set_rps to program/set up the
6886          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6887          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6888         reset_rps(dev_priv, gen6_set_rps);
6889
6890         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6891 }
6892
6893 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
6894 {
6895         struct intel_engine_cs *engine;
6896         enum intel_engine_id id;
6897         u32 rc6_mode;
6898
6899         /* 1a: Software RC state - RC0 */
6900         I915_WRITE(GEN6_RC_STATE, 0);
6901
6902         /* 1b: Get forcewake during program sequence. Although the driver
6903          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
6904         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6905
6906         /* 2a: Disable RC states. */
6907         I915_WRITE(GEN6_RC_CONTROL, 0);
6908
6909         /* 2b: Program RC6 thresholds. */
6910         if (INTEL_GEN(dev_priv) >= 10) {
6911                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
6912                 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
6913         } else if (IS_SKYLAKE(dev_priv)) {
6914                 /*
6915                  * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6916                  * when CPG is enabled
6917                  */
6918                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
6919         } else {
6920                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
6921         }
6922
6923         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6924         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6925         for_each_engine(engine, dev_priv, id)
6926                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6927
6928         if (HAS_GUC(dev_priv))
6929                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6930
6931         I915_WRITE(GEN6_RC_SLEEP, 0);
6932
6933         /*
6934          * 2c: Program Coarse Power Gating Policies.
6935          *
6936          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
6937          * use instead is a more conservative estimate for the maximum time
6938          * it takes us to service a CS interrupt and submit a new ELSP - that
6939          * is the time which the GPU is idle waiting for the CPU to select the
6940          * next request to execute. If the idle hysteresis is less than that
6941          * interrupt service latency, the hardware will automatically gate
6942          * the power well and we will then incur the wake up cost on top of
6943          * the service latency. A similar guide from intel_pstate is that we
6944          * do not want the enable hysteresis to be less than the wakeup latency.
6945          *
6946          * igt/gem_exec_nop/sequential provides a rough estimate for the
6947          * service latency, and puts it around 10us for Broadwell (and other
6948          * big core) and around 40us for Broxton (and other low power cores).
6949          * [Note that for legacy ringbuffer submission, this is less than 1us!]
6950          * However, the wakeup latency on Broxton is closer to 100us. To be
6951          * conservative, we have to factor in a context switch on top (due
6952          * to ksoftirqd).
6953          */
6954         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
6955         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
6956
6957         /* 3a: Enable RC6 */
6958         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
6959
6960         /* WaRsUseTimeoutMode:cnl (pre-prod) */
6961         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
6962                 rc6_mode = GEN7_RC_CTL_TO_MODE;
6963         else
6964                 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
6965
6966         I915_WRITE(GEN6_RC_CONTROL,
6967                    GEN6_RC_CTL_HW_ENABLE |
6968                    GEN6_RC_CTL_RC6_ENABLE |
6969                    rc6_mode);
6970
6971         /*
6972          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
6973          * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
6974          */
6975         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
6976                 I915_WRITE(GEN9_PG_ENABLE, 0);
6977         else
6978                 I915_WRITE(GEN9_PG_ENABLE,
6979                            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
6980
6981         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6982 }
6983
6984 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
6985 {
6986         struct intel_engine_cs *engine;
6987         enum intel_engine_id id;
6988
6989         /* 1a: Software RC state - RC0 */
6990         I915_WRITE(GEN6_RC_STATE, 0);
6991
6992         /* 1b: Get forcewake during program sequence. Although the driver
6993          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
6994         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6995
6996         /* 2a: Disable RC states. */
6997         I915_WRITE(GEN6_RC_CONTROL, 0);
6998
6999         /* 2b: Program RC6 thresholds. */
7000         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7001         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7002         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7003         for_each_engine(engine, dev_priv, id)
7004                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7005         I915_WRITE(GEN6_RC_SLEEP, 0);
7006         I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7007
7008         /* 3: Enable RC6 */
7009
7010         I915_WRITE(GEN6_RC_CONTROL,
7011                    GEN6_RC_CTL_HW_ENABLE |
7012                    GEN7_RC_CTL_TO_MODE |
7013                    GEN6_RC_CTL_RC6_ENABLE);
7014
7015         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7016 }
7017
7018 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7019 {
7020         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7021
7022         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7023
7024         /* 1: Program defaults and thresholds for RPS */
7025         I915_WRITE(GEN6_RPNSWREQ,
7026                    HSW_FREQUENCY(rps->rp1_freq));
7027         I915_WRITE(GEN6_RC_VIDEO_FREQ,
7028                    HSW_FREQUENCY(rps->rp1_freq));
7029         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7030         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7031
7032         /* Docs recommend 900MHz, and 300 MHz respectively */
7033         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7034                    rps->max_freq_softlimit << 24 |
7035                    rps->min_freq_softlimit << 16);
7036
7037         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7038         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
7039         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7040         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7041
7042         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7043
7044         /* 2: Enable RPS */
7045         I915_WRITE(GEN6_RP_CONTROL,
7046                    GEN6_RP_MEDIA_TURBO |
7047                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7048                    GEN6_RP_MEDIA_IS_GFX |
7049                    GEN6_RP_ENABLE |
7050                    GEN6_RP_UP_BUSY_AVG |
7051                    GEN6_RP_DOWN_IDLE_AVG);
7052
7053         reset_rps(dev_priv, gen6_set_rps);
7054
7055         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7056 }
7057
7058 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7059 {
7060         struct intel_engine_cs *engine;
7061         enum intel_engine_id id;
7062         u32 rc6vids, rc6_mask;
7063         u32 gtfifodbg;
7064         int ret;
7065
7066         I915_WRITE(GEN6_RC_STATE, 0);
7067
7068         /* Clear the DBG now so we don't confuse earlier errors */
7069         gtfifodbg = I915_READ(GTFIFODBG);
7070         if (gtfifodbg) {
7071                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7072                 I915_WRITE(GTFIFODBG, gtfifodbg);
7073         }
7074
7075         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7076
7077         /* disable the counters and set deterministic thresholds */
7078         I915_WRITE(GEN6_RC_CONTROL, 0);
7079
7080         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7081         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7082         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7083         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7084         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7085
7086         for_each_engine(engine, dev_priv, id)
7087                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7088
7089         I915_WRITE(GEN6_RC_SLEEP, 0);
7090         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7091         if (IS_IVYBRIDGE(dev_priv))
7092                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7093         else
7094                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7095         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7096         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7097
7098         /* We don't use those on Haswell */
7099         rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7100         if (HAS_RC6p(dev_priv))
7101                 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7102         if (HAS_RC6pp(dev_priv))
7103                 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7104         I915_WRITE(GEN6_RC_CONTROL,
7105                    rc6_mask |
7106                    GEN6_RC_CTL_EI_MODE(1) |
7107                    GEN6_RC_CTL_HW_ENABLE);
7108
7109         rc6vids = 0;
7110         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7111         if (IS_GEN6(dev_priv) && ret) {
7112                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7113         } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7114                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7115                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7116                 rc6vids &= 0xffff00;
7117                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7118                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7119                 if (ret)
7120                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7121         }
7122
7123         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7124 }
7125
7126 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7127 {
7128         /* Here begins a magic sequence of register writes to enable
7129          * auto-downclocking.
7130          *
7131          * Perhaps there might be some value in exposing these to
7132          * userspace...
7133          */
7134         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7135
7136         /* Power down if completely idle for over 50ms */
7137         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7138         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7139
7140         reset_rps(dev_priv, gen6_set_rps);
7141
7142         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7143 }
7144
7145 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7146 {
7147         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7148         const int min_freq = 15;
7149         const int scaling_factor = 180;
7150         unsigned int gpu_freq;
7151         unsigned int max_ia_freq, min_ring_freq;
7152         unsigned int max_gpu_freq, min_gpu_freq;
7153         struct cpufreq_policy *policy;
7154
7155         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7156
7157         if (rps->max_freq <= rps->min_freq)
7158                 return;
7159
7160         policy = cpufreq_cpu_get(0);
7161         if (policy) {
7162                 max_ia_freq = policy->cpuinfo.max_freq;
7163                 cpufreq_cpu_put(policy);
7164         } else {
7165                 /*
7166                  * Default to measured freq if none found, PCU will ensure we
7167                  * don't go over
7168                  */
7169                 max_ia_freq = tsc_khz;
7170         }
7171
7172         /* Convert from kHz to MHz */
7173         max_ia_freq /= 1000;
7174
7175         min_ring_freq = I915_READ(DCLK) & 0xf;
7176         /* convert DDR frequency from units of 266.6MHz to bandwidth */
7177         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
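        /*
         * For example, a DCLK field of 3 (3 * 266.6 MHz, i.e. roughly an
         * 800 MHz DDR clock) becomes mult_frac(3, 8, 3) = 8 after the
         * conversion above.
         */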
7178
7179         min_gpu_freq = rps->min_freq;
7180         max_gpu_freq = rps->max_freq;
7181         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7182                 /* Convert GT frequency to 50 MHz units */
7183                 min_gpu_freq /= GEN9_FREQ_SCALER;
7184                 max_gpu_freq /= GEN9_FREQ_SCALER;
7185         }
7186
7187         /*
7188          * For each potential GPU frequency, load a ring frequency we'd like
7189          * to use for memory access.  We do this by specifying the IA frequency
7190          * the PCU should use as a reference to determine the ring frequency.
7191          */
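        /*
         * As a rough illustration: on Haswell a GT ratio of 20 (20 * 50 MHz
         * = 1000 MHz) requests a ring ratio of mult_frac(20, 5, 4) = 25,
         * i.e. the ring is asked to run about 25% faster than the GT,
         * subject to the min_ring_freq floor computed above.
         */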
7192         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7193                 const int diff = max_gpu_freq - gpu_freq;
7194                 unsigned int ia_freq = 0, ring_freq = 0;
7195
7196                 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7197                         /*
7198                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
7199                          * No floor required for ring frequency on SKL.
7200                          */
7201                         ring_freq = gpu_freq;
7202                 } else if (INTEL_GEN(dev_priv) >= 8) {
7203                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
7204                         ring_freq = max(min_ring_freq, gpu_freq);
7205                 } else if (IS_HASWELL(dev_priv)) {
7206                         ring_freq = mult_frac(gpu_freq, 5, 4);
7207                         ring_freq = max(min_ring_freq, ring_freq);
7208                         /* leave ia_freq as the default, chosen by cpufreq */
7209                 } else {
7210                         /* On older processors, there is no separate ring
7211                          * clock domain, so in order to boost the bandwidth
7212                          * of the ring, we need to upclock the CPU (ia_freq).
7213                          *
7214                          * For GPU frequencies less than 750MHz,
7215                          * just use the lowest ring freq.
7216                          */
7217                         if (gpu_freq < min_freq)
7218                                 ia_freq = 800;
7219                         else
7220                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7221                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7222                 }
7223
7224                 sandybridge_pcode_write(dev_priv,
7225                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7226                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7227                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7228                                         gpu_freq);
7229         }
7230 }
7231
7232 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7233 {
7234         u32 val, rp0;
7235
7236         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7237
7238         switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
7239         case 8:
7240                 /* (2 * 4) config */
7241                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7242                 break;
7243         case 12:
7244                 /* (2 * 6) config */
7245                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7246                 break;
7247         case 16:
7248                 /* (2 * 8) config */
7249         default:
7250                 /* Setting (2 * 8) Min RP0 for any other combination */
7251                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7252                 break;
7253         }
7254
7255         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7256
7257         return rp0;
7258 }
7259
7260 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7261 {
7262         u32 val, rpe;
7263
7264         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7265         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7266
7267         return rpe;
7268 }
7269
7270 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7271 {
7272         u32 val, rp1;
7273
7274         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7275         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7276
7277         return rp1;
7278 }
7279
7280 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7281 {
7282         u32 val, rpn;
7283
7284         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7285         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7286                        FB_GFX_FREQ_FUSE_MASK);
7287
7288         return rpn;
7289 }
7290
7291 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7292 {
7293         u32 val, rp1;
7294
7295         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7296
7297         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7298
7299         return rp1;
7300 }
7301
7302 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7303 {
7304         u32 val, rp0;
7305
7306         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7307
7308         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7309         /* Clamp to max */
7310         rp0 = min_t(u32, rp0, 0xea);
7311
7312         return rp0;
7313 }
7314
7315 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7316 {
7317         u32 val, rpe;
7318
7319         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7320         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7321         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7322         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7323
7324         return rpe;
7325 }
7326
7327 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7328 {
7329         u32 val;
7330
7331         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7332         /*
7333          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7334          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7335          * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7336          * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7337          * to make sure it matches what Punit accepts.
7338          */
7339         return max_t(u32, val, 0xc0);
7340 }
7341
7342 /* Check that the pctx buffer wasn't moved under us. */
7343 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7344 {
7345         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7346
7347         WARN_ON(pctx_addr != dev_priv->dsm.start +
7348                              dev_priv->vlv_pctx->stolen->start);
7349 }
7350
7351
7352 /* Check that the pcbr address is not empty. */
7353 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7354 {
7355         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7356
7357         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7358 }
7359
7360 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7361 {
7362         resource_size_t pctx_paddr, paddr;
7363         resource_size_t pctx_size = 32*1024;
7364         u32 pcbr;
7365
7366         pcbr = I915_READ(VLV_PCBR);
7367         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7368                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7369                 paddr = dev_priv->dsm.end + 1 - pctx_size;
7370                 GEM_BUG_ON(paddr > U32_MAX);
7371
7372                 pctx_paddr = (paddr & (~4095));
7373                 I915_WRITE(VLV_PCBR, pctx_paddr);
7374         }
7375
7376         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7377 }
7378
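/*
 * The power context (PCTX) lives in stolen memory and its base address is
 * advertised to the hardware through VLV_PCBR.  If the BIOS has already
 * programmed PCBR we simply wrap the preallocated range; otherwise we
 * carve a 24KiB object out of stolen memory and program PCBR ourselves.
 */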
7379 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7380 {
7381         struct drm_i915_gem_object *pctx;
7382         resource_size_t pctx_paddr;
7383         resource_size_t pctx_size = 24*1024;
7384         u32 pcbr;
7385
7386         pcbr = I915_READ(VLV_PCBR);
7387         if (pcbr) {
7388                 /* BIOS set it up already, grab the pre-alloc'd space */
7389                 resource_size_t pcbr_offset;
7390
7391                 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7392                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7393                                                                       pcbr_offset,
7394                                                                       I915_GTT_OFFSET_NONE,
7395                                                                       pctx_size);
7396                 goto out;
7397         }
7398
7399         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7400
7401         /*
7402          * From the Gunit register HAS:
7403          * The Gfx driver is expected to program this register and ensure
7404          * proper allocation within Gfx stolen memory.  For example, this
7405          * register should be programmed such that the PCBR range does not
7406          * overlap with other ranges, such as the frame buffer, protected
7407          * memory, or any other relevant ranges.
7408          */
7409         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7410         if (!pctx) {
7411                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7412                 goto out;
7413         }
7414
7415         GEM_BUG_ON(range_overflows_t(u64,
7416                                      dev_priv->dsm.start,
7417                                      pctx->stolen->start,
7418                                      U32_MAX));
7419         pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7420         I915_WRITE(VLV_PCBR, pctx_paddr);
7421
7422 out:
7423         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7424         dev_priv->vlv_pctx = pctx;
7425 }
7426
7427 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7428 {
7429         struct drm_i915_gem_object *pctx;
7430
7431         pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7432         if (pctx)
7433                 i915_gem_object_put(pctx);
7434 }
7435
7436 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7437 {
7438         dev_priv->gt_pm.rps.gpll_ref_freq =
7439                 vlv_get_cck_clock(dev_priv, "GPLL ref",
7440                                   CCK_GPLL_CLOCK_CONTROL,
7441                                   dev_priv->czclk_freq);
7442
7443         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7444                          dev_priv->gt_pm.rps.gpll_ref_freq);
7445 }
7446
7447 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7448 {
7449         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7450         u32 val;
7451
7452         valleyview_setup_pctx(dev_priv);
7453
7454         vlv_init_gpll_ref_freq(dev_priv);
7455
7456         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7457         switch ((val >> 6) & 3) {
7458         case 0:
7459         case 1:
7460                 dev_priv->mem_freq = 800;
7461                 break;
7462         case 2:
7463                 dev_priv->mem_freq = 1066;
7464                 break;
7465         case 3:
7466                 dev_priv->mem_freq = 1333;
7467                 break;
7468         }
7469         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7470
7471         rps->max_freq = valleyview_rps_max_freq(dev_priv);
7472         rps->rp0_freq = rps->max_freq;
7473         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7474                          intel_gpu_freq(dev_priv, rps->max_freq),
7475                          rps->max_freq);
7476
7477         rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7478         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7479                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7480                          rps->efficient_freq);
7481
7482         rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7483         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7484                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7485                          rps->rp1_freq);
7486
7487         rps->min_freq = valleyview_rps_min_freq(dev_priv);
7488         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7489                          intel_gpu_freq(dev_priv, rps->min_freq),
7490                          rps->min_freq);
7491 }
7492
7493 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7494 {
7495         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7496         u32 val;
7497
7498         cherryview_setup_pctx(dev_priv);
7499
7500         vlv_init_gpll_ref_freq(dev_priv);
7501
7502         mutex_lock(&dev_priv->sb_lock);
7503         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7504         mutex_unlock(&dev_priv->sb_lock);
7505
7506         switch ((val >> 2) & 0x7) {
7507         case 3:
7508                 dev_priv->mem_freq = 2000;
7509                 break;
7510         default:
7511                 dev_priv->mem_freq = 1600;
7512                 break;
7513         }
7514         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7515
7516         rps->max_freq = cherryview_rps_max_freq(dev_priv);
7517         rps->rp0_freq = rps->max_freq;
7518         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7519                          intel_gpu_freq(dev_priv, rps->max_freq),
7520                          rps->max_freq);
7521
7522         rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7523         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7524                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7525                          rps->efficient_freq);
7526
7527         rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7528         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7529                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7530                          rps->rp1_freq);
7531
7532         rps->min_freq = cherryview_rps_min_freq(dev_priv);
7533         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7534                          intel_gpu_freq(dev_priv, rps->min_freq),
7535                          rps->min_freq);
7536
7537         WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7538                    rps->min_freq) & 1,
7539                   "Odd GPU freq values\n");
7540 }
7541
7542 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7543 {
7544         valleyview_cleanup_pctx(dev_priv);
7545 }
7546
7547 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7548 {
7549         struct intel_engine_cs *engine;
7550         enum intel_engine_id id;
7551         u32 gtfifodbg, rc6_mode, pcbr;
7552
7553         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7554                                              GT_FIFO_FREE_ENTRIES_CHV);
7555         if (gtfifodbg) {
7556                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7557                                  gtfifodbg);
7558                 I915_WRITE(GTFIFODBG, gtfifodbg);
7559         }
7560
7561         cherryview_check_pctx(dev_priv);
7562
7563         /* 1a & 1b: Get forcewake during program sequence. Although the driver
7564          * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7565         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7566
7567         /*  Disable RC states. */
7568         I915_WRITE(GEN6_RC_CONTROL, 0);
7569
7570         /* 2a: Program RC6 thresholds. */
7571         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7572         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7573         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7574
7575         for_each_engine(engine, dev_priv, id)
7576                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7577         I915_WRITE(GEN6_RC_SLEEP, 0);
7578
7579         /* TO threshold set to 500 us (0x186 * 1.28 us) */
7580         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7581
7582         /* Allows RC6 residency counter to work */
7583         I915_WRITE(VLV_COUNTER_CONTROL,
7584                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7585                                       VLV_MEDIA_RC6_COUNT_EN |
7586                                       VLV_RENDER_RC6_COUNT_EN));
7587
7588         /* For now we assume BIOS is allocating and populating the PCBR */
7589         pcbr = I915_READ(VLV_PCBR);
7590
7591         /* 3: Enable RC6 */
7592         rc6_mode = 0;
7593         if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7594                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7595         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7596
7597         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7598 }
7599
7600 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7601 {
7602         u32 val;
7603
7604         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7605
7606         /* 1: Program defaults and thresholds for RPS */
7607         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7608         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7609         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7610         I915_WRITE(GEN6_RP_UP_EI, 66000);
7611         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7612
7613         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7614
7615         /* 2: Enable RPS */
7616         I915_WRITE(GEN6_RP_CONTROL,
7617                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7618                    GEN6_RP_MEDIA_IS_GFX |
7619                    GEN6_RP_ENABLE |
7620                    GEN6_RP_UP_BUSY_AVG |
7621                    GEN6_RP_DOWN_IDLE_AVG);
7622
7623         /* Setting Fixed Bias */
7624         val = VLV_OVERRIDE_EN |
7625                   VLV_SOC_TDP_EN |
7626                   CHV_BIAS_CPU_50_SOC_50;
7627         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7628
7629         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7630
7631         /* RPS code assumes GPLL is used */
7632         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7633
7634         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7635         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7636
7637         reset_rps(dev_priv, valleyview_set_rps);
7638
7639         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7640 }
7641
7642 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7643 {
7644         struct intel_engine_cs *engine;
7645         enum intel_engine_id id;
7646         u32 gtfifodbg;
7647
7648         valleyview_check_pctx(dev_priv);
7649
7650         gtfifodbg = I915_READ(GTFIFODBG);
7651         if (gtfifodbg) {
7652                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7653                                  gtfifodbg);
7654                 I915_WRITE(GTFIFODBG, gtfifodbg);
7655         }
7656
7657         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7658
7659         /*  Disable RC states. */
7660         I915_WRITE(GEN6_RC_CONTROL, 0);
7661
7662         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7663         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7664         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7665
7666         for_each_engine(engine, dev_priv, id)
7667                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7668
7669         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7670
7671         /* Allows RC6 residency counter to work */
7672         I915_WRITE(VLV_COUNTER_CONTROL,
7673                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7674                                       VLV_MEDIA_RC0_COUNT_EN |
7675                                       VLV_RENDER_RC0_COUNT_EN |
7676                                       VLV_MEDIA_RC6_COUNT_EN |
7677                                       VLV_RENDER_RC6_COUNT_EN));
7678
7679         I915_WRITE(GEN6_RC_CONTROL,
7680                    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7681
7682         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7683 }
7684
7685 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7686 {
7687         u32 val;
7688
7689         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7690
7691         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7692         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7693         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7694         I915_WRITE(GEN6_RP_UP_EI, 66000);
7695         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7696
7697         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7698
7699         I915_WRITE(GEN6_RP_CONTROL,
7700                    GEN6_RP_MEDIA_TURBO |
7701                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7702                    GEN6_RP_MEDIA_IS_GFX |
7703                    GEN6_RP_ENABLE |
7704                    GEN6_RP_UP_BUSY_AVG |
7705                    GEN6_RP_DOWN_IDLE_CONT);
7706
7707         /* Setting Fixed Bias */
7708         val = VLV_OVERRIDE_EN |
7709                   VLV_SOC_TDP_EN |
7710                   VLV_BIAS_CPU_125_SOC_875;
7711         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7712
7713         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7714
7715         /* RPS code assumes GPLL is used */
7716         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7717
7718         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7719         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7720
7721         reset_rps(dev_priv, valleyview_set_rps);
7722
7723         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7724 }
7725
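/*
 * Decode a PXVFREQ-style value into a frequency:
 *
 *   freq = (div * 133333) / ((1 << post) * pre)
 *
 * e.g. div = 24, post = 1, pre = 1 gives 24 * 133333 / 2 = 1599996,
 * roughly 1.6 GHz if the result is taken to be in kHz (the unit is an
 * assumption here; the caller below only uses freq / 1000 for weighting).
 */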
7726 static unsigned long intel_pxfreq(u32 vidfreq)
7727 {
7728         unsigned long freq;
7729         int div = (vidfreq & 0x3f0000) >> 16;
7730         int post = (vidfreq & 0x3000) >> 12;
7731         int pre = (vidfreq & 0x7);
7732
7733         if (!pre)
7734                 return 0;
7735
7736         freq = ((div * 133333) / ((1<<post) * pre));
7737
7738         return freq;
7739 }
7740
7741 static const struct cparams {
7742         u16 i;
7743         u16 t;
7744         u16 m;
7745         u16 c;
7746 } cparams[] = {
7747         { 1, 1333, 301, 28664 },
7748         { 1, 1066, 294, 24460 },
7749         { 1, 800, 294, 25192 },
7750         { 0, 1333, 276, 27605 },
7751         { 0, 1066, 276, 27605 },
7752         { 0, 800, 231, 23784 },
7753 };
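/*
 * The (m, c) pairs above are selected by matching the platform's stored
 * parameters (dev_priv->ips.c_m and ips.r_t) and are then applied as a
 * linear fit in __i915_chipset_val() below:
 *
 *   chipset_power = (m * (delta_counts / delta_ms) + c) / 10
 */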
7754
7755 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7756 {
7757         u64 total_count, diff, ret;
7758         u32 count1, count2, count3, m = 0, c = 0;
7759         unsigned long now = jiffies_to_msecs(jiffies), diff1;
7760         int i;
7761
7762         lockdep_assert_held(&mchdev_lock);
7763
7764         diff1 = now - dev_priv->ips.last_time1;
7765
7766         /* Prevent division-by-zero if we are asking too fast.
7767          * Also, we don't get interesting results if we are polling
7768          * faster than once in 10ms, so just return the saved value
7769          * in such cases.
7770          */
7771         if (diff1 <= 10)
7772                 return dev_priv->ips.chipset_power;
7773
7774         count1 = I915_READ(DMIEC);
7775         count2 = I915_READ(DDREC);
7776         count3 = I915_READ(CSIEC);
7777
7778         total_count = count1 + count2 + count3;
7779
7780         /* FIXME: handle per-counter overflow */
7781         if (total_count < dev_priv->ips.last_count1) {
7782                 diff = ~0UL - dev_priv->ips.last_count1;
7783                 diff += total_count;
7784         } else {
7785                 diff = total_count - dev_priv->ips.last_count1;
7786         }
7787
7788         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7789                 if (cparams[i].i == dev_priv->ips.c_m &&
7790                     cparams[i].t == dev_priv->ips.r_t) {
7791                         m = cparams[i].m;
7792                         c = cparams[i].c;
7793                         break;
7794                 }
7795         }
7796
7797         diff = div_u64(diff, diff1);
7798         ret = ((m * diff) + c);
7799         ret = div_u64(ret, 10);
7800
7801         dev_priv->ips.last_count1 = total_count;
7802         dev_priv->ips.last_time1 = now;
7803
7804         dev_priv->ips.chipset_power = ret;
7805
7806         return ret;
7807 }
7808
7809 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7810 {
7811         unsigned long val;
7812
7813         if (!IS_GEN5(dev_priv))
7814                 return 0;
7815
7816         spin_lock_irq(&mchdev_lock);
7817
7818         val = __i915_chipset_val(dev_priv);
7819
7820         spin_unlock_irq(&mchdev_lock);
7821
7822         return val;
7823 }
7824
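/*
 * i915_mch_val() derives a thermal reading from the TSFS slope/intercept
 * fields and the TR1 register:
 *
 *   val = (slope * TR1) / 127 - intercept
 *
 * The units are whatever the hardware reports; the result is only used to
 * pick a correction factor in __i915_gfx_val().
 */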
7825 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7826 {
7827         unsigned long m, x, b;
7828         u32 tsfs;
7829
7830         tsfs = I915_READ(TSFS);
7831
7832         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7833         x = I915_READ8(TR1);
7834
7835         b = tsfs & TSFS_INTR_MASK;
7836
7837         return ((m * x) / 127) - b;
7838 }
7839
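/*
 * Map a raw PXVID field to a voltage-like value: values 8..30 are clamped
 * up to 31, and the result is (pxvid + 2) * 125.  For example PXVID 31
 * maps to (31 + 2) * 125 = 4125; pvid_to_extvid() then subtracts 1125 on
 * mobile parts.
 */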
7840 static int _pxvid_to_vd(u8 pxvid)
7841 {
7842         if (pxvid == 0)
7843                 return 0;
7844
7845         if (pxvid >= 8 && pxvid < 31)
7846                 pxvid = 31;
7847
7848         return (pxvid + 2) * 125;
7849 }
7850
7851 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
7852 {
7853         const int vd = _pxvid_to_vd(pxvid);
7854         const int vm = vd - 1125;
7855
7856         if (INTEL_INFO(dev_priv)->is_mobile)
7857                 return vm > 0 ? vm : 0;
7858
7859         return vd;
7860 }
7861
7862 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
7863 {
7864         u64 now, diff, diffms;
7865         u32 count;
7866
7867         lockdep_assert_held(&mchdev_lock);
7868
7869         now = ktime_get_raw_ns();
7870         diffms = now - dev_priv->ips.last_time2;
7871         do_div(diffms, NSEC_PER_MSEC);
7872
7873         /* Don't divide by 0 */
7874         if (!diffms)
7875                 return;
7876
7877         count = I915_READ(GFXEC);
7878
7879         if (count < dev_priv->ips.last_count2) {
7880                 diff = ~0UL - dev_priv->ips.last_count2;
7881                 diff += count;
7882         } else {
7883                 diff = count - dev_priv->ips.last_count2;
7884         }
7885
7886         dev_priv->ips.last_count2 = count;
7887         dev_priv->ips.last_time2 = now;
7888
7889         /* More magic constants... */
7890         diff = diff * 1181;
7891         diff = div_u64(diff, diffms * 10);
7892         dev_priv->ips.gfx_power = diff;
7893 }
7894
7895 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7896 {
7897         if (!IS_GEN5(dev_priv))
7898                 return;
7899
7900         spin_lock_irq(&mchdev_lock);
7901
7902         __i915_update_gfx_val(dev_priv);
7903
7904         spin_unlock_irq(&mchdev_lock);
7905 }
7906
7907 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
7908 {
7909         unsigned long t, corr, state1, corr2, state2;
7910         u32 pxvid, ext_v;
7911
7912         lockdep_assert_held(&mchdev_lock);
7913
7914         pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
7915         pxvid = (pxvid >> 24) & 0x7f;
7916         ext_v = pvid_to_extvid(dev_priv, pxvid);
7917
7918         state1 = ext_v;
7919
7920         t = i915_mch_val(dev_priv);
7921
7922         /* Revel in the empirically derived constants */
7923
7924         /* Correction factor in 1/100000 units */
7925         if (t > 80)
7926                 corr = ((t * 2349) + 135940);
7927         else if (t >= 50)
7928                 corr = ((t * 964) + 29317);
7929         else /* < 50 */
7930                 corr = ((t * 301) + 1004);
7931
7932         corr = corr * ((150142 * state1) / 10000 - 78642);
7933         corr /= 100000;
7934         corr2 = (corr * dev_priv->ips.corr);
7935
7936         state2 = (corr2 * state1) / 10000;
7937         state2 /= 100; /* convert to mW */
7938
7939         __i915_update_gfx_val(dev_priv);
7940
7941         return dev_priv->ips.gfx_power + state2;
7942 }
7943
7944 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7945 {
7946         unsigned long val;
7947
7948         if (!IS_GEN5(dev_priv))
7949                 return 0;
7950
7951         spin_lock_irq(&mchdev_lock);
7952
7953         val = __i915_gfx_val(dev_priv);
7954
7955         spin_unlock_irq(&mchdev_lock);
7956
7957         return val;
7958 }
7959
7960 /**
7961  * i915_read_mch_val - return value for IPS use
7962  *
7963  * Calculate and return a value for the IPS driver to use when deciding whether
7964  * we have thermal and power headroom to increase CPU or GPU power budget.
7965  */
7966 unsigned long i915_read_mch_val(void)
7967 {
7968         struct drm_i915_private *dev_priv;
7969         unsigned long chipset_val, graphics_val, ret = 0;
7970
7971         spin_lock_irq(&mchdev_lock);
7972         if (!i915_mch_dev)
7973                 goto out_unlock;
7974         dev_priv = i915_mch_dev;
7975
7976         chipset_val = __i915_chipset_val(dev_priv);
7977         graphics_val = __i915_gfx_val(dev_priv);
7978
7979         ret = chipset_val + graphics_val;
7980
7981 out_unlock:
7982         spin_unlock_irq(&mchdev_lock);
7983
7984         return ret;
7985 }
7986 EXPORT_SYMBOL_GPL(i915_read_mch_val);
7987
7988 /**
7989  * i915_gpu_raise - raise GPU frequency limit
7990  *
7991  * Raise the limit; IPS indicates we have thermal headroom.
7992  */
7993 bool i915_gpu_raise(void)
7994 {
7995         struct drm_i915_private *dev_priv;
7996         bool ret = true;
7997
7998         spin_lock_irq(&mchdev_lock);
7999         if (!i915_mch_dev) {
8000                 ret = false;
8001                 goto out_unlock;
8002         }
8003         dev_priv = i915_mch_dev;
8004
8005         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
8006                 dev_priv->ips.max_delay--;
8007
8008 out_unlock:
8009         spin_unlock_irq(&mchdev_lock);
8010
8011         return ret;
8012 }
8013 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8014
8015 /**
8016  * i915_gpu_lower - lower GPU frequency limit
8017  *
8018  * IPS indicates we're close to a thermal limit, so throttle back the GPU
8019  * frequency maximum.
8020  */
8021 bool i915_gpu_lower(void)
8022 {
8023         struct drm_i915_private *dev_priv;
8024         bool ret = true;
8025
8026         spin_lock_irq(&mchdev_lock);
8027         if (!i915_mch_dev) {
8028                 ret = false;
8029                 goto out_unlock;
8030         }
8031         dev_priv = i915_mch_dev;
8032
8033         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
8034                 dev_priv->ips.max_delay++;
8035
8036 out_unlock:
8037         spin_unlock_irq(&mchdev_lock);
8038
8039         return ret;
8040 }
8041 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8042
8043 /**
8044  * i915_gpu_busy - indicate GPU busyness to IPS
8045  *
8046  * Tell the IPS driver whether or not the GPU is busy.
8047  */
8048 bool i915_gpu_busy(void)
8049 {
8050         bool ret = false;
8051
8052         spin_lock_irq(&mchdev_lock);
8053         if (i915_mch_dev)
8054                 ret = i915_mch_dev->gt.awake;
8055         spin_unlock_irq(&mchdev_lock);
8056
8057         return ret;
8058 }
8059 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8060
8061 /**
8062  * i915_gpu_turbo_disable - disable graphics turbo
8063  *
8064  * Disable graphics turbo by resetting the max frequency and setting the
8065  * current frequency to the default.
8066  */
8067 bool i915_gpu_turbo_disable(void)
8068 {
8069         struct drm_i915_private *dev_priv;
8070         bool ret = true;
8071
8072         spin_lock_irq(&mchdev_lock);
8073         if (!i915_mch_dev) {
8074                 ret = false;
8075                 goto out_unlock;
8076         }
8077         dev_priv = i915_mch_dev;
8078
8079         dev_priv->ips.max_delay = dev_priv->ips.fstart;
8080
8081         if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
8082                 ret = false;
8083
8084 out_unlock:
8085         spin_unlock_irq(&mchdev_lock);
8086
8087         return ret;
8088 }
8089 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8090
8091 /**
8092  * Tells the intel_ips driver that the i915 driver is now loaded, if
8093  * IPS got loaded first.
8094  *
8095  * This awkward dance is so that neither module has to depend on the
8096  * other in order for IPS to do the appropriate communication of
8097  * GPU turbo limits to i915.
8098  */
8099 static void
8100 ips_ping_for_i915_load(void)
8101 {
8102         void (*link)(void);
8103
8104         link = symbol_get(ips_link_to_i915_driver);
8105         if (link) {
8106                 link();
8107                 symbol_put(ips_link_to_i915_driver);
8108         }
8109 }
8110
8111 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8112 {
8113         /* We only register the i915 ips part with intel-ips once everything is
8114          * set up, to avoid intel-ips sneaking in and reading bogus values. */
8115         spin_lock_irq(&mchdev_lock);
8116         i915_mch_dev = dev_priv;
8117         spin_unlock_irq(&mchdev_lock);
8118
8119         ips_ping_for_i915_load();
8120 }
8121
8122 void intel_gpu_ips_teardown(void)
8123 {
8124         spin_lock_irq(&mchdev_lock);
8125         i915_mch_dev = NULL;
8126         spin_unlock_irq(&mchdev_lock);
8127 }
8128
8129 static void intel_init_emon(struct drm_i915_private *dev_priv)
8130 {
8131         u32 lcfuse;
8132         u8 pxw[16];
8133         int i;
8134
8135         /* Disable PMON while we program the energy weights */
8136         I915_WRITE(ECR, 0);
8137         POSTING_READ(ECR);
8138
8139         /* Program energy weights for various events */
8140         I915_WRITE(SDEW, 0x15040d00);
8141         I915_WRITE(CSIEW0, 0x007f0000);
8142         I915_WRITE(CSIEW1, 0x1e220004);
8143         I915_WRITE(CSIEW2, 0x04000004);
8144
8145         for (i = 0; i < 5; i++)
8146                 I915_WRITE(PEW(i), 0);
8147         for (i = 0; i < 3; i++)
8148                 I915_WRITE(DEW(i), 0);
8149
8150         /* Program P-state weights to account for frequency power adjustment */
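        /*
         * Each weight is roughly vid^2 * (freq / 1000) * 255 / (127 * 127 * 900),
         * stored as a byte (values above 0xff are flagged as errors).  For
         * example vid = 100 and freq = 1000000 (in the units produced by
         * intel_pxfreq()) gives 100 * 100 * 1000 * 255 / 14516100 ~= 175.
         */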
8151         for (i = 0; i < 16; i++) {
8152                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8153                 unsigned long freq = intel_pxfreq(pxvidfreq);
8154                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8155                         PXVFREQ_PX_SHIFT;
8156                 unsigned long val;
8157
8158                 val = vid * vid;
8159                 val *= (freq / 1000);
8160                 val *= 255;
8161                 val /= (127*127*900);
8162                 if (val > 0xff)
8163                         DRM_ERROR("bad pxval: %ld\n", val);
8164                 pxw[i] = val;
8165         }
8166         /* Render standby states get 0 weight */
8167         pxw[14] = 0;
8168         pxw[15] = 0;
8169
8170         for (i = 0; i < 4; i++) {
8171                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8172                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8173                 I915_WRITE(PXW(i), val);
8174         }
8175
8176         /* Adjust magic regs to magic values (more experimental results) */
8177         I915_WRITE(OGW0, 0);
8178         I915_WRITE(OGW1, 0);
8179         I915_WRITE(EG0, 0x00007f00);
8180         I915_WRITE(EG1, 0x0000000e);
8181         I915_WRITE(EG2, 0x000e0000);
8182         I915_WRITE(EG3, 0x68000300);
8183         I915_WRITE(EG4, 0x42000000);
8184         I915_WRITE(EG5, 0x00140031);
8185         I915_WRITE(EG6, 0);
8186         I915_WRITE(EG7, 0);
8187
8188         for (i = 0; i < 8; i++)
8189                 I915_WRITE(PXWL(i), 0);
8190
8191         /* Enable PMON + select events */
8192         I915_WRITE(ECR, 0x80000019);
8193
8194         lcfuse = I915_READ(LCFUSE02);
8195
8196         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8197 }
8198
8199 static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
8200 {
8201         return !I915_READ(GEN8_RC6_CTX_INFO);
8202 }
8203
8204 static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
8205 {
8206         if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
8207                 return;
8208
8209         if (i915_rc6_ctx_corrupted(i915)) {
8210                 DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
8211                 i915->gt_pm.rc6.ctx_corrupted = true;
8212                 intel_runtime_pm_get(i915);
8213         }
8214 }
8215
8216 static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
8217 {
8218         if (i915->gt_pm.rc6.ctx_corrupted) {
8219                 intel_runtime_pm_put(i915);
8220                 i915->gt_pm.rc6.ctx_corrupted = false;
8221         }
8222 }
8223
8224 /**
8225  * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
8226  * @i915: i915 device
8227  *
8228  * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
8229  */
8230 void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
8231 {
8232         if (i915->gt_pm.rc6.ctx_corrupted)
8233                 intel_runtime_pm_put(i915);
8234 }
8235
8236 /**
8237  * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
8238  * @i915: i915 device
8239  *
8240  * Perform any steps needed to re-init the RC6 CTX WA after system resume.
8241  */
8242 void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
8243 {
8244         if (!i915->gt_pm.rc6.ctx_corrupted)
8245                 return;
8246
8247         if (i915_rc6_ctx_corrupted(i915)) {
8248                 intel_runtime_pm_get(i915);
8249                 return;
8250         }
8251
8252         DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
8253         i915->gt_pm.rc6.ctx_corrupted = false;
8254 }
8255
8256 static void intel_disable_rc6(struct drm_i915_private *dev_priv);
8257
8258 /**
8259  * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
8260  * @i915: i915 device
8261  *
8262  * Check if an RC6 CTX corruption has happened since the last check and if so
8263  * disable RC6 and runtime power management.
8264  *
8265  * Return false if no context corruption has happened since the last call of
8266  * this function, true otherwise.
8267  */
8268 bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
8269 {
8270         if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
8271                 return false;
8272
8273         if (i915->gt_pm.rc6.ctx_corrupted)
8274                 return false;
8275
8276         if (!i915_rc6_ctx_corrupted(i915))
8277                 return false;
8278
8279         DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
8280
8281         intel_disable_rc6(i915);
8282         i915->gt_pm.rc6.ctx_corrupted = true;
8283         intel_runtime_pm_get_noresume(i915);
8284
8285         return true;
8286 }
8287
8288 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8289 {
8290         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8291
8292         /*
8293          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8294          * requirement.
8295          */
8296         if (!sanitize_rc6(dev_priv)) {
8297                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8298                 intel_runtime_pm_get(dev_priv);
8299         }
8300
8301         mutex_lock(&dev_priv->pcu_lock);
8302
8303         i915_rc6_ctx_wa_init(dev_priv);
8304
8305         /* Initialize RPS limits (for userspace) */
8306         if (IS_CHERRYVIEW(dev_priv))
8307                 cherryview_init_gt_powersave(dev_priv);
8308         else if (IS_VALLEYVIEW(dev_priv))
8309                 valleyview_init_gt_powersave(dev_priv);
8310         else if (INTEL_GEN(dev_priv) >= 6)
8311                 gen6_init_rps_frequencies(dev_priv);
8312
8313         /* Derive initial user preferences/limits from the hardware limits */
8314         rps->idle_freq = rps->min_freq;
8315         rps->cur_freq = rps->idle_freq;
8316
8317         rps->max_freq_softlimit = rps->max_freq;
8318         rps->min_freq_softlimit = rps->min_freq;
8319
8320         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8321                 rps->min_freq_softlimit =
8322                         max_t(int,
8323                               rps->efficient_freq,
8324                               intel_freq_opcode(dev_priv, 450));
8325
8326         /* After setting max-softlimit, find the overclock max freq */
8327         if (IS_GEN6(dev_priv) ||
8328             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8329                 u32 params = 0;
8330
8331                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8332                 if (params & BIT(31)) { /* OC supported */
8333                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8334                                          (rps->max_freq & 0xff) * 50,
8335                                          (params & 0xff) * 50);
8336                         rps->max_freq = params & 0xff;
8337                 }
8338         }
8339
8340         /* Finally allow us to boost to max by default */
8341         rps->boost_freq = rps->max_freq;
8342
8343         mutex_unlock(&dev_priv->pcu_lock);
8344 }
8345
8346 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8347 {
8348         if (IS_VALLEYVIEW(dev_priv))
8349                 valleyview_cleanup_gt_powersave(dev_priv);
8350
8351         i915_rc6_ctx_wa_cleanup(dev_priv);
8352
8353         if (!HAS_RC6(dev_priv))
8354                 intel_runtime_pm_put(dev_priv);
8355 }
8356
8357 /**
8358  * intel_suspend_gt_powersave - suspend PM work and helper threads
8359  * @dev_priv: i915 device
8360  *
8361  * We don't want to disable RC6 or other features here, we just want
8362  * to make sure any work we've queued has finished and won't bother
8363  * us while we're suspended.
8364  */
8365 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8366 {
8367         if (INTEL_GEN(dev_priv) < 6)
8368                 return;
8369
8370         /* gen6_rps_idle() will be called later to disable interrupts */
8371 }
8372
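/*
 * Pretend RPS/RC6 are enabled so that intel_disable_gt_powersave() below
 * unconditionally runs the full disable paths, bringing the hardware to a
 * known state before the RPS interrupt bookkeeping is reset.
 */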
8373 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8374 {
8375         dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8376         dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8377         intel_disable_gt_powersave(dev_priv);
8378
8379         if (INTEL_GEN(dev_priv) >= 11)
8380                 gen11_reset_rps_interrupts(dev_priv);
8381         else
8382                 gen6_reset_rps_interrupts(dev_priv);
8383 }
8384
8385 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8386 {
8387         lockdep_assert_held(&i915->pcu_lock);
8388
8389         if (!i915->gt_pm.llc_pstate.enabled)
8390                 return;
8391
8392         /* Currently there is no HW configuration to be done to disable. */
8393
8394         i915->gt_pm.llc_pstate.enabled = false;
8395 }
8396
8397 static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
8398 {
8399         lockdep_assert_held(&dev_priv->pcu_lock);
8400
8401         if (!dev_priv->gt_pm.rc6.enabled)
8402                 return;
8403
8404         if (INTEL_GEN(dev_priv) >= 9)
8405                 gen9_disable_rc6(dev_priv);
8406         else if (IS_CHERRYVIEW(dev_priv))
8407                 cherryview_disable_rc6(dev_priv);
8408         else if (IS_VALLEYVIEW(dev_priv))
8409                 valleyview_disable_rc6(dev_priv);
8410         else if (INTEL_GEN(dev_priv) >= 6)
8411                 gen6_disable_rc6(dev_priv);
8412
8413         dev_priv->gt_pm.rc6.enabled = false;
8414 }
8415
8416 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8417 {
8418         mutex_lock(&dev_priv->pcu_lock);
8419         __intel_disable_rc6(dev_priv);
8420         mutex_unlock(&dev_priv->pcu_lock);
8421 }
8422
8423 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8424 {
8425         lockdep_assert_held(&dev_priv->pcu_lock);
8426
8427         if (!dev_priv->gt_pm.rps.enabled)
8428                 return;
8429
8430         if (INTEL_GEN(dev_priv) >= 9)
8431                 gen9_disable_rps(dev_priv);
8432         else if (IS_CHERRYVIEW(dev_priv))
8433                 cherryview_disable_rps(dev_priv);
8434         else if (IS_VALLEYVIEW(dev_priv))
8435                 valleyview_disable_rps(dev_priv);
8436         else if (INTEL_GEN(dev_priv) >= 6)
8437                 gen6_disable_rps(dev_priv);
8438         else if (IS_IRONLAKE_M(dev_priv))
8439                 ironlake_disable_drps(dev_priv);
8440
8441         dev_priv->gt_pm.rps.enabled = false;
8442 }
8443
8444 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8445 {
8446         mutex_lock(&dev_priv->pcu_lock);
8447
8448         __intel_disable_rc6(dev_priv);
8449         intel_disable_rps(dev_priv);
8450         if (HAS_LLC(dev_priv))
8451                 intel_disable_llc_pstate(dev_priv);
8452
8453         mutex_unlock(&dev_priv->pcu_lock);
8454 }
8455
8456 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8457 {
8458         lockdep_assert_held(&i915->pcu_lock);
8459
8460         if (i915->gt_pm.llc_pstate.enabled)
8461                 return;
8462
8463         gen6_update_ring_freq(i915);
8464
8465         i915->gt_pm.llc_pstate.enabled = true;
8466 }
8467
8468 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8469 {
8470         lockdep_assert_held(&dev_priv->pcu_lock);
8471
8472         if (dev_priv->gt_pm.rc6.enabled)
8473                 return;
8474
8475         if (dev_priv->gt_pm.rc6.ctx_corrupted)
8476                 return;
8477
8478         if (IS_CHERRYVIEW(dev_priv))
8479                 cherryview_enable_rc6(dev_priv);
8480         else if (IS_VALLEYVIEW(dev_priv))
8481                 valleyview_enable_rc6(dev_priv);
8482         else if (INTEL_GEN(dev_priv) >= 9)
8483                 gen9_enable_rc6(dev_priv);
8484         else if (IS_BROADWELL(dev_priv))
8485                 gen8_enable_rc6(dev_priv);
8486         else if (INTEL_GEN(dev_priv) >= 6)
8487                 gen6_enable_rc6(dev_priv);
8488
8489         dev_priv->gt_pm.rc6.enabled = true;
8490 }
8491
8492 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8493 {
8494         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8495
8496         lockdep_assert_held(&dev_priv->pcu_lock);
8497
8498         if (rps->enabled)
8499                 return;
8500
8501         if (IS_CHERRYVIEW(dev_priv)) {
8502                 cherryview_enable_rps(dev_priv);
8503         } else if (IS_VALLEYVIEW(dev_priv)) {
8504                 valleyview_enable_rps(dev_priv);
8505         } else if (INTEL_GEN(dev_priv) >= 9) {
8506                 gen9_enable_rps(dev_priv);
8507         } else if (IS_BROADWELL(dev_priv)) {
8508                 gen8_enable_rps(dev_priv);
8509         } else if (INTEL_GEN(dev_priv) >= 6) {
8510                 gen6_enable_rps(dev_priv);
8511         } else if (IS_IRONLAKE_M(dev_priv)) {
8512                 ironlake_enable_drps(dev_priv);
8513                 intel_init_emon(dev_priv);
8514         }
8515
8516         WARN_ON(rps->max_freq < rps->min_freq);
8517         WARN_ON(rps->idle_freq > rps->max_freq);
8518
8519         WARN_ON(rps->efficient_freq < rps->min_freq);
8520         WARN_ON(rps->efficient_freq > rps->max_freq);
8521
8522         rps->enabled = true;
8523 }
8524
8525 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8526 {
8527         /* Powersaving is controlled by the host when inside a VM */
8528         if (intel_vgpu_active(dev_priv))
8529                 return;
8530
8531         mutex_lock(&dev_priv->pcu_lock);
8532
8533         if (HAS_RC6(dev_priv))
8534                 intel_enable_rc6(dev_priv);
8535         intel_enable_rps(dev_priv);
8536         if (HAS_LLC(dev_priv))
8537                 intel_enable_llc_pstate(dev_priv);
8538
8539         mutex_unlock(&dev_priv->pcu_lock);
8540 }
8541
8542 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8543 {
8544         /*
8545          * On Ibex Peak and Cougar Point, we need to disable clock
8546          * gating for the panel power sequencer or it will fail to
8547          * start up when no ports are active.
8548          */
8549         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8550 }
8551
8552 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8553 {
8554         enum pipe pipe;
8555
8556         for_each_pipe(dev_priv, pipe) {
8557                 I915_WRITE(DSPCNTR(pipe),
8558                            I915_READ(DSPCNTR(pipe)) |
8559                            DISPPLANE_TRICKLE_FEED_DISABLE);
8560
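                /*
                 * Rewriting the surface address latches the DSPCNTR update;
                 * the plane registers are double buffered and armed by the
                 * surface address write.
                 */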
8561                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8562                 POSTING_READ(DSPSURF(pipe));
8563         }
8564 }
8565
8566 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8567 {
8568         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8569
8570         /*
8571          * Required for FBC
8572          * WaFbcDisableDpfcClockGating:ilk
8573          */
8574         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8575                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8576                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8577
8578         I915_WRITE(PCH_3DCGDIS0,
8579                    MARIUNIT_CLOCK_GATE_DISABLE |
8580                    SVSMUNIT_CLOCK_GATE_DISABLE);
8581         I915_WRITE(PCH_3DCGDIS1,
8582                    VFMUNIT_CLOCK_GATE_DISABLE);
8583
8584         /*
8585          * According to the spec, the following bits should be set in
8586          * order to enable memory self-refresh:
8587          * The bit 22/21 of 0x42004
8588          * The bit 5 of 0x42020
8589          * The bit 15 of 0x45000
8590          */
8591         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8592                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
8593                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8594         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8595         I915_WRITE(DISP_ARB_CTL,
8596                    (I915_READ(DISP_ARB_CTL) |
8597                     DISP_FBC_WM_DIS));
8598
8599         /*
8600          * Based on the document from the hardware guys, the following bits
8601          * should be set unconditionally in order to enable FBC.
8602          * The bit 22 of 0x42000
8603          * The bit 22 of 0x42004
8604          * The bits 7, 8 and 9 of 0x42020.
8605          */
8606         if (IS_IRONLAKE_M(dev_priv)) {
8607                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8608                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8609                            I915_READ(ILK_DISPLAY_CHICKEN1) |
8610                            ILK_FBCQ_DIS);
8611                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8612                            I915_READ(ILK_DISPLAY_CHICKEN2) |
8613                            ILK_DPARB_GATE);
8614         }
8615
8616         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8617
8618         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8619                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8620                    ILK_ELPIN_409_SELECT);
8621         I915_WRITE(_3D_CHICKEN2,
8622                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8623                    _3D_CHICKEN2_WM_READ_PIPELINED);
8624
8625         /* WaDisableRenderCachePipelinedFlush:ilk */
8626         I915_WRITE(CACHE_MODE_0,
8627                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8628
8629         /* WaDisable_RenderCache_OperationalFlush:ilk */
8630         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8631
8632         g4x_disable_trickle_feed(dev_priv);
8633
8634         ibx_init_clock_gating(dev_priv);
8635 }
8636
8637 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8638 {
8639         int pipe;
8640         uint32_t val;
8641
8642         /*
8643          * On Ibex Peak and Cougar Point, we need to disable clock
8644          * gating for the panel power sequencer or it will fail to
8645          * start up when no ports are active.
8646          */
8647         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8648                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8649                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
8650         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8651                    DPLS_EDP_PPS_FIX_DIS);
8652         /* The below fixes a weird display corruption, a few pixels shifted
8653          * downward, seen only on the LVDS output of some HP laptops with Ivy Bridge.
8654          */
8655         for_each_pipe(dev_priv, pipe) {
8656                 val = I915_READ(TRANS_CHICKEN2(pipe));
8657                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8658                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8659                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8660                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8661                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8662                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8663                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8664                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8665         }
8666         /* WADP0ClockGatingDisable */
8667         for_each_pipe(dev_priv, pipe) {
8668                 I915_WRITE(TRANS_CHICKEN1(pipe),
8669                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8670         }
8671 }
8672
8673 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8674 {
8675         uint32_t tmp;
8676
8677         tmp = I915_READ(MCH_SSKPD);
8678         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8679                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
8680                               tmp);
8681 }
8682
8683 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8684 {
8685         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8686
8687         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8688
8689         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8690                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8691                    ILK_ELPIN_409_SELECT);
8692
8693         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8694         I915_WRITE(_3D_CHICKEN,
8695                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8696
8697         /* WaDisable_RenderCache_OperationalFlush:snb */
8698         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8699
8700         /*
8701          * BSpec recommends 8x4 when MSAA is used,
8702          * however in practice 16x4 seems fastest.
8703          *
8704          * Note that PS/WM thread counts depend on the WIZ hashing
8705          * disable bit, which we don't touch here, but it's good
8706          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8707          */
8708         I915_WRITE(GEN6_GT_MODE,
8709                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8710
8711         I915_WRITE(CACHE_MODE_0,
8712                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8713
8714         I915_WRITE(GEN6_UCGCTL1,
8715                    I915_READ(GEN6_UCGCTL1) |
8716                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8717                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8718
8719         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8720          * gating disable must be set.  Failure to set it results in
8721          * flickering pixels due to Z write ordering failures after
8722          * some amount of runtime in the Mesa "fire" demo, and Unigine
8723          * Sanctuary and Tropics, and apparently anything else with
8724          * alpha test or pixel discard.
8725          *
8726          * According to the spec, bit 11 (RCCUNIT) must also be set,
8727          * but we didn't debug actual testcases to find it out.
8728          *
8729          * WaDisableRCCUnitClockGating:snb
8730          * WaDisableRCPBUnitClockGating:snb
8731          */
8732         I915_WRITE(GEN6_UCGCTL2,
8733                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8734                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8735
8736         /* WaStripsFansDisableFastClipPerformanceFix:snb */
8737         I915_WRITE(_3D_CHICKEN3,
8738                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8739
8740         /*
8741          * Bspec says:
8742          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8743          * 3DSTATE_SF number of SF output attributes is more than 16."
8744          */
8745         I915_WRITE(_3D_CHICKEN3,
8746                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8747
8748         /*
8749          * According to the spec, the following bits should be
8750          * set in order to enable memory self-refresh and fbc:
8751          * The bit21 and bit22 of 0x42000
8752          * The bit21 and bit22 of 0x42004
8753          * The bit5 and bit7 of 0x42020
8754          * The bit14 of 0x70180
8755          * The bit14 of 0x71180
8756          *
8757          * WaFbcAsynchFlipDisableFbcQueue:snb
8758          */
8759         I915_WRITE(ILK_DISPLAY_CHICKEN1,
8760                    I915_READ(ILK_DISPLAY_CHICKEN1) |
8761                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8762         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8763                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8764                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8765         I915_WRITE(ILK_DSPCLK_GATE_D,
8766                    I915_READ(ILK_DSPCLK_GATE_D) |
8767                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8768                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8769
8770         g4x_disable_trickle_feed(dev_priv);
8771
8772         cpt_init_clock_gating(dev_priv);
8773
8774         gen6_check_mch_setup(dev_priv);
8775 }
8776
8777 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8778 {
8779         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8780
8781         /*
8782          * WaVSThreadDispatchOverride:ivb,vlv
8783          *
8784          * This actually overrides the dispatch
8785          * mode for all thread types.
8786          */
8787         reg &= ~GEN7_FF_SCHED_MASK;
8788         reg |= GEN7_FF_TS_SCHED_HW;
8789         reg |= GEN7_FF_VS_SCHED_HW;
8790         reg |= GEN7_FF_DS_SCHED_HW;
8791
8792         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8793 }
8794
8795 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8796 {
8797         /*
8798          * TODO: this bit should only be enabled when really needed, then
8799          * disabled when not needed anymore in order to save power.
8800          */
8801         if (HAS_PCH_LPT_LP(dev_priv))
8802                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8803                            I915_READ(SOUTH_DSPCLK_GATE_D) |
8804                            PCH_LP_PARTITION_LEVEL_DISABLE);
8805
8806         /* WADPOClockGatingDisable:hsw */
8807         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8808                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8809                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8810 }
8811
8812 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8813 {
8814         if (HAS_PCH_LPT_LP(dev_priv)) {
8815                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8816
8817                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8818                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8819         }
8820 }
8821
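/*
 * Split the L3 SQC credits between general and high priority transactions by
 * updating GEN8_L3SQCREG1, with DOP clock gating temporarily disabled around
 * the update as the workaround requires.
 */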
8822 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8823                                    int general_prio_credits,
8824                                    int high_prio_credits)
8825 {
8826         u32 misccpctl;
8827         u32 val;
8828
8829         /* WaTempDisableDOPClkGating:bdw */
8830         misccpctl = I915_READ(GEN7_MISCCPCTL);
8831         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8832
8833         val = I915_READ(GEN8_L3SQCREG1);
8834         val &= ~L3_PRIO_CREDITS_MASK;
8835         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8836         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8837         I915_WRITE(GEN8_L3SQCREG1, val);
8838
8839         /*
8840          * Wait at least 100 clocks before re-enabling clock gating.
8841          * See the definition of L3SQCREG1 in BSpec.
8842          */
8843         POSTING_READ(GEN8_L3SQCREG1);
8844         udelay(1);
8845         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8846 }
8847
8848 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8849 {
8850         /* This is not a Wa. Enable to reduce Sampler power */
8851         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8852                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
8853 }
8854
8855 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8856 {
8857         if (!HAS_PCH_CNP(dev_priv))
8858                 return;
8859
8860         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8861         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8862                    CNP_PWM_CGE_GATING_DISABLE);
8863 }
8864
8865 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8866 {
8867         u32 val;
8868         cnp_init_clock_gating(dev_priv);
8869
8870         /* This is not a Wa. Enable for better image quality */
8871         I915_WRITE(_3D_CHICKEN3,
8872                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8873
8874         /* WaEnableChickenDCPR:cnl */
8875         I915_WRITE(GEN8_CHICKEN_DCPR_1,
8876                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8877
8878         /* WaFbcWakeMemOn:cnl */
8879         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8880                    DISP_FBC_MEMORY_WAKE);
8881
8882         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8883         /* ReadHitWriteOnlyDisable:cnl */
8884         val |= RCCUNIT_CLKGATE_DIS;
8885         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8886         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8887                 val |= SARBUNIT_CLKGATE_DIS;
8888         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
8889
8890         /* Wa_2201832410:cnl */
8891         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8892         val |= GWUNIT_CLKGATE_DIS;
8893         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8894
8895         /* WaDisableVFclkgate:cnl */
8896         /* WaVFUnitClockGatingDisable:cnl */
8897         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8898         val |= VFUNIT_CLKGATE_DIS;
8899         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
8900 }
8901
8902 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8903 {
8904         cnp_init_clock_gating(dev_priv);
8905         gen9_init_clock_gating(dev_priv);
8906
8907         /* WaFbcNukeOnHostModify:cfl */
8908         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8909                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8910 }
8911
8912 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8913 {
8914         gen9_init_clock_gating(dev_priv);
8915
8916         /* WaDisableSDEUnitClockGating:kbl */
8917         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8918                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8919                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8920
8921         /* WaDisableGamClockGating:kbl */
8922         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8923                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8924                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
8925
8926         /* WaFbcNukeOnHostModify:kbl */
8927         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8928                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8929 }
8930
8931 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
8932 {
8933         gen9_init_clock_gating(dev_priv);
8934
8935         /* WAC6entrylatency:skl */
8936         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8937                    FBC_LLC_FULLY_OPEN);
8938
8939         /* WaFbcNukeOnHostModify:skl */
8940         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8941                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8942 }
8943
8944 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
8945 {
8946         /* The GTT cache must be disabled if the system is using 2M pages. */
8947         bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8948                                                  I915_GTT_PAGE_SIZE_2M);
8949         enum pipe pipe;
8950
8951         /* WaSwitchSolVfFArbitrationPriority:bdw */
8952         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8953
8954         /* WaPsrDPAMaskVBlankInSRD:bdw */
8955         I915_WRITE(CHICKEN_PAR1_1,
8956                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8957
8958         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8959         for_each_pipe(dev_priv, pipe) {
8960                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
8961                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
8962                            BDW_DPRS_MASK_VBLANK_SRD);
8963         }
8964
8965         /* WaVSRefCountFullforceMissDisable:bdw */
8966         /* WaDSRefCountFullforceMissDisable:bdw */
8967         I915_WRITE(GEN7_FF_THREAD_MODE,
8968                    I915_READ(GEN7_FF_THREAD_MODE) &
8969                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8970
8971         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8972                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8973
8974         /* WaDisableSDEUnitClockGating:bdw */
8975         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8976                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8977
8978         /* WaProgramL3SqcReg1Default:bdw */
8979         gen8_set_l3sqc_credits(dev_priv, 30, 2);
8980
8981         /* WaGttCachingOffByDefault:bdw */
8982         I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
8983
8984         /* WaKVMNotificationOnConfigChange:bdw */
8985         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8986                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8987
8988         lpt_init_clock_gating(dev_priv);
8989
8990         /* WaDisableDopClockGating:bdw
8991          *
8992          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8993          * clock gating.
8994          */
8995         I915_WRITE(GEN6_UCGCTL1,
8996                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
8997 }
8998
8999 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
9000 {
9001         /* L3 caching of data atomics doesn't work -- disable it. */
9002         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
9003         I915_WRITE(HSW_ROW_CHICKEN3,
9004                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
9005
9006         /* This is required by WaCatErrorRejectionIssue:hsw */
9007         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9008                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9009                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9010
9011         /* WaVSRefCountFullforceMissDisable:hsw */
9012         I915_WRITE(GEN7_FF_THREAD_MODE,
9013                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
9014
9015         /* WaDisable_RenderCache_OperationalFlush:hsw */
9016         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9017
9018         /* enable HiZ Raw Stall Optimization */
9019         I915_WRITE(CACHE_MODE_0_GEN7,
9020                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9021
9022         /* WaDisable4x2SubspanOptimization:hsw */
9023         I915_WRITE(CACHE_MODE_1,
9024                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9025
9026         /*
9027          * BSpec recommends 8x4 when MSAA is used,
9028          * however in practice 16x4 seems fastest.
9029          *
9030          * Note that PS/WM thread counts depend on the WIZ hashing
9031          * disable bit, which we don't touch here, but it's good
9032          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9033          */
9034         I915_WRITE(GEN7_GT_MODE,
9035                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9036
9037         /* WaSampleCChickenBitEnable:hsw */
9038         I915_WRITE(HALF_SLICE_CHICKEN3,
9039                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
9040
9041         /* WaSwitchSolVfFArbitrationPriority:hsw */
9042         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
9043
9044         lpt_init_clock_gating(dev_priv);
9045 }
9046
9047 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
9048 {
9049         uint32_t snpcr;
9050
9051         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
9052
9053         /* WaDisableEarlyCull:ivb */
9054         I915_WRITE(_3D_CHICKEN3,
9055                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9056
9057         /* WaDisableBackToBackFlipFix:ivb */
9058         I915_WRITE(IVB_CHICKEN3,
9059                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9060                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9061
9062         /* WaDisablePSDDualDispatchEnable:ivb */
9063         if (IS_IVB_GT1(dev_priv))
9064                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9065                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9066
9067         /* WaDisable_RenderCache_OperationalFlush:ivb */
9068         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9069
9070         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
9071         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
9072                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
9073
9074         /* WaApplyL3ControlAndL3ChickenMode:ivb */
9075         I915_WRITE(GEN7_L3CNTLREG1,
9076                         GEN7_WA_FOR_GEN7_L3_CONTROL);
9077         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9078                    GEN7_WA_L3_CHICKEN_MODE);
9079         if (IS_IVB_GT1(dev_priv))
9080                 I915_WRITE(GEN7_ROW_CHICKEN2,
9081                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9082         else {
9083                 /* must write both registers */
9084                 I915_WRITE(GEN7_ROW_CHICKEN2,
9085                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9086                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9087                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9088         }
9089
9090         /* WaForceL3Serialization:ivb */
9091         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9092                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9093
9094         /*
9095          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9096          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9097          */
9098         I915_WRITE(GEN6_UCGCTL2,
9099                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9100
9101         /* This is required by WaCatErrorRejectionIssue:ivb */
9102         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9103                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9104                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9105
9106         g4x_disable_trickle_feed(dev_priv);
9107
9108         gen7_setup_fixed_func_scheduler(dev_priv);
9109
9110         if (0) { /* causes HiZ corruption on ivb:gt1 */
9111                 /* enable HiZ Raw Stall Optimization */
9112                 I915_WRITE(CACHE_MODE_0_GEN7,
9113                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9114         }
9115
9116         /* WaDisable4x2SubspanOptimization:ivb */
9117         I915_WRITE(CACHE_MODE_1,
9118                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9119
9120         /*
9121          * BSpec recommends 8x4 when MSAA is used,
9122          * however in practice 16x4 seems fastest.
9123          *
9124          * Note that PS/WM thread counts depend on the WIZ hashing
9125          * disable bit, which we don't touch here, but it's good
9126          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9127          */
9128         I915_WRITE(GEN7_GT_MODE,
9129                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9130
9131         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9132         snpcr &= ~GEN6_MBC_SNPCR_MASK;
9133         snpcr |= GEN6_MBC_SNPCR_MED;
9134         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9135
9136         if (!HAS_PCH_NOP(dev_priv))
9137                 cpt_init_clock_gating(dev_priv);
9138
9139         gen6_check_mch_setup(dev_priv);
9140 }
9141
9142 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9143 {
9144         /* WaDisableEarlyCull:vlv */
9145         I915_WRITE(_3D_CHICKEN3,
9146                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9147
9148         /* WaDisableBackToBackFlipFix:vlv */
9149         I915_WRITE(IVB_CHICKEN3,
9150                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9151                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9152
9153         /* WaPsdDispatchEnable:vlv */
9154         /* WaDisablePSDDualDispatchEnable:vlv */
9155         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9156                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9157                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9158
9159         /* WaDisable_RenderCache_OperationalFlush:vlv */
9160         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9161
9162         /* WaForceL3Serialization:vlv */
9163         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9164                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9165
9166         /* WaDisableDopClockGating:vlv */
9167         I915_WRITE(GEN7_ROW_CHICKEN2,
9168                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9169
9170         /* This is required by WaCatErrorRejectionIssue:vlv */
9171         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9172                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9173                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9174
9175         gen7_setup_fixed_func_scheduler(dev_priv);
9176
9177         /*
9178          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9179          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9180          */
9181         I915_WRITE(GEN6_UCGCTL2,
9182                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9183
9184         /* WaDisableL3Bank2xClockGate:vlv
9185          * Disabling L3 clock gating - MMIO 940c[25] = 1
9186          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
9187         I915_WRITE(GEN7_UCGCTL4,
9188                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9189
9190         /*
9191          * BSpec says this must be set, even though
9192          * WaDisable4x2SubspanOptimization isn't listed for VLV.
9193          */
9194         I915_WRITE(CACHE_MODE_1,
9195                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9196
9197         /*
9198          * BSpec recommends 8x4 when MSAA is used,
9199          * however in practice 16x4 seems fastest.
9200          *
9201          * Note that PS/WM thread counts depend on the WIZ hashing
9202          * disable bit, which we don't touch here, but it's good
9203          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9204          */
9205         I915_WRITE(GEN7_GT_MODE,
9206                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9207
9208         /*
9209          * WaIncreaseL3CreditsForVLVB0:vlv
9210          * This is the hardware default actually.
9211          */
9212         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9213
9214         /*
9215          * WaDisableVLVClockGating_VBIIssue:vlv
9216          * Disable clock gating on the GCFG unit to prevent a delay
9217          * in the reporting of vblank events.
9218          */
9219         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9220 }
9221
9222 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9223 {
9224         /* WaVSRefCountFullforceMissDisable:chv */
9225         /* WaDSRefCountFullforceMissDisable:chv */
9226         I915_WRITE(GEN7_FF_THREAD_MODE,
9227                    I915_READ(GEN7_FF_THREAD_MODE) &
9228                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9229
9230         /* WaDisableSemaphoreAndSyncFlipWait:chv */
9231         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9232                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9233
9234         /* WaDisableCSUnitClockGating:chv */
9235         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9236                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9237
9238         /* WaDisableSDEUnitClockGating:chv */
9239         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9240                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9241
9242         /*
9243          * WaProgramL3SqcReg1Default:chv
9244          * See gfxspecs/Related Documents/Performance Guide/
9245          * LSQC Setting Recommendations.
9246          */
9247         gen8_set_l3sqc_credits(dev_priv, 38, 2);
9248
9249         /*
9250          * GTT cache may not work with big pages, so if those
9251          * are ever enabled GTT cache may need to be disabled.
9252          */
9253         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9254 }
9255
9256 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9257 {
9258         uint32_t dspclk_gate;
9259
9260         I915_WRITE(RENCLK_GATE_D1, 0);
9261         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9262                    GS_UNIT_CLOCK_GATE_DISABLE |
9263                    CL_UNIT_CLOCK_GATE_DISABLE);
9264         I915_WRITE(RAMCLK_GATE_D, 0);
9265         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9266                 OVRUNIT_CLOCK_GATE_DISABLE |
9267                 OVCUNIT_CLOCK_GATE_DISABLE;
9268         if (IS_GM45(dev_priv))
9269                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9270         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9271
9272         /* WaDisableRenderCachePipelinedFlush */
9273         I915_WRITE(CACHE_MODE_0,
9274                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9275
9276         /* WaDisable_RenderCache_OperationalFlush:g4x */
9277         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9278
9279         g4x_disable_trickle_feed(dev_priv);
9280 }
9281
9282 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9283 {
9284         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9285         I915_WRITE(RENCLK_GATE_D2, 0);
9286         I915_WRITE(DSPCLK_GATE_D, 0);
9287         I915_WRITE(RAMCLK_GATE_D, 0);
9288         I915_WRITE16(DEUC, 0);
9289         I915_WRITE(MI_ARB_STATE,
9290                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9291
9292         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9293         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9294 }
9295
9296 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9297 {
9298         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9299                    I965_RCC_CLOCK_GATE_DISABLE |
9300                    I965_RCPB_CLOCK_GATE_DISABLE |
9301                    I965_ISC_CLOCK_GATE_DISABLE |
9302                    I965_FBC_CLOCK_GATE_DISABLE);
9303         I915_WRITE(RENCLK_GATE_D2, 0);
9304         I915_WRITE(MI_ARB_STATE,
9305                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9306
9307         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9308         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9309 }
9310
9311 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9312 {
9313         u32 dstate = I915_READ(D_STATE);
9314
9315         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9316                 DSTATE_DOT_CLOCK_GATING;
9317         I915_WRITE(D_STATE, dstate);
9318
9319         if (IS_PINEVIEW(dev_priv))
9320                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9321
9322         /* IIR "flip pending" means done if this bit is set */
9323         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9324
9325         /* interrupts should cause a wake up from C3 */
9326         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9327
9328         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9329         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9330
9331         I915_WRITE(MI_ARB_STATE,
9332                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9333 }
9334
9335 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9336 {
9337         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9338
9339         /* interrupts should cause a wake up from C3 */
9340         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9341                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9342
9343         I915_WRITE(MEM_MODE,
9344                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9345 }
9346
9347 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9348 {
9349         I915_WRITE(MEM_MODE,
9350                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9351                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9352 }
9353
9354 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9355 {
9356         dev_priv->display.init_clock_gating(dev_priv);
9357 }
9358
9359 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9360 {
9361         if (HAS_PCH_LPT(dev_priv))
9362                 lpt_suspend_hw(dev_priv);
9363 }
9364
9365 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9366 {
9367         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9368 }
9369
9370 /**
9371  * intel_init_clock_gating_hooks - setup the clock gating hooks
9372  * @dev_priv: device private
9373  *
9374  * Setup the hooks that configure which clocks of a given platform can be
9375  * gated and also apply various GT and display specific workarounds for these
9376  * platforms. Note that some GT specific workarounds are applied separately
9377  * when GPU contexts or batchbuffers start their execution.
9378  */
9379 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9380 {
9381         if (IS_ICELAKE(dev_priv))
9382                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9383         else if (IS_CANNONLAKE(dev_priv))
9384                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9385         else if (IS_COFFEELAKE(dev_priv))
9386                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9387         else if (IS_SKYLAKE(dev_priv))
9388                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9389         else if (IS_KABYLAKE(dev_priv))
9390                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9391         else if (IS_BROXTON(dev_priv))
9392                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9393         else if (IS_GEMINILAKE(dev_priv))
9394                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9395         else if (IS_BROADWELL(dev_priv))
9396                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9397         else if (IS_CHERRYVIEW(dev_priv))
9398                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9399         else if (IS_HASWELL(dev_priv))
9400                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9401         else if (IS_IVYBRIDGE(dev_priv))
9402                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9403         else if (IS_VALLEYVIEW(dev_priv))
9404                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9405         else if (IS_GEN6(dev_priv))
9406                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9407         else if (IS_GEN5(dev_priv))
9408                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9409         else if (IS_G4X(dev_priv))
9410                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9411         else if (IS_I965GM(dev_priv))
9412                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9413         else if (IS_I965G(dev_priv))
9414                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9415         else if (IS_GEN3(dev_priv))
9416                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9417         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9418                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9419         else if (IS_GEN2(dev_priv))
9420                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9421         else {
9422                 MISSING_CASE(INTEL_DEVID(dev_priv));
9423                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9424         }
9425 }
9426
9427 /* Set up chip specific power management-related functions */
9428 void intel_init_pm(struct drm_i915_private *dev_priv)
9429 {
9430         intel_fbc_init(dev_priv);
9431
9432         /* For cxsr */
9433         if (IS_PINEVIEW(dev_priv))
9434                 i915_pineview_get_mem_freq(dev_priv);
9435         else if (IS_GEN5(dev_priv))
9436                 i915_ironlake_get_mem_freq(dev_priv);
9437
9438         /* For FIFO watermark updates */
9439         if (INTEL_GEN(dev_priv) >= 9) {
9440                 skl_setup_wm_latency(dev_priv);
9441                 dev_priv->display.initial_watermarks = skl_initial_wm;
9442                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9443                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9444         } else if (HAS_PCH_SPLIT(dev_priv)) {
9445                 ilk_setup_wm_latency(dev_priv);
9446
9447                 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
9448                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9449                     (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
9450                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9451                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9452                         dev_priv->display.compute_intermediate_wm =
9453                                 ilk_compute_intermediate_wm;
9454                         dev_priv->display.initial_watermarks =
9455                                 ilk_initial_watermarks;
9456                         dev_priv->display.optimize_watermarks =
9457                                 ilk_optimize_watermarks;
9458                 } else {
9459                         DRM_DEBUG_KMS("Failed to read display plane latency. "
9460                                       "Disabling CxSR\n");
9461                 }
9462         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9463                 vlv_setup_wm_latency(dev_priv);
9464                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9465                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9466                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9467                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9468                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9469         } else if (IS_G4X(dev_priv)) {
9470                 g4x_setup_wm_latency(dev_priv);
9471                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9472                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9473                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9474                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9475         } else if (IS_PINEVIEW(dev_priv)) {
9476                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9477                                             dev_priv->is_ddr3,
9478                                             dev_priv->fsb_freq,
9479                                             dev_priv->mem_freq)) {
9480                         DRM_INFO("failed to find known CxSR latency "
9481                                  "(found ddr%s fsb freq %d, mem freq %d), "
9482                                  "disabling CxSR\n",
9483                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
9484                                  dev_priv->fsb_freq, dev_priv->mem_freq);
9485                         /* Disable CxSR and never update its watermark again */
9486                         intel_set_memory_cxsr(dev_priv, false);
9487                         dev_priv->display.update_wm = NULL;
9488                 } else
9489                         dev_priv->display.update_wm = pineview_update_wm;
9490         } else if (IS_GEN4(dev_priv)) {
9491                 dev_priv->display.update_wm = i965_update_wm;
9492         } else if (IS_GEN3(dev_priv)) {
9493                 dev_priv->display.update_wm = i9xx_update_wm;
9494                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9495         } else if (IS_GEN2(dev_priv)) {
9496                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9497                         dev_priv->display.update_wm = i845_update_wm;
9498                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
9499                 } else {
9500                         dev_priv->display.update_wm = i9xx_update_wm;
9501                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
9502                 }
9503         } else {
9504                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9505         }
9506 }
9507
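/*
 * Decode the error field of GEN6_PCODE_MAILBOX into an errno once a mailbox
 * transaction has completed.
 */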
9508 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9509 {
9510         uint32_t flags =
9511                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9512
9513         switch (flags) {
9514         case GEN6_PCODE_SUCCESS:
9515                 return 0;
9516         case GEN6_PCODE_UNIMPLEMENTED_CMD:
9517                 return -ENODEV;
9518         case GEN6_PCODE_ILLEGAL_CMD:
9519                 return -ENXIO;
9520         case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9521         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9522                 return -EOVERFLOW;
9523         case GEN6_PCODE_TIMEOUT:
9524                 return -ETIMEDOUT;
9525         default:
9526                 MISSING_CASE(flags);
9527                 return 0;
9528         }
9529 }
9530
9531 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9532 {
9533         uint32_t flags =
9534                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9535
9536         switch (flags) {
9537         case GEN6_PCODE_SUCCESS:
9538                 return 0;
9539         case GEN6_PCODE_ILLEGAL_CMD:
9540                 return -ENXIO;
9541         case GEN7_PCODE_TIMEOUT:
9542                 return -ETIMEDOUT;
9543         case GEN7_PCODE_ILLEGAL_DATA:
9544                 return -EINVAL;
9545         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9546                 return -EOVERFLOW;
9547         default:
9548                 MISSING_CASE(flags);
9549                 return 0;
9550         }
9551 }
9552
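/*
 * GEN6+ pcode mailbox protocol: write the payload to GEN6_PCODE_DATA/DATA1,
 * write the command with GEN6_PCODE_READY set to GEN6_PCODE_MAILBOX, poll for
 * the READY bit to clear, read any reply back from GEN6_PCODE_DATA and decode
 * the mailbox error field via the gen6/gen7 status helpers above.
 */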
9553 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9554 {
9555         int status;
9556
9557         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9558
9559         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9560          * use the fw I915_READ variants to reduce the amount of work
9561          * required when reading/writing.
9562          */
9563
9564         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9565                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9566                                  mbox, __builtin_return_address(0));
9567                 return -EAGAIN;
9568         }
9569
9570         I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9571         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9572         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9573
9574         if (__intel_wait_for_register_fw(dev_priv,
9575                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9576                                          500, 0, NULL)) {
9577                 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9578                           mbox, __builtin_return_address(0));
9579                 return -ETIMEDOUT;
9580         }
9581
9582         *val = I915_READ_FW(GEN6_PCODE_DATA);
9583         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9584
9585         if (INTEL_GEN(dev_priv) > 6)
9586                 status = gen7_check_mailbox_status(dev_priv);
9587         else
9588                 status = gen6_check_mailbox_status(dev_priv);
9589
9590         if (status) {
9591                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9592                                  mbox, __builtin_return_address(0), status);
9593                 return status;
9594         }
9595
9596         return 0;
9597 }
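
/*
 * Illustrative sketch only (not part of the driver): how a caller might
 * read a value back through the pcode mailbox.  GEN6_PCODE_READ_RC6VIDS
 * is one of the mailbox IDs already used elsewhere in this file and is
 * assumed to be defined in i915_reg.h.
 */
static int __maybe_unused example_pcode_read_rc6vids(struct drm_i915_private *dev_priv,
						     u32 *rc6vids)
{
	int ret;

	mutex_lock(&dev_priv->pcu_lock);
	/* *rc6vids carries the request word in and the pcode reply out. */
	*rc6vids = 0;
	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, rc6vids);
	mutex_unlock(&dev_priv->pcu_lock);

	return ret;
}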
9598
9599 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9600                                     u32 mbox, u32 val,
9601                                     int fast_timeout_us, int slow_timeout_ms)
9602 {
9603         int status;
9604
9605         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9606
9607         /* GEN6_PCODE_* are outside of the forcewake domain, so we can
9608          * use the fw I915_READ variants to reduce the amount of work
9609          * required when reading/writing.
9610          */
9611
9612         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9613                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9614                                  val, mbox, __builtin_return_address(0));
9615                 return -EAGAIN;
9616         }
9617
9618         I915_WRITE_FW(GEN6_PCODE_DATA, val);
9619         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9620         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9621
9622         if (__intel_wait_for_register_fw(dev_priv,
9623                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9624                                          fast_timeout_us, slow_timeout_ms,
9625                                          NULL)) {
9626                 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9627                           val, mbox, __builtin_return_address(0));
9628                 return -ETIMEDOUT;
9629         }
9630
9631         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9632
9633         if (INTEL_GEN(dev_priv) > 6)
9634                 status = gen7_check_mailbox_status(dev_priv);
9635         else
9636                 status = gen6_check_mailbox_status(dev_priv);
9637
9638         if (status) {
9639                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9640                                  val, mbox, __builtin_return_address(0), status);
9641                 return status;
9642         }
9643
9644         return 0;
9645 }
9646
9647 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9648                                   u32 request, u32 reply_mask, u32 reply,
9649                                   u32 *status)
9650 {
9651         u32 val = request;
9652
9653         *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9654
9655         return *status || ((val & reply_mask) == reply);
9656 }
9657
9658 /**
9659  * skl_pcode_request - send PCODE request until acknowledgment
9660  * @dev_priv: device private
9661  * @mbox: PCODE mailbox ID the request is targeted for
9662  * @request: request ID
9663  * @reply_mask: mask used to check for request acknowledgment
9664  * @reply: value used to check for request acknowledgment
9665  * @timeout_base_ms: timeout for polling with preemption enabled
9666  *
9667  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9668  * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9669  * The request is acknowledged once the PCODE reply dword equals @reply after
9670  * applying @reply_mask. Polling is first attempted with preemption enabled
9671  * for @timeout_base_ms and, if this times out, it is retried for another
9672  * 50 ms with preemption disabled.
9673  *
9674  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9675  * other error as reported by PCODE.
9676  */
9677 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9678                       u32 reply_mask, u32 reply, int timeout_base_ms)
9679 {
9680         u32 status;
9681         int ret;
9682
9683         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9684
9685 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9686                                    &status)
9687
9688         /*
9689          * Prime the PCODE by doing a request first. Normally it guarantees
9690          * that a subsequent request, at most @timeout_base_ms later, succeeds.
9691          * _wait_for() doesn't guarantee when the passed condition is first
9692          * evaluated, so send the first request explicitly.
9693          */
9694         if (COND) {
9695                 ret = 0;
9696                 goto out;
9697         }
9698         ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9699         if (!ret)
9700                 goto out;
9701
9702         /*
9703          * The above can time out if the number of requests was low (2 in the
9704          * worst case) _and_ PCODE was busy for some reason even after a
9705          * (queued) request and @timeout_base_ms delay. As a workaround retry
9706          * the poll with preemption disabled to maximize the number of
9707          * requests. Increase the timeout from @timeout_base_ms to 50ms to
9708          * account for interrupts that could reduce the number of these
9709          * requests, and for any quirks of the PCODE firmware that delays
9710          * the request completion.
9711          */
9712         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9713         WARN_ON_ONCE(timeout_base_ms > 3);
9714         preempt_disable();
9715         ret = wait_for_atomic(COND, 50);
9716         preempt_enable();
9717
9718 out:
9719         return ret ? ret : status;
9720 #undef COND
9721 }
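
/*
 * Illustrative sketch only (not part of the driver): the request/ack
 * pattern above is the one the cdclk code uses when asking pcode to
 * prepare for a frequency change.  SKL_PCODE_CDCLK_CONTROL and the
 * SKL_CDCLK_*_CHANGE values are assumed to come from i915_reg.h.
 */
static int __maybe_unused example_skl_cdclk_prepare(struct drm_i915_private *dev_priv)
{
	int ret;

	mutex_lock(&dev_priv->pcu_lock);
	/* Poll for up to ~3 ms + 50 ms until pcode reports it is ready. */
	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
				SKL_CDCLK_PREPARE_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE,
				SKL_CDCLK_READY_FOR_CHANGE, 3);
	mutex_unlock(&dev_priv->pcu_lock);

	return ret;
}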
9722
9723 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9724 {
9725         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9726
9727         /*
9728          * N = val - 0xb7
9729          * Slow = Fast = GPLL ref * N
9730          */
9731         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9732 }
9733
9734 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9735 {
9736         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9737
9738         return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9739 }
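
/*
 * Worked example (with a purely hypothetical gpll_ref_freq of 5400): an
 * opcode of 0xc8 gives N = 0xc8 - 0xb7 = 17, so byt_gpu_freq() returns
 * DIV_ROUND_CLOSEST(5400 * 17, 1000) = 92, and byt_freq_opcode(92) maps
 * back to DIV_ROUND_CLOSEST(1000 * 92, 5400) + 0xb7 = 17 + 0xb7 = 0xc8.
 */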
9740
9741 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9742 {
9743         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9744
9745         /*
9746          * N = val / 2
9747          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9748          */
9749         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9750 }
9751
9752 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9753 {
9754         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9755
9756         /* CHV needs even values */
9757         return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9758 }
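
/*
 * Worked example (same hypothetical gpll_ref_freq of 5400): an even
 * opcode of 44 gives chv_gpu_freq() = DIV_ROUND_CLOSEST(5400 * 44, 4000)
 * = 59, and chv_freq_opcode(59) = DIV_ROUND_CLOSEST(2 * 1000 * 59, 5400)
 * * 2 = 22 * 2 = 44, preserving the even-value requirement.
 */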
9759
9760 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9761 {
9762         if (INTEL_GEN(dev_priv) >= 9)
9763                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9764                                          GEN9_FREQ_SCALER);
9765         else if (IS_CHERRYVIEW(dev_priv))
9766                 return chv_gpu_freq(dev_priv, val);
9767         else if (IS_VALLEYVIEW(dev_priv))
9768                 return byt_gpu_freq(dev_priv, val);
9769         else
9770                 return val * GT_FREQUENCY_MULTIPLIER;
9771 }
9772
9773 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9774 {
9775         if (INTEL_GEN(dev_priv) >= 9)
9776                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9777                                          GT_FREQUENCY_MULTIPLIER);
9778         else if (IS_CHERRYVIEW(dev_priv))
9779                 return chv_freq_opcode(dev_priv, val);
9780         else if (IS_VALLEYVIEW(dev_priv))
9781                 return byt_freq_opcode(dev_priv, val);
9782         else
9783                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9784 }
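
/*
 * Worked example (assuming GT_FREQUENCY_MULTIPLIER is 50 and
 * GEN9_FREQ_SCALER is 3, as defined elsewhere in the driver): on gen9+
 * an opcode of 36 maps to intel_gpu_freq() = 36 * 50 / 3 = 600 MHz,
 * i.e. roughly 16.67 MHz per step, while older big-core parts step in
 * 50 MHz units (opcode 12 -> 600 MHz).  VLV/CHV instead go through the
 * GPLL based helpers above.
 */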
9785
9786 void intel_pm_setup(struct drm_i915_private *dev_priv)
9787 {
9788         mutex_init(&dev_priv->pcu_lock);
9789         mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9790
9791         atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9792
9793         dev_priv->runtime_pm.suspended = false;
9794         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9795 }
9796
9797 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9798                              const i915_reg_t reg)
9799 {
9800         u32 lower, upper, tmp;
9801         int loop = 2;
9802
9803         /*
9804          * The registers accessed here do not need forcewake. We borrow the
9805          * uncore lock to prevent concurrent access to the counter range.
9806          */
9807         lockdep_assert_held(&dev_priv->uncore.lock);
9808
9809         /*
9810          * vlv and chv residency counters are 40 bits in width.
9811          * With a control bit, we can choose between the upper or lower
9812          * 32 bit window into this counter.
9813          *
9814          * Although we always use the counter in high-range mode elsewhere,
9815          * userspace may attempt to read the value before rc6 is initialised,
9816          * before we have set the default VLV_COUNTER_CONTROL value. So always
9817          * set the high bit to be safe.
9818          */
9819         I915_WRITE_FW(VLV_COUNTER_CONTROL,
9820                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9821         upper = I915_READ_FW(reg);
9822         do {
9823                 tmp = upper;
9824
9825                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9826                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9827                 lower = I915_READ_FW(reg);
9828
9829                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9830                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9831                 upper = I915_READ_FW(reg);
9832         } while (upper != tmp && --loop);
9833
9834         /*
9835          * Everywhere else we always use VLV_COUNTER_CONTROL with the
9836          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9837          * now.
9838          */
9839
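        /*
         * The high window is assumed to expose bits [39:8] of the 40 bit
         * counter and the low window bits [31:0], so shifting the high
         * read by 8 and OR-ing in the low read reconstructs the value.
         */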
9840         return lower | (u64)upper << 8;
9841 }
9842
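/*
 * Return the accumulated RC6 residency for @reg, scaled to nanoseconds.
 *
 * Worked example of the scaling below (counts are made up): on a Gen9 LP
 * part 1200 raw ticks become mul_u64_u32_div(1200, 10000, 12) =
 * 1,000,000 ns, matching the 833.33 ns tick, while on other big-core
 * parts the same 1200 ticks at 1.28 us each give 1,536,000 ns.
 */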
9843 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9844                            const i915_reg_t reg)
9845 {
9846         u64 time_hw, prev_hw, overflow_hw;
9847         unsigned int fw_domains;
9848         unsigned long flags;
9849         unsigned int i;
9850         u32 mul, div;
9851
9852         if (!HAS_RC6(dev_priv))
9853                 return 0;
9854
9855         /*
9856          * Store previous hw counter values for counter wrap-around handling.
9857          *
9858          * There are only four interesting registers and they live next to each
9859          * other, so we can use the offset relative to the smallest one as
9860          * the index into driver storage.
9861          */
9862         i = (i915_mmio_reg_offset(reg) -
9863              i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9864         if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9865                 return 0;
9866
9867         fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9868
9869         spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9870         intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9871
9872         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9873         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9874                 mul = 1000000;
9875                 div = dev_priv->czclk_freq;
9876                 overflow_hw = BIT_ULL(40);
9877                 time_hw = vlv_residency_raw(dev_priv, reg);
9878         } else {
9879                 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9880                 if (IS_GEN9_LP(dev_priv)) {
9881                         mul = 10000;
9882                         div = 12;
9883                 } else {
9884                         mul = 1280;
9885                         div = 1;
9886                 }
9887
9888                 overflow_hw = BIT_ULL(32);
9889                 time_hw = I915_READ_FW(reg);
9890         }
9891
9892         /*
9893          * Counter wrap handling.
9894          *
9895          * This relies on the counters being sampled often enough; if too much
9896          * time passes between queries, a counter can still wrap undetected.
9897          */
9898         prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9899         dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9900
9901         /* RC6 delta from last sample. */
9902         if (time_hw >= prev_hw)
9903                 time_hw -= prev_hw;
9904         else
9905                 time_hw += overflow_hw - prev_hw;
9906
9907         /* Add delta to RC6 extended raw driver copy. */
9908         time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9909         dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9910
9911         intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9912         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9913
9914         return mul_u64_u32_div(time_hw, mul, div);
9915 }
9916
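/*
 * Extract the Current Actual Graphics Frequency (CAGF) field from an
 * RPSTAT snapshot.  The result is in the platform's native frequency
 * units; a caller would typically convert it with intel_gpu_freq(),
 * roughly (sketch only):
 *
 *	u32 freq_mhz = intel_gpu_freq(dev_priv,
 *				      intel_get_cagf(dev_priv,
 *						     I915_READ(GEN6_RPSTAT1)));
 */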
9917 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9918 {
9919         u32 cagf;
9920
9921         if (INTEL_GEN(dev_priv) >= 9)
9922                 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9923         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9924                 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9925         else
9926                 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9927
9928         return cagf;
9929 }