GNU Linux-libre 6.7.9-gnu
[releases.git] / drivers / media / platform / verisilicon / rockchip_vpu981_hw_av1_dec.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Collabora
4  *
5  * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
6  */
7
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12
#define AV1_DEC_MODE		17
#define GM_GLOBAL_MODELS_PER_FRAME	7
/* One global model entry: six 32-bit parameters plus four 16-bit shear values */
#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES		128
/* 16 bytes of tile information per tile */
#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS	24
#define AV1_REF_SCALE_SHIFT	14
/* Parenthesized so the negative constant is safe inside any expression */
#define AV1_INVALID_IDX		(-1)
#define MAX_FRAME_DISTANCE	31
#define AV1_PRIMARY_REF_NONE	7
#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
 * they are not part of the specification.
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
#define V4L2_AV1_SEG_LVL_ALT_LF_V	4

#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

/*
 * True for key frames and intra-only frames.
 * Note: @type is evaluated twice, do not pass an expression with side effects.
 */
#define IS_INTRA(type) (((type) == V4L2_AV1_KEY_FRAME) || ((type) == V4L2_AV1_INTRA_ONLY_FRAME))

/* Zero-based buffer indexes of the inter references, relative to LAST_FRAME */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16
59
/*
 * Shift @value right by @n bits after adding half of (1 << @n), i.e. a
 * division by a power of two that rounds to the nearest integer.
 * Both arguments are evaluated exactly once (@n first).
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)					\
({									\
	typeof(n) _pow2_shift = n;					\
	typeof(value) _pow2_val = value;				\
	(_pow2_val + (BIT(_pow2_shift) >> 1)) >> _pow2_shift;		\
})
66
/*
 * Sign-aware wrapper around AV1_DIV_ROUND_UP_POW2(): negative values are
 * negated, rounded as a magnitude, and negated back, so rounding is
 * symmetric around zero. Both arguments are evaluated exactly once.
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _sgn_shift = n;					\
	typeof(value) _sgn_val = value;					\
	((_sgn_val < 0) ?						\
		-AV1_DIV_ROUND_UP_POW2(-(_sgn_val), (_sgn_shift)) :	\
		AV1_DIV_ROUND_UP_POW2((_sgn_val), (_sgn_shift)));	\
})
74
/*
 * Film grain data block. rockchip_vpu981_av1_dec_init() sizes the film grain
 * DMA buffer from sizeof() of this structure (rounded up to 2048 bytes).
 * NOTE(review): field order and sizes presumably mirror the layout the
 * hardware reads - confirm against the hardware documentation.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82
/*
 * Multiplier table indexed by rockchip_vpu981_av1_dec_resolve_divisor_32().
 * It holds DIV_LUT_NUM + 1 entries, going from 16384 (index 0) down to
 * 8192 (last index). The values are consistent with
 * round(2^DIV_LUT_PREC_BITS * 2^DIV_LUT_BITS / (2^DIV_LUT_BITS + index)).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109
110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115         u64 timestamp;
116         int i, idx = frame->ref_frame_idx[ref];
117
118         if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119                 return AV1_INVALID_IDX;
120
121         timestamp = frame->reference_frame_ts[idx];
122         for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123                 if (!av1_dec->frame_refs[i].used)
124                         continue;
125                 if (av1_dec->frame_refs[i].timestamp == timestamp)
126                         return i;
127         }
128
129         return AV1_INVALID_IDX;
130 }
131
132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135         int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137         if (idx != AV1_INVALID_IDX)
138                 return av1_dec->frame_refs[idx].order_hint;
139
140         return 0;
141 }
142
143 static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
144                                              u64 timestamp)
145 {
146         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
147         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
148         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
149         int i;
150
151         for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
152                 int j;
153
154                 if (av1_dec->frame_refs[i].used)
155                         continue;
156
157                 av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
158                 av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
159                 av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
160                 av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
161                 av1_dec->frame_refs[i].timestamp = timestamp;
162                 av1_dec->frame_refs[i].frame_type = frame->frame_type;
163                 av1_dec->frame_refs[i].order_hint = frame->order_hint;
164                 if (!av1_dec->frame_refs[i].vb2_ref)
165                         av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
166
167                 for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
168                         av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
169                 av1_dec->frame_refs[i].used = true;
170                 av1_dec->current_frame_index = i;
171
172                 return i;
173         }
174
175         return AV1_INVALID_IDX;
176 }
177
178 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
179 {
180         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
181
182         if (idx >= 0)
183                 av1_dec->frame_refs[idx].used = false;
184 }
185
186 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
187 {
188         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
190
191         int ref, idx;
192
193         for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194                 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
195                 bool used = false;
196
197                 if (!av1_dec->frame_refs[idx].used)
198                         continue;
199
200                 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201                         if (ctrls->frame->reference_frame_ts[ref] == timestamp)
202                                 used = true;
203                 }
204
205                 if (!used)
206                         rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
207         }
208 }
209
210 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
211 {
212         return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
213 }
214
215 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
216 {
217         size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
218
219         return ALIGN((cr_offset * 3) / 2, 64);
220 }
221
222 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
223 {
224         struct hantro_dev *vpu = ctx->dev;
225         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
226
227         if (av1_dec->db_data_col.cpu)
228                 dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
229                                   av1_dec->db_data_col.cpu,
230                                   av1_dec->db_data_col.dma);
231         av1_dec->db_data_col.cpu = NULL;
232
233         if (av1_dec->db_ctrl_col.cpu)
234                 dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
235                                   av1_dec->db_ctrl_col.cpu,
236                                   av1_dec->db_ctrl_col.dma);
237         av1_dec->db_ctrl_col.cpu = NULL;
238
239         if (av1_dec->cdef_col.cpu)
240                 dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
241                                   av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
242         av1_dec->cdef_col.cpu = NULL;
243
244         if (av1_dec->sr_col.cpu)
245                 dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
246                                   av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
247         av1_dec->sr_col.cpu = NULL;
248
249         if (av1_dec->lr_col.cpu)
250                 dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
251                                   av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
252         av1_dec->lr_col.cpu = NULL;
253 }
254
255 static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
256 {
257         struct hantro_dev *vpu = ctx->dev;
258         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
259         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
260         unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
261         unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
262         unsigned int height_in_sb = height / 64;
263         unsigned int stripe_num = ((height + 8) + 63) / 64;
264         size_t size;
265
266         if (av1_dec->db_data_col.size >=
267             ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
268                 return 0;
269
270         rockchip_vpu981_av1_dec_tiles_free(ctx);
271
272         size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
273         av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
274                                                       &av1_dec->db_data_col.dma,
275                                                       GFP_KERNEL);
276         if (!av1_dec->db_data_col.cpu)
277                 goto buffer_allocation_error;
278         av1_dec->db_data_col.size = size;
279
280         size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
281         av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
282                                                       &av1_dec->db_ctrl_col.dma,
283                                                       GFP_KERNEL);
284         if (!av1_dec->db_ctrl_col.cpu)
285                 goto buffer_allocation_error;
286         av1_dec->db_ctrl_col.size = size;
287
288         size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
289         av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
290                                                    &av1_dec->cdef_col.dma,
291                                                    GFP_KERNEL);
292         if (!av1_dec->cdef_col.cpu)
293                 goto buffer_allocation_error;
294         av1_dec->cdef_col.size = size;
295
296         size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
297         av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
298                                                  &av1_dec->sr_col.dma,
299                                                  GFP_KERNEL);
300         if (!av1_dec->sr_col.cpu)
301                 goto buffer_allocation_error;
302         av1_dec->sr_col.size = size;
303
304         size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
305         av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
306                                                  &av1_dec->lr_col.dma,
307                                                  GFP_KERNEL);
308         if (!av1_dec->lr_col.cpu)
309                 goto buffer_allocation_error;
310         av1_dec->lr_col.size = size;
311
312         av1_dec->num_tile_cols_allocated = num_tile_cols;
313         return 0;
314
315 buffer_allocation_error:
316         rockchip_vpu981_av1_dec_tiles_free(ctx);
317         return -ENOMEM;
318 }
319
320 void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
321 {
322         struct hantro_dev *vpu = ctx->dev;
323         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
324
325         if (av1_dec->global_model.cpu)
326                 dma_free_coherent(vpu->dev, av1_dec->global_model.size,
327                                   av1_dec->global_model.cpu,
328                                   av1_dec->global_model.dma);
329         av1_dec->global_model.cpu = NULL;
330
331         if (av1_dec->tile_info.cpu)
332                 dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
333                                   av1_dec->tile_info.cpu,
334                                   av1_dec->tile_info.dma);
335         av1_dec->tile_info.cpu = NULL;
336
337         if (av1_dec->film_grain.cpu)
338                 dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
339                                   av1_dec->film_grain.cpu,
340                                   av1_dec->film_grain.dma);
341         av1_dec->film_grain.cpu = NULL;
342
343         if (av1_dec->prob_tbl.cpu)
344                 dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
345                                   av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
346         av1_dec->prob_tbl.cpu = NULL;
347
348         if (av1_dec->prob_tbl_out.cpu)
349                 dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
350                                   av1_dec->prob_tbl_out.cpu,
351                                   av1_dec->prob_tbl_out.dma);
352         av1_dec->prob_tbl_out.cpu = NULL;
353
354         if (av1_dec->tile_buf.cpu)
355                 dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
356                                   av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
357         av1_dec->tile_buf.cpu = NULL;
358
359         rockchip_vpu981_av1_dec_tiles_free(ctx);
360 }
361
362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363 {
364         struct hantro_dev *vpu = ctx->dev;
365         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366
367         memset(av1_dec, 0, sizeof(*av1_dec));
368
369         av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370                                                        &av1_dec->global_model.dma,
371                                                        GFP_KERNEL);
372         if (!av1_dec->global_model.cpu)
373                 return -ENOMEM;
374         av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375
376         av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377                                                     &av1_dec->tile_info.dma,
378                                                     GFP_KERNEL);
379         if (!av1_dec->tile_info.cpu)
380                 return -ENOMEM;
381         av1_dec->tile_info.size = AV1_MAX_TILES;
382
383         av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384                                                      ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385                                                      &av1_dec->film_grain.dma,
386                                                      GFP_KERNEL);
387         if (!av1_dec->film_grain.cpu)
388                 return -ENOMEM;
389         av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390
391         av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392                                                    ALIGN(sizeof(struct av1cdfs), 2048),
393                                                    &av1_dec->prob_tbl.dma,
394                                                    GFP_KERNEL);
395         if (!av1_dec->prob_tbl.cpu)
396                 return -ENOMEM;
397         av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398
399         av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400                                                        ALIGN(sizeof(struct av1cdfs), 2048),
401                                                        &av1_dec->prob_tbl_out.dma,
402                                                        GFP_KERNEL);
403         if (!av1_dec->prob_tbl_out.cpu)
404                 return -ENOMEM;
405         av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406         av1_dec->cdfs = &av1_dec->default_cdfs;
407         av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408
409         rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410
411         av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412                                                    AV1_TILE_SIZE,
413                                                    &av1_dec->tile_buf.dma,
414                                                    GFP_KERNEL);
415         if (!av1_dec->tile_buf.cpu)
416                 return -ENOMEM;
417         av1_dec->tile_buf.size = AV1_TILE_SIZE;
418
419         return 0;
420 }
421
422 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
423 {
424         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
425         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
426
427         ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
428         if (WARN_ON(!ctrls->sequence))
429                 return -EINVAL;
430
431         ctrls->tile_group_entry =
432             hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
433         if (WARN_ON(!ctrls->tile_group_entry))
434                 return -EINVAL;
435
436         ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
437         if (WARN_ON(!ctrls->frame))
438                 return -EINVAL;
439
440         ctrls->film_grain =
441             hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
442
443         return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
444 }
445
446 static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
447 {
448         if (n == 0)
449                 return 0;
450         return 31 ^ __builtin_clz(n);
451 }
452
453 static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
454 {
455         int f;
456         u64 e;
457
458         *shift = rockchip_vpu981_av1_dec_get_msb(d);
459         /* e is obtained from D after resetting the most significant 1 bit. */
460         e = d - ((u32)1 << *shift);
461         /* Get the most significant DIV_LUT_BITS (8) bits of e into f */
462         if (*shift > DIV_LUT_BITS)
463                 f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
464         else
465                 f = e << (DIV_LUT_BITS - *shift);
466         if (f > DIV_LUT_NUM)
467                 return -1;
468         *shift += DIV_LUT_PREC_BITS;
469         /* Use f as lookup into the precomputed table of multipliers */
470         return div_lut[f];
471 }
472
473 static void
474 rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
475                                          s64 *beta, s64 *gamma, s64 *delta)
476 {
477         const int *mat = params;
478         short shift;
479         short y;
480         long long gv, dv;
481
482         if (mat[2] <= 0)
483                 return;
484
485         *alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
486         *beta = clamp_val(mat[3], S16_MIN, S16_MAX);
487
488         y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
489
490         gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
491
492         *gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
493
494         dv = ((long long)mat[3] * mat[4]) * y;
495         *delta = clamp_val(mat[5] -
496                 (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
497                 S16_MIN, S16_MAX);
498
499         *alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
500                  * (1 << WARP_PARAM_REDUCE_BITS);
501         *beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
502                 * (1 << WARP_PARAM_REDUCE_BITS);
503         *gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
504                  * (1 << WARP_PARAM_REDUCE_BITS);
505         *delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
506                 * (1 << WARP_PARAM_REDUCE_BITS);
507 }
508
509 static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
510 {
511         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
512         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
513         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
514         const struct v4l2_av1_global_motion *gm = &frame->global_motion;
515         u8 *dst = av1_dec->global_model.cpu;
516         struct hantro_dev *vpu = ctx->dev;
517         int ref_frame, i;
518
519         memset(dst, 0, GLOBAL_MODEL_SIZE);
520         for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
521                 s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
522
523                 for (i = 0; i < 6; ++i) {
524                         if (i == 2)
525                                 *(s32 *)dst =
526                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
527                         else if (i == 3)
528                                 *(s32 *)dst =
529                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
530                         else
531                                 *(s32 *)dst =
532                                         gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
533                         dst += 4;
534                 }
535
536                 if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
537                         rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
538                                                                  &alpha, &beta, &gamma, &delta);
539
540                 *(s16 *)dst = alpha;
541                 dst += 2;
542                 *(s16 *)dst = beta;
543                 dst += 2;
544                 *(s16 *)dst = gamma;
545                 dst += 2;
546                 *(s16 *)dst = delta;
547                 dst += 2;
548         }
549
550         hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
551 }
552
/*
 * Return the smallest k such that (1 << k) is greater than or equal to
 * @target. Targets of 1 or less yield 0.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565
566 static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
567 {
568         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
569         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
570         const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
571         const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
572             ctrls->tile_group_entry;
573         int context_update_y =
574             tile_info->context_update_tile_id / tile_info->tile_cols;
575         int context_update_x =
576             tile_info->context_update_tile_id % tile_info->tile_cols;
577         int context_update_tile_id =
578             context_update_x * tile_info->tile_rows + context_update_y;
579         u8 *dst = av1_dec->tile_info.cpu;
580         struct hantro_dev *vpu = ctx->dev;
581         int tile0, tile1;
582
583         memset(dst, 0, av1_dec->tile_info.size);
584
585         for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
586                 for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
587                         int tile_id = tile1 * tile_info->tile_cols + tile0;
588                         u32 start, end;
589                         u32 y0 =
590                             tile_info->height_in_sbs_minus_1[tile1] + 1;
591                         u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
592
593                         /* tile size in SB units (width,height) */
594                         *dst++ = x0;
595                         *dst++ = 0;
596                         *dst++ = 0;
597                         *dst++ = 0;
598                         *dst++ = y0;
599                         *dst++ = 0;
600                         *dst++ = 0;
601                         *dst++ = 0;
602
603                         /* tile start position */
604                         start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
605                         *dst++ = start & 255;
606                         *dst++ = (start >> 8) & 255;
607                         *dst++ = (start >> 16) & 255;
608                         *dst++ = (start >> 24) & 255;
609
610                         /* number of bytes in tile data */
611                         end = start + group_entry[tile_id].tile_size;
612                         *dst++ = end & 255;
613                         *dst++ = (end >> 8) & 255;
614                         *dst++ = (end >> 16) & 255;
615                         *dst++ = (end >> 24) & 255;
616                 }
617         }
618
619         hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
620         hantro_reg_write(vpu, &av1_tile_enable,
621                          !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
622         hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
623         hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
624         hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
625         hantro_reg_write(vpu, &av1_tile_transpose, 1);
626         if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
627             rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
628                 hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
629         else
630                 hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
631
632         hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
633 }
634
635 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636                                             int a, int b)
637 {
638         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640         int bits = ctrls->sequence->order_hint_bits - 1;
641         int diff, m;
642
643         if (!ctrls->sequence->order_hint_bits)
644                 return 0;
645
646         diff = a - b;
647         m = 1 << bits;
648         diff = (diff & (m - 1)) - (diff & m);
649
650         return diff;
651 }
652
653 static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
654 {
655         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
656         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
657         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
658         const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
659         int i;
660
661         if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
662                 for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
663                         av1_dec->ref_frame_sign_bias[i] = 0;
664
665                 return;
666         }
667         // Identify the nearest forward and backward references.
668         for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
669                 if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
670                         int rel_off =
671                             rockchip_vpu981_av1_dec_get_dist(ctx,
672                                                              rockchip_vpu981_get_order_hint(ctx, i),
673                                                              frame->order_hint);
674                         av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
675                 }
676         }
677 }
678
679 static bool
680 rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
681                                 int width, int height)
682 {
683         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
684         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
685         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
686         struct hantro_dev *vpu = ctx->dev;
687         struct hantro_decoded_buffer *dst;
688         dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
689         size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
690         size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
691         int cur_width = frame->frame_width_minus_1 + 1;
692         int cur_height = frame->frame_height_minus_1 + 1;
693         int scale_width =
694             ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
695         int scale_height =
696             ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
697
698         switch (ref) {
699         case 0:
700                 hantro_reg_write(vpu, &av1_ref0_height, height);
701                 hantro_reg_write(vpu, &av1_ref0_width, width);
702                 hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
703                 hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
704                 break;
705         case 1:
706                 hantro_reg_write(vpu, &av1_ref1_height, height);
707                 hantro_reg_write(vpu, &av1_ref1_width, width);
708                 hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
709                 hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
710                 break;
711         case 2:
712                 hantro_reg_write(vpu, &av1_ref2_height, height);
713                 hantro_reg_write(vpu, &av1_ref2_width, width);
714                 hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
715                 hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
716                 break;
717         case 3:
718                 hantro_reg_write(vpu, &av1_ref3_height, height);
719                 hantro_reg_write(vpu, &av1_ref3_width, width);
720                 hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
721                 hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
722                 break;
723         case 4:
724                 hantro_reg_write(vpu, &av1_ref4_height, height);
725                 hantro_reg_write(vpu, &av1_ref4_width, width);
726                 hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
727                 hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
728                 break;
729         case 5:
730                 hantro_reg_write(vpu, &av1_ref5_height, height);
731                 hantro_reg_write(vpu, &av1_ref5_width, width);
732                 hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
733                 hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
734                 break;
735         case 6:
736                 hantro_reg_write(vpu, &av1_ref6_height, height);
737                 hantro_reg_write(vpu, &av1_ref6_width, width);
738                 hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
739                 hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
740                 break;
741         default:
742                 pr_warn("AV1 invalid reference frame index\n");
743         }
744
745         dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
746         luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
747         chroma_addr = luma_addr + cr_offset;
748         mv_addr = luma_addr + mv_offset;
749
750         hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
751         hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
752         hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
753
754         return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
755                 (scale_height != (1 << AV1_REF_SCALE_SHIFT));
756 }
757
758 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
759                                                   int ref, int val)
760 {
761         struct hantro_dev *vpu = ctx->dev;
762
763         switch (ref) {
764         case 0:
765                 hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
766                 break;
767         case 1:
768                 hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
769                 break;
770         case 2:
771                 hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
772                 break;
773         case 3:
774                 hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
775                 break;
776         case 4:
777                 hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
778                 break;
779         case 5:
780                 hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
781                 break;
782         case 6:
783                 hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
784                 break;
785         default:
786                 pr_warn("AV1 invalid sign bias index\n");
787                 break;
788         }
789 }
790
791 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
792 {
793         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
794         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
795         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
796         const struct v4l2_av1_segmentation *seg = &frame->segmentation;
797         u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
798         struct hantro_dev *vpu = ctx->dev;
799         u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
800
801         if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
802             frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
803                 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
804
805                 if (idx >= 0) {
806                         dma_addr_t luma_addr, mv_addr = 0;
807                         struct hantro_decoded_buffer *seg;
808                         size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
809
810                         seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
811                         luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
812                         mv_addr = luma_addr + mv_offset;
813
814                         hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
815                         hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
816                 }
817         }
818
819         hantro_reg_write(vpu, &av1_segment_temp_upd_e,
820                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
821         hantro_reg_write(vpu, &av1_segment_upd_e,
822                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
823         hantro_reg_write(vpu, &av1_segment_e,
824                          !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
825
826         hantro_reg_write(vpu, &av1_error_resilient,
827                          !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
828
829         if (IS_INTRA(frame->frame_type) ||
830             !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
831                 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
832         }
833
834         if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
835                 int s;
836
837                 for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
838                         if (seg->feature_enabled[s] &
839                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
840                                 segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
841                                     clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
842                                           0, 255);
843                                 segsign |=
844                                         (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
845                         }
846
847                         if (seg->feature_enabled[s] &
848                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
849                                 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
850                                         clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
851                                               -63, 63);
852
853                         if (seg->feature_enabled[s] &
854                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
855                                 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
856                                     clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
857                                           -63, 63);
858
859                         if (seg->feature_enabled[s] &
860                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
861                                 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
862                                     clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
863                                           -63, 63);
864
865                         if (seg->feature_enabled[s] &
866                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
867                                 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
868                                     clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
869                                           -63, 63);
870
871                         if (frame->frame_type && seg->feature_enabled[s] &
872                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
873                                 segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
874
875                         if (seg->feature_enabled[s] &
876                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
877                                 segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
878
879                         if (seg->feature_enabled[s] &
880                             V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
881                                 segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
882                 }
883         }
884
885         for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
886                 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
887                         if (seg->feature_enabled[i]
888                             & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
889                                 preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
890                                 last_active_seg = max(i, last_active_seg);
891                         }
892                 }
893         }
894
895         hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
896         hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
897
898         hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
899
900         /* Write QP, filter level, ref frame and skip for every segment */
901         hantro_reg_write(vpu, &av1_quant_seg0,
902                          segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
903         hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
904                          segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
905         hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
906                          segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
907         hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
908                          segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
909         hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
910                          segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
911         hantro_reg_write(vpu, &av1_refpic_seg0,
912                          segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
913         hantro_reg_write(vpu, &av1_skip_seg0,
914                          segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
915         hantro_reg_write(vpu, &av1_global_mv_seg0,
916                          segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
917
918         hantro_reg_write(vpu, &av1_quant_seg1,
919                          segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
920         hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
921                          segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
922         hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
923                          segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
924         hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
925                          segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
926         hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
927                          segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
928         hantro_reg_write(vpu, &av1_refpic_seg1,
929                          segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
930         hantro_reg_write(vpu, &av1_skip_seg1,
931                          segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
932         hantro_reg_write(vpu, &av1_global_mv_seg1,
933                          segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
934
935         hantro_reg_write(vpu, &av1_quant_seg2,
936                          segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
937         hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
938                          segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
939         hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
940                          segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
941         hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
942                          segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
943         hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
944                          segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
945         hantro_reg_write(vpu, &av1_refpic_seg2,
946                          segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
947         hantro_reg_write(vpu, &av1_skip_seg2,
948                          segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
949         hantro_reg_write(vpu, &av1_global_mv_seg2,
950                          segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
951
952         hantro_reg_write(vpu, &av1_quant_seg3,
953                          segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
954         hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
955                          segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
956         hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
957                          segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
958         hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
959                          segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
960         hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
961                          segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
962         hantro_reg_write(vpu, &av1_refpic_seg3,
963                          segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
964         hantro_reg_write(vpu, &av1_skip_seg3,
965                          segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
966         hantro_reg_write(vpu, &av1_global_mv_seg3,
967                          segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
968
969         hantro_reg_write(vpu, &av1_quant_seg4,
970                          segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
971         hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
972                          segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
973         hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
974                          segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
975         hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
976                          segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
977         hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
978                          segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
979         hantro_reg_write(vpu, &av1_refpic_seg4,
980                          segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
981         hantro_reg_write(vpu, &av1_skip_seg4,
982                          segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
983         hantro_reg_write(vpu, &av1_global_mv_seg4,
984                          segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
985
986         hantro_reg_write(vpu, &av1_quant_seg5,
987                          segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
988         hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
989                          segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
990         hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
991                          segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
992         hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
993                          segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
994         hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
995                          segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
996         hantro_reg_write(vpu, &av1_refpic_seg5,
997                          segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
998         hantro_reg_write(vpu, &av1_skip_seg5,
999                          segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
1000         hantro_reg_write(vpu, &av1_global_mv_seg5,
1001                          segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1002
1003         hantro_reg_write(vpu, &av1_quant_seg6,
1004                          segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1005         hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1006                          segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1007         hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1008                          segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1009         hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1010                          segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1011         hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1012                          segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1013         hantro_reg_write(vpu, &av1_refpic_seg6,
1014                          segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1015         hantro_reg_write(vpu, &av1_skip_seg6,
1016                          segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1017         hantro_reg_write(vpu, &av1_global_mv_seg6,
1018                          segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1019
1020         hantro_reg_write(vpu, &av1_quant_seg7,
1021                          segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1022         hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1023                          segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1024         hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1025                          segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1026         hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1027                          segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1028         hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1029                          segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1030         hantro_reg_write(vpu, &av1_refpic_seg7,
1031                          segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1032         hantro_reg_write(vpu, &av1_skip_seg7,
1033                          segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1034         hantro_reg_write(vpu, &av1_global_mv_seg7,
1035                          segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1036 }
1037
1038 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1039 {
1040         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1041         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1042         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1043         const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1044         const struct v4l2_av1_quantization *quantization = &frame->quantization;
1045         int i;
1046
1047         for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1048                 int qindex = quantization->base_q_idx;
1049
1050                 if (segmentation->feature_enabled[i] &
1051                     V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1052                         qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1053                 }
1054                 qindex = clamp(qindex, 0, 255);
1055
1056                 if (qindex ||
1057                     quantization->delta_q_y_dc ||
1058                     quantization->delta_q_u_dc ||
1059                     quantization->delta_q_u_ac ||
1060                     quantization->delta_q_v_dc ||
1061                     quantization->delta_q_v_ac)
1062                         return false;
1063         }
1064         return true;
1065 }
1066
1067 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1068 {
1069         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1070         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1071         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1072         const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1073         bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1074         struct hantro_dev *vpu = ctx->dev;
1075
1076         hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1077         hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1078         hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1079
1080         hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1081         hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1082         hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1083         hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1084
1085         if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1086             !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1087             !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1088                 hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1089                                  loop_filter->ref_deltas[0]);
1090                 hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1091                                  loop_filter->ref_deltas[1]);
1092                 hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1093                                  loop_filter->ref_deltas[2]);
1094                 hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1095                                  loop_filter->ref_deltas[3]);
1096                 hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1097                                  loop_filter->ref_deltas[4]);
1098                 hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1099                                  loop_filter->ref_deltas[5]);
1100                 hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1101                                  loop_filter->ref_deltas[6]);
1102                 hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1103                                  loop_filter->ref_deltas[7]);
1104                 hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1105                                  loop_filter->mode_deltas[0]);
1106                 hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1107                                  loop_filter->mode_deltas[1]);
1108         } else {
1109                 hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1110                 hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1111                 hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1112                 hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1113                 hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1114                 hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1115                 hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1116                 hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1117                 hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1118                 hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1119         }
1120
1121         hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1122         hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1123 }
1124
1125 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1126 {
1127         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1128         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1129         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1130         bool frame_is_intra = IS_INTRA(frame->frame_type);
1131         struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1132         int i;
1133
1134         if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1135                 return;
1136
1137         for (i = 0; i < NUM_REF_FRAMES; i++) {
1138                 if (frame->refresh_frame_flags & BIT(i)) {
1139                         struct mvcdfs stored_mv_cdf;
1140
1141                         rockchip_av1_get_cdfs(ctx, i);
1142                         stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1143                         *av1_dec->cdfs = *out_cdfs;
1144                         if (frame_is_intra) {
1145                                 av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1146                                 *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1147                         }
1148                         rockchip_av1_store_cdfs(ctx,
1149                                                 frame->refresh_frame_flags);
1150                         break;
1151                 }
1152         }
1153 }
1154
/*
 * Non-static entry point that saves the CDFs of the frame described by @ctx
 * by delegating to rockchip_vpu981_av1_dec_update_prob().
 *
 * NOTE(review): presumably called once the hardware has finished decoding a
 * frame; confirm against the caller.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1159
1160 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1161 {
1162         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1163         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1164         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1165         const struct v4l2_av1_quantization *quantization = &frame->quantization;
1166         struct hantro_dev *vpu = ctx->dev;
1167         bool error_resilient_mode =
1168             !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1169         bool frame_is_intra = IS_INTRA(frame->frame_type);
1170
1171         if (error_resilient_mode || frame_is_intra ||
1172             frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1173                 av1_dec->cdfs = &av1_dec->default_cdfs;
1174                 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1175                 rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1176                                                  av1_dec->cdfs);
1177         } else {
1178                 rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1179         }
1180         rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1181
1182         memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1183
1184         if (frame_is_intra) {
1185                 int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1186                 /* Overwrite MV context area with intrabc MV context */
1187                 memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1188                        sizeof(struct mvcdfs));
1189         }
1190
1191         hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1192         hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1193 }
1194
/*
 * Build a 256 entry film grain scaling look-up table from a piecewise
 * linear function.
 *
 * @values:      x coordinate of each point
 * @scaling:     y coordinate (scaling value) of each point
 * @num_points:  number of valid entries in @values and @scaling
 * @scaling_lut: output table, 256 bytes, fully written on return provided
 *               the point values are increasing
 *
 * With no points the table is all zeros. Otherwise:
 *  - entries below the first point take the first point's scaling value,
 *  - entries between two consecutive points are linearly interpolated in
 *    16.16 fixed point with rounding,
 *  - entries from the last point onwards take the last scaling value.
 */
static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
					      u8 num_points, u8 *scaling_lut)
{
	int i, point;

	if (num_points == 0) {
		memset(scaling_lut, 0, 256);
		return;
	}

	/*
	 * Flat segment in front of the first point. Without it these entries
	 * would keep whatever the buffer held before the call.
	 */
	memset(scaling_lut, scaling[0], values[0]);

	for (point = 0; point < num_points - 1; point++) {
		int x;
		s32 delta_y = scaling[point + 1] - scaling[point];
		s32 delta_x = values[point + 1] - values[point];
		/* Slope in 16.16 fixed point; 0 guards the division by zero. */
		s64 delta =
		    delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
					 delta_x) : 0;

		/* Runs zero times when the points are not increasing. */
		for (x = 0; x < delta_x; x++) {
			scaling_lut[values[point] + x] =
			    scaling[point] +
			    (s32)((x * delta + 32768) >> 16);
		}
	}

	/* Flat segment from the last point up to the end of the table. */
	for (i = values[num_points - 1]; i < 256; i++)
		scaling_lut[i] = scaling[num_points - 1];
}
1224
1225 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1226 {
1227         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1228         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1229         const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1230         struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1231         struct hantro_dev *vpu = ctx->dev;
1232         bool scaling_from_luma =
1233                 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1234         s32 (*ar_coeffs_y)[24];
1235         s32 (*ar_coeffs_cb)[25];
1236         s32 (*ar_coeffs_cr)[25];
1237         s32 (*luma_grain_block)[73][82];
1238         s32 (*cb_grain_block)[38][44];
1239         s32 (*cr_grain_block)[38][44];
1240         s32 ar_coeff_lag, ar_coeff_shift;
1241         s32 grain_scale_shift, bitdepth;
1242         s32 grain_center, grain_min, grain_max;
1243         int i, j;
1244
1245         hantro_reg_write(vpu, &av1_apply_grain, 0);
1246
1247         if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1248                 hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1249                 hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1250                 hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1251                 hantro_reg_write(vpu, &av1_scaling_shift, 0);
1252                 hantro_reg_write(vpu, &av1_cb_mult, 0);
1253                 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1254                 hantro_reg_write(vpu, &av1_cb_offset, 0);
1255                 hantro_reg_write(vpu, &av1_cr_mult, 0);
1256                 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1257                 hantro_reg_write(vpu, &av1_cr_offset, 0);
1258                 hantro_reg_write(vpu, &av1_overlap_flag, 0);
1259                 hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1260                 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1261                 hantro_reg_write(vpu, &av1_random_seed, 0);
1262                 hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1263                 return;
1264         }
1265
1266         ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1267         ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1268         ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1269         luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1270         cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1271         cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1272
1273         if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1274             !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1275                 pr_warn("Fail allocating memory for film grain parameters\n");
1276                 goto alloc_fail;
1277         }
1278
1279         hantro_reg_write(vpu, &av1_apply_grain, 1);
1280
1281         hantro_reg_write(vpu, &av1_num_y_points_b,
1282                          film_grain->num_y_points > 0);
1283         hantro_reg_write(vpu, &av1_num_cb_points_b,
1284                          film_grain->num_cb_points > 0);
1285         hantro_reg_write(vpu, &av1_num_cr_points_b,
1286                          film_grain->num_cr_points > 0);
1287         hantro_reg_write(vpu, &av1_scaling_shift,
1288                          film_grain->grain_scaling_minus_8 + 8);
1289
1290         if (!scaling_from_luma) {
1291                 hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1292                 hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1293                 hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1294                 hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1295                 hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1296                 hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1297         } else {
1298                 hantro_reg_write(vpu, &av1_cb_mult, 0);
1299                 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1300                 hantro_reg_write(vpu, &av1_cb_offset, 0);
1301                 hantro_reg_write(vpu, &av1_cr_mult, 0);
1302                 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1303                 hantro_reg_write(vpu, &av1_cr_offset, 0);
1304         }
1305
1306         hantro_reg_write(vpu, &av1_overlap_flag,
1307                          !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1308         hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1309                          !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1310         hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1311         hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1312
1313         rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1314                                                       film_grain->point_y_scaling,
1315                                                       film_grain->num_y_points,
1316                                                       fgmem->scaling_lut_y);
1317
1318         if (film_grain->flags &
1319             V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1320                 memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1321                        sizeof(*fgmem->scaling_lut_y) * 256);
1322                 memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1323                        sizeof(*fgmem->scaling_lut_y) * 256);
1324         } else {
1325                 rockchip_vpu981_av1_dec_init_scaling_function
1326                     (film_grain->point_cb_value, film_grain->point_cb_scaling,
1327                      film_grain->num_cb_points, fgmem->scaling_lut_cb);
1328                 rockchip_vpu981_av1_dec_init_scaling_function
1329                     (film_grain->point_cr_value, film_grain->point_cr_scaling,
1330                      film_grain->num_cr_points, fgmem->scaling_lut_cr);
1331         }
1332
1333         for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1334                 if (i < 24)
1335                         (*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1336                 (*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1337                 (*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1338         }
1339
1340         ar_coeff_lag = film_grain->ar_coeff_lag;
1341         ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1342         grain_scale_shift = film_grain->grain_scale_shift;
1343         bitdepth = ctx->bit_depth;
1344         grain_center = 128 << (bitdepth - 8);
1345         grain_min = 0 - grain_center;
1346         grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1347
1348         rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1349                                                film_grain->num_y_points, grain_scale_shift,
1350                                                ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1351                                                grain_min, grain_max, film_grain->grain_seed);
1352
1353         rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1354                                                  cr_grain_block, bitdepth,
1355                                                  film_grain->num_y_points,
1356                                                  film_grain->num_cb_points,
1357                                                  film_grain->num_cr_points,
1358                                                  grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1359                                                  ar_coeffs_cr, ar_coeff_shift, grain_min,
1360                                                  grain_max,
1361                                                  scaling_from_luma,
1362                                                  film_grain->grain_seed);
1363
1364         for (i = 0; i < 64; i++) {
1365                 for (j = 0; j < 64; j++)
1366                         fgmem->cropped_luma_grain_block[i * 64 + j] =
1367                                 (*luma_grain_block)[i + 9][j + 9];
1368         }
1369
1370         for (i = 0; i < 32; i++) {
1371                 for (j = 0; j < 32; j++) {
1372                         fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1373                                 (*cb_grain_block)[i + 6][j + 6];
1374                         fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1375                                 (*cr_grain_block)[i + 6][j + 6];
1376                 }
1377         }
1378
1379         hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1380
1381 alloc_fail:
1382         kfree(ar_coeffs_y);
1383         kfree(ar_coeffs_cb);
1384         kfree(ar_coeffs_cr);
1385         kfree(luma_grain_block);
1386         kfree(cb_grain_block);
1387         kfree(cr_grain_block);
1388 }
1389
1390 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1391 {
1392         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1393         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1394         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1395         const struct v4l2_av1_cdef *cdef = &frame->cdef;
1396         struct hantro_dev *vpu = ctx->dev;
1397         u32 luma_pri_strength = 0;
1398         u16 luma_sec_strength = 0;
1399         u32 chroma_pri_strength = 0;
1400         u16 chroma_sec_strength = 0;
1401         int i;
1402
1403         hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1404         hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1405
1406         for (i = 0; i < BIT(cdef->bits); i++) {
1407                 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1408                 if (cdef->y_sec_strength[i] == 4)
1409                         luma_sec_strength |= 3 << (i * 2);
1410                 else
1411                         luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1412
1413                 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1414                 if (cdef->uv_sec_strength[i] == 4)
1415                         chroma_sec_strength |= 3 << (i * 2);
1416                 else
1417                         chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1418         }
1419
1420         hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1421                          luma_pri_strength);
1422         hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1423                          luma_sec_strength);
1424         hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1425                          chroma_pri_strength);
1426         hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1427                          chroma_sec_strength);
1428
1429         hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1430 }
1431
1432 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1433 {
1434         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1435         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1436         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1437         const struct v4l2_av1_loop_restoration *loop_restoration =
1438             &frame->loop_restoration;
1439         struct hantro_dev *vpu = ctx->dev;
1440         u16 lr_type = 0, lr_unit_size = 0;
1441         u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1442         int i;
1443
1444         if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1445                 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1446                 restoration_unit_size[1] =
1447                     1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448                 restoration_unit_size[2] =
1449                     1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1450         }
1451
1452         for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1453                 lr_type |=
1454                     loop_restoration->frame_restoration_type[i] << (i * 2);
1455                 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1456         }
1457
1458         hantro_reg_write(vpu, &av1_lr_type, lr_type);
1459         hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1460         hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1461 }
1462
1463 static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
1464 {
1465         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1466         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1467         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1468         struct hantro_dev *vpu = ctx->dev;
1469         u8 superres_scale_denominator = SCALE_NUMERATOR;
1470         int superres_luma_step = RS_SCALE_SUBPEL_BITS;
1471         int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1472         int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1473         int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1474         int superres_init_luma_subpel_x = 0;
1475         int superres_init_chroma_subpel_x = 0;
1476         int superres_is_scaled = 0;
1477         int min_w = min_t(uint32_t, 16, frame->upscaled_width);
1478         int upscaled_luma, downscaled_luma;
1479         int downscaled_chroma, upscaled_chroma;
1480         int step_luma, step_chroma;
1481         int err_luma, err_chroma;
1482         int initial_luma, initial_chroma;
1483         int width = 0;
1484
1485         if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1486                 superres_scale_denominator = frame->superres_denom;
1487
1488         if (superres_scale_denominator <= SCALE_NUMERATOR)
1489                 goto set_regs;
1490
1491         width = (frame->upscaled_width * SCALE_NUMERATOR +
1492                 (superres_scale_denominator / 2)) / superres_scale_denominator;
1493
1494         if (width < min_w)
1495                 width = min_w;
1496
1497         if (width == frame->upscaled_width)
1498                 goto set_regs;
1499
1500         superres_is_scaled = 1;
1501         upscaled_luma = frame->upscaled_width;
1502         downscaled_luma = width;
1503         downscaled_chroma = (downscaled_luma + 1) >> 1;
1504         upscaled_chroma = (upscaled_luma + 1) >> 1;
1505         step_luma =
1506                 ((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
1507                  (upscaled_luma / 2)) / upscaled_luma;
1508         step_chroma =
1509                 ((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
1510                  (upscaled_chroma / 2)) / upscaled_chroma;
1511         err_luma =
1512                 (upscaled_luma * step_luma)
1513                 - (downscaled_luma << RS_SCALE_SUBPEL_BITS);
1514         err_chroma =
1515                 (upscaled_chroma * step_chroma)
1516                 - (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
1517         initial_luma =
1518                 ((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
1519                   + upscaled_luma / 2)
1520                  / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
1521                 & RS_SCALE_SUBPEL_MASK;
1522         initial_chroma =
1523                 ((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
1524                   + upscaled_chroma / 2)
1525                  / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
1526                 & RS_SCALE_SUBPEL_MASK;
1527         superres_luma_step = step_luma;
1528         superres_chroma_step = step_chroma;
1529         superres_luma_step_invra =
1530                 ((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
1531                 / downscaled_luma;
1532         superres_chroma_step_invra =
1533                 ((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
1534                 / downscaled_chroma;
1535         superres_init_luma_subpel_x = initial_luma;
1536         superres_init_chroma_subpel_x = initial_chroma;
1537
1538 set_regs:
1539         hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
1540
1541         if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1542                 hantro_reg_write(vpu, &av1_scale_denom_minus9,
1543                                  frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
1544         else
1545                 hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
1546
1547         hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
1548         hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
1549         hantro_reg_write(vpu, &av1_superres_luma_step_invra,
1550                          superres_luma_step_invra);
1551         hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
1552                          superres_chroma_step_invra);
1553         hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
1554                          superres_init_luma_subpel_x);
1555         hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
1556                          superres_init_chroma_subpel_x);
1557         hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
1558
1559         hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
1560 }
1561
1562 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1563 {
1564         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1565         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1566         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1567         struct hantro_dev *vpu = ctx->dev;
1568         int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1569         int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1570         int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1571                             - (frame->frame_width_minus_1 + 1);
1572         int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1573                              - (frame->frame_height_minus_1 + 1);
1574
1575         hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1576         hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1577         hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1578         hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1579
1580         rockchip_vpu981_av1_dec_set_superres_params(ctx);
1581 }
1582
1583 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1584 {
1585         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1586         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1587         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1588         struct hantro_dev *vpu = ctx->dev;
1589         bool use_ref_frame_mvs =
1590             !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1591         int cur_frame_offset = frame->order_hint;
1592         int alt_frame_offset = 0;
1593         int gld_frame_offset = 0;
1594         int bwd_frame_offset = 0;
1595         int alt2_frame_offset = 0;
1596         int refs_selected[3] = { 0, 0, 0 };
1597         int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1598         int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1599         int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600         int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1601         int mf_types[3] = { 0, 0, 0 };
1602         int ref_stamp = 2;
1603         int ref_ind = 0;
1604         int rf, idx;
1605
1606         alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1607         gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1608         bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1609         alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1610
1611         idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1612         if (idx >= 0) {
1613                 int alt_frame_offset_in_lst =
1614                         av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1615                 bool is_lst_overlay =
1616                     (alt_frame_offset_in_lst == gld_frame_offset);
1617
1618                 if (!is_lst_overlay) {
1619                         int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1620                         int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1621                         bool lst_intra_only =
1622                             IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1623
1624                         if (lst_mi_cols == cur_mi_cols &&
1625                             lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1626                                 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1627                                 refs_selected[ref_ind++] = LST_BUF_IDX;
1628                         }
1629                 }
1630                 ref_stamp--;
1631         }
1632
1633         idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1634         if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1635                 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1636                 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1637                 bool bwd_intra_only =
1638                     IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1639
1640                 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1641                     !bwd_intra_only) {
1642                         mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1643                         refs_selected[ref_ind++] = BWD_BUF_IDX;
1644                         ref_stamp--;
1645                 }
1646         }
1647
1648         idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1649         if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1650                 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1651                 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1652                 bool alt2_intra_only =
1653                     IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1654
1655                 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1656                     !alt2_intra_only) {
1657                         mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1658                         refs_selected[ref_ind++] = ALT2_BUF_IDX;
1659                         ref_stamp--;
1660                 }
1661         }
1662
1663         idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1664         if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1665             ref_stamp >= 0) {
1666                 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1667                 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1668                 bool alt_intra_only =
1669                     IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1670
1671                 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1672                     !alt_intra_only) {
1673                         mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1674                         refs_selected[ref_ind++] = ALT_BUF_IDX;
1675                         ref_stamp--;
1676                 }
1677         }
1678
1679         idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1680         if (idx >= 0 && ref_stamp >= 0) {
1681                 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1682                 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1683                 bool lst2_intra_only =
1684                     IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1685
1686                 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1687                     !lst2_intra_only) {
1688                         mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1689                         refs_selected[ref_ind++] = LST2_BUF_IDX;
1690                         ref_stamp--;
1691                 }
1692         }
1693
1694         for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1695                 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1696                 if (idx >= 0) {
1697                         int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1698
1699                         cur_offset[rf] =
1700                             rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1701                         cur_roffset[rf] =
1702                             rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1703                 } else {
1704                         cur_offset[rf] = 0;
1705                         cur_roffset[rf] = 0;
1706                 }
1707         }
1708
1709         hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1710         hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1711         hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1712         hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1713
1714         hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1715         hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1716         hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1717         hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1718         hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1719         hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1720         hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1721
1722         if (use_ref_frame_mvs && ref_ind > 0 &&
1723             cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1724             cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1725                 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1726                 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1727                 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1728                 int val;
1729
1730                 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1731
1732                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1733                 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1734
1735                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1736                 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1737
1738                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1739                 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1740
1741                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1742                 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1743
1744                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1745                 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1746
1747                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1748                 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1749
1750                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1751                 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1752         }
1753
1754         hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1755         hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1756         hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1757         hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1758         hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1759         hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1760         hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1761
1762         if (use_ref_frame_mvs && ref_ind > 1 &&
1763             cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1764             cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1765                 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1766                 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1767                 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1768                 int val;
1769
1770                 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1771
1772                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1773                 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1774
1775                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1776                 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1777
1778                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1779                 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1780
1781                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1782                 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1783
1784                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1785                 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1786
1787                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1788                 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1789
1790                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1791                 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1792         }
1793
1794         hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1795         hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1796         hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1797         hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1798         hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1799         hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1800         hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1801
1802         if (use_ref_frame_mvs && ref_ind > 2 &&
1803             cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1804             cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1805                 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1806                 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1807                 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1808                 int val;
1809
1810                 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1811
1812                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1813                 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1814
1815                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1816                 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1817
1818                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1819                 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1820
1821                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1822                 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1823
1824                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1825                 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1826
1827                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1828                 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1829
1830                 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1831                 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1832         }
1833
1834         hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1835         hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1836         hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1837         hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1838         hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1839         hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1840         hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1841
1842         hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1843         hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1844         hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1845         hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1846         hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1847         hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1848         hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1849
1850         hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1851         hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1852         hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1853 }
1854
1855 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1856 {
1857         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1858         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1859         const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1860         int frame_type = frame->frame_type;
1861         bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1862         int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1863         struct hantro_dev *vpu = ctx->dev;
1864         int i, ref_frames = 0;
1865         bool scale_enable = false;
1866
1867         if (IS_INTRA(frame_type) && !allow_intrabc)
1868                 return;
1869
1870         if (!allow_intrabc) {
1871                 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1872                         int idx = rockchip_vpu981_get_frame_index(ctx, i);
1873
1874                         if (idx >= 0)
1875                                 ref_count[idx]++;
1876                 }
1877
1878                 for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1879                         if (ref_count[i])
1880                                 ref_frames++;
1881                 }
1882         } else {
1883                 ref_frames = 1;
1884         }
1885         hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1886
1887         rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1888
1889         for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1890                 u32 ref = i - 1;
1891                 int idx = 0;
1892                 int width, height;
1893
1894                 if (allow_intrabc) {
1895                         idx = av1_dec->current_frame_index;
1896                         width = frame->frame_width_minus_1 + 1;
1897                         height = frame->frame_height_minus_1 + 1;
1898                 } else {
1899                         if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1900                                 idx = rockchip_vpu981_get_frame_index(ctx, ref);
1901                         width = av1_dec->frame_refs[idx].width;
1902                         height = av1_dec->frame_refs[idx].height;
1903                 }
1904
1905                 scale_enable |=
1906                     rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1907                                                     height);
1908
1909                 rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1910                                                       av1_dec->ref_frame_sign_bias[i]);
1911         }
1912         hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1913
1914         hantro_reg_write(vpu, &av1_ref0_gm_mode,
1915                          frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1916         hantro_reg_write(vpu, &av1_ref1_gm_mode,
1917                          frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1918         hantro_reg_write(vpu, &av1_ref2_gm_mode,
1919                          frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1920         hantro_reg_write(vpu, &av1_ref3_gm_mode,
1921                          frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1922         hantro_reg_write(vpu, &av1_ref4_gm_mode,
1923                          frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1924         hantro_reg_write(vpu, &av1_ref5_gm_mode,
1925                          frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1926         hantro_reg_write(vpu, &av1_ref6_gm_mode,
1927                          frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1928
1929         rockchip_vpu981_av1_dec_set_other_frames(ctx);
1930 }
1931
1932 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1933 {
1934         struct hantro_dev *vpu = ctx->dev;
1935         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1936         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1937
1938         hantro_reg_write(vpu, &av1_skip_mode,
1939                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1940         hantro_reg_write(vpu, &av1_tempor_mvp_e,
1941                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1942         hantro_reg_write(vpu, &av1_delta_lf_res_log,
1943                          ctrls->frame->loop_filter.delta_lf_res);
1944         hantro_reg_write(vpu, &av1_delta_lf_multi,
1945                          !!(ctrls->frame->loop_filter.flags
1946                             & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1947         hantro_reg_write(vpu, &av1_delta_lf_present,
1948                          !!(ctrls->frame->loop_filter.flags
1949                             & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1950         hantro_reg_write(vpu, &av1_disable_cdf_update,
1951                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1952         hantro_reg_write(vpu, &av1_allow_warp,
1953                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1954         hantro_reg_write(vpu, &av1_show_frame,
1955                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1956         hantro_reg_write(vpu, &av1_switchable_motion_mode,
1957                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
1958         hantro_reg_write(vpu, &av1_enable_cdef,
1959                          !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1960         hantro_reg_write(vpu, &av1_allow_masked_compound,
1961                          !!(ctrls->sequence->flags
1962                             & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1963         hantro_reg_write(vpu, &av1_allow_interintra,
1964                          !!(ctrls->sequence->flags
1965                             & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1966         hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1967                          !!(ctrls->sequence->flags
1968                             & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1969         hantro_reg_write(vpu, &av1_allow_filter_intra,
1970                          !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1971         hantro_reg_write(vpu, &av1_enable_jnt_comp,
1972                          !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1973         hantro_reg_write(vpu, &av1_enable_dual_filter,
1974                          !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1975         hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1976                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1977         hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1978                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1979         hantro_reg_write(vpu, &av1_allow_intrabc,
1980                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
1981
1982         if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1983                 hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1984         else
1985                 hantro_reg_write(vpu, &av1_force_interger_mv,
1986                                  !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
1987
1988         hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1989         hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1990         hantro_reg_write(vpu, &av1_delta_q_present,
1991                          !!(ctrls->frame->quantization.flags
1992                             & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
1993
1994         hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1995         hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1996         hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1997         hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1998
1999         hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
2000         hantro_reg_write(vpu, &av1_high_prec_mv_e,
2001                          !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2002         hantro_reg_write(vpu, &av1_comp_pred_mode,
2003                          (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
2004         hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
2005         hantro_reg_write(vpu, &av1_max_cb_size,
2006                          (ctrls->sequence->flags
2007                           & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2008         hantro_reg_write(vpu, &av1_min_cb_size, 3);
2009
2010         hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2011         hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2012         hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2013         hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2014         hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2015         hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2016         hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2017         hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2018         hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2019         hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2020         hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2021
2022         hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2023         hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2024         hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
2025         if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2026                 hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2027                 hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2028                 hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2029         } else {
2030                 hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2031                 hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2032                 hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2033         }
2034
2035         hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2036         hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2037         hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
2038
2039         hantro_reg_write(vpu, &av1_skip_ref0,
2040                          (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2041         hantro_reg_write(vpu, &av1_skip_ref1,
2042                          (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
2043
2044         hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2045         hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
2046 }
2047
2048 static void
2049 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2050                                          struct vb2_v4l2_buffer *vb2_src)
2051 {
2052         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2053         struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2054         const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2055             ctrls->tile_group_entry;
2056         struct hantro_dev *vpu = ctx->dev;
2057         dma_addr_t src_dma;
2058         u32 src_len, src_buf_len;
2059         int start_bit, offset;
2060
2061         src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2062         src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2063         src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2064
2065         start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2066         offset = group_entry[0].tile_offset & ~0xf;
2067
2068         hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2069         hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2070         hantro_reg_write(vpu, &av1_stream_len, src_len);
2071         hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2072         hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2073 }
2074
2075 static void
2076 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2077 {
2078         struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2079         struct hantro_dev *vpu = ctx->dev;
2080         struct hantro_decoded_buffer *dst;
2081         struct vb2_v4l2_buffer *vb2_dst;
2082         dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2083         size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2084         size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2085
2086         vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2087         dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2088         luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2089         chroma_addr = luma_addr + cr_offset;
2090         mv_addr = luma_addr + mv_offset;
2091
2092         hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2093         hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2094         hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2095 }
2096
2097 int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
2098 {
2099         struct hantro_dev *vpu = ctx->dev;
2100         struct vb2_v4l2_buffer *vb2_src;
2101         int ret;
2102
2103         hantro_start_prepare_run(ctx);
2104
2105         ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
2106         if (ret)
2107                 goto prepare_error;
2108
2109         vb2_src = hantro_get_src_buf(ctx);
2110         if (!vb2_src) {
2111                 ret = -EINVAL;
2112                 goto prepare_error;
2113         }
2114
2115         rockchip_vpu981_av1_dec_clean_refs(ctx);
2116         rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
2117
2118         rockchip_vpu981_av1_dec_set_parameters(ctx);
2119         rockchip_vpu981_av1_dec_set_global_model(ctx);
2120         rockchip_vpu981_av1_dec_set_tile_info(ctx);
2121         rockchip_vpu981_av1_dec_set_reference_frames(ctx);
2122         rockchip_vpu981_av1_dec_set_segmentation(ctx);
2123         rockchip_vpu981_av1_dec_set_loopfilter(ctx);
2124         rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
2125         rockchip_vpu981_av1_dec_set_cdef(ctx);
2126         rockchip_vpu981_av1_dec_set_lr(ctx);
2127         rockchip_vpu981_av1_dec_set_fgs(ctx);
2128         rockchip_vpu981_av1_dec_set_prob(ctx);
2129
2130         hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
2131         hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
2132         hantro_reg_write(vpu, &av1_write_mvs_e, 1);
2133         hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
2134         hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
2135
2136         hantro_reg_write(vpu, &av1_dec_abort_e, 0);
2137         hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
2138
2139         hantro_reg_write(vpu, &av1_dec_alignment, 64);
2140         hantro_reg_write(vpu, &av1_apf_disable, 0);
2141         hantro_reg_write(vpu, &av1_apf_threshold, 8);
2142         hantro_reg_write(vpu, &av1_dec_buswidth, 2);
2143         hantro_reg_write(vpu, &av1_dec_max_burst, 16);
2144         hantro_reg_write(vpu, &av1_error_conceal_e, 0);
2145         hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
2146         hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
2147
2148         hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
2149         hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
2150         hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
2151         hantro_reg_write(vpu, &av1_timeout_override_e, 1);
2152
2153         rockchip_vpu981_av1_dec_set_output_buffer(ctx);
2154         rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
2155
2156         hantro_end_prepare_run(ctx);
2157
2158         hantro_reg_write(vpu, &av1_dec_e, 1);
2159
2160         return 0;
2161
2162 prepare_error:
2163         hantro_end_prepare_run(ctx);
2164         hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
2165         return ret;
2166 }
2167
2168 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2169 {
2170         struct hantro_dev *vpu = ctx->dev;
2171         int width = ctx->dst_fmt.width;
2172         int height = ctx->dst_fmt.height;
2173         struct vb2_v4l2_buffer *vb2_dst;
2174         size_t chroma_offset;
2175         dma_addr_t dst_dma;
2176
2177         vb2_dst = hantro_get_dst_buf(ctx);
2178
2179         dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2180         chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2181             ctx->dst_fmt.height;
2182
2183         /* enable post processor */
2184         hantro_reg_write(vpu, &av1_pp_out_e, 1);
2185         hantro_reg_write(vpu, &av1_pp_in_format, 0);
2186         hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2187         hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2188
2189         hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2190         hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2191         hantro_reg_write(vpu, &av1_pp_out_height, height);
2192         hantro_reg_write(vpu, &av1_pp_out_width, width);
2193         hantro_reg_write(vpu, &av1_pp_out_y_stride,
2194                          ctx->dst_fmt.plane_fmt[0].bytesperline);
2195         hantro_reg_write(vpu, &av1_pp_out_c_stride,
2196                          ctx->dst_fmt.plane_fmt[0].bytesperline);
2197         switch (ctx->dst_fmt.pixelformat) {
2198         case V4L2_PIX_FMT_P010:
2199                 hantro_reg_write(vpu, &av1_pp_out_format, 1);
2200                 break;
2201         case V4L2_PIX_FMT_NV12:
2202                 hantro_reg_write(vpu, &av1_pp_out_format, 3);
2203                 break;
2204         default:
2205                 hantro_reg_write(vpu, &av1_pp_out_format, 0);
2206         }
2207
2208         hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2209         hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2210         hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2211         hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2212         hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2213         hantro_reg_write(vpu, &av1_pp_up_level, 0);
2214         hantro_reg_write(vpu, &av1_pp_down_level, 0);
2215         hantro_reg_write(vpu, &av1_pp_exist, 0);
2216
2217         hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2218         hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2219 }
2220
2221 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2222 {
2223         struct hantro_dev *vpu = ctx->dev;
2224
2225         /* disable post processor */
2226         hantro_reg_write(vpu, &av1_pp_out_e, 0);
2227 }
2228
2229 const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
2230         .enable = rockchip_vpu981_postproc_enable,
2231         .disable = rockchip_vpu981_postproc_disable,
2232 };