// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Collabora
 *
 * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
 */
8 #include <media/v4l2-mem2mem.h>
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
/* Hardware decoding-mode selector value for AV1 on this core. */
#define AV1_DEC_MODE 17
#define GM_GLOBAL_MODELS_PER_FRAME 7
/* One global-motion model: six 32-bit params plus four 16-bit shear values. */
#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
#define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES 128
/* 16 bytes of layout data are emitted per tile (see set_tile_info()). */
#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS 24
/* Reference scaling factors are fixed point with 14 fractional bits. */
#define AV1_REF_SCALE_SHIFT 14
#define AV1_INVALID_IDX -1
#define MAX_FRAME_DISTANCE 31
#define AV1_PRIMARY_REF_NONE 7
#define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
 * they are not part of the specification
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
#define V4L2_AV1_SEG_LVL_ALT_LF_U 3
#define V4L2_AV1_SEG_LVL_ALT_LF_V 4

/* Superres scaling constants (AV1 spec superres_denom derivation). */
#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

/* Fixed-point precision used for reference-frame scaling steps. */
#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

/* True for frame types that use no inter prediction. */
#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))

/* Reference buffer slots, indexed relative to LAST_FRAME. */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

/* Reciprocal lookup-table parameters (see div_lut[] below). */
#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16
/* Round @value to nearest when dividing by 2^@n (statement expression
 * to evaluate each argument exactly once). */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n = n;				\
	typeof(value) _value = value;			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/* Signed variant: rounds the magnitude, then restores the sign. */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_ = n;						\
	typeof(value) _value_ = value;					\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
/*
 * Film-grain synthesis data handed to the hardware: one scaling LUT per
 * plane plus the cropped luma/chroma grain sample blocks.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
83 static const short div_lut[DIV_LUT_NUM + 1] = {
84 16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
85 15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
86 15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
87 14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
88 13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
89 13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
90 13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
91 12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
92 12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
93 11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
94 11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
95 11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
96 10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
97 10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
98 10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
99 9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
100 9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
101 9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
102 9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
103 9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
104 8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
105 8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
106 8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
107 8240, 8224, 8208, 8192,
110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
112 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
116 int i, idx = frame->ref_frame_idx[ref];
118 if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 return AV1_INVALID_IDX;
121 timestamp = frame->reference_frame_ts[idx];
122 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 if (!av1_dec->frame_refs[i].used)
125 if (av1_dec->frame_refs[i].timestamp == timestamp)
129 return AV1_INVALID_IDX;
132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
134 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 int idx = rockchip_vpu981_get_frame_index(ctx, ref);
137 if (idx != AV1_INVALID_IDX)
138 return av1_dec->frame_refs[idx].order_hint;
143 static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
146 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
147 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
148 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
151 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
154 if (av1_dec->frame_refs[i].used)
157 av1_dec->frame_refs[i].width = frame->frame_width_minus_1 + 1;
158 av1_dec->frame_refs[i].height = frame->frame_height_minus_1 + 1;
159 av1_dec->frame_refs[i].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
160 av1_dec->frame_refs[i].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
161 av1_dec->frame_refs[i].timestamp = timestamp;
162 av1_dec->frame_refs[i].frame_type = frame->frame_type;
163 av1_dec->frame_refs[i].order_hint = frame->order_hint;
164 if (!av1_dec->frame_refs[i].vb2_ref)
165 av1_dec->frame_refs[i].vb2_ref = hantro_get_dst_buf(ctx);
167 for (j = 0; j < V4L2_AV1_TOTAL_REFS_PER_FRAME; j++)
168 av1_dec->frame_refs[i].order_hints[j] = frame->order_hints[j];
169 av1_dec->frame_refs[i].used = true;
170 av1_dec->current_frame_index = i;
175 return AV1_INVALID_IDX;
178 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
180 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
183 av1_dec->frame_refs[idx].used = false;
186 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
188 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
189 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
193 for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
194 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
197 if (!av1_dec->frame_refs[idx].used)
200 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
201 if (ctrls->frame->reference_frame_ts[ref] == timestamp)
206 rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
210 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
212 return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
215 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
217 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
219 return ALIGN((cr_offset * 3) / 2, 64);
222 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
224 struct hantro_dev *vpu = ctx->dev;
225 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
227 if (av1_dec->db_data_col.cpu)
228 dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
229 av1_dec->db_data_col.cpu,
230 av1_dec->db_data_col.dma);
231 av1_dec->db_data_col.cpu = NULL;
233 if (av1_dec->db_ctrl_col.cpu)
234 dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
235 av1_dec->db_ctrl_col.cpu,
236 av1_dec->db_ctrl_col.dma);
237 av1_dec->db_ctrl_col.cpu = NULL;
239 if (av1_dec->cdef_col.cpu)
240 dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
241 av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
242 av1_dec->cdef_col.cpu = NULL;
244 if (av1_dec->sr_col.cpu)
245 dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
246 av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
247 av1_dec->sr_col.cpu = NULL;
249 if (av1_dec->lr_col.cpu)
250 dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
251 av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
252 av1_dec->lr_col.cpu = NULL;
255 static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
257 struct hantro_dev *vpu = ctx->dev;
258 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
259 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
260 unsigned int num_tile_cols = 1 << ctrls->tile_group_entry->tile_col;
261 unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
262 unsigned int height_in_sb = height / 64;
263 unsigned int stripe_num = ((height + 8) + 63) / 64;
266 if (av1_dec->db_data_col.size >=
267 ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
270 rockchip_vpu981_av1_dec_tiles_free(ctx);
272 size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
273 av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
274 &av1_dec->db_data_col.dma,
276 if (!av1_dec->db_data_col.cpu)
277 goto buffer_allocation_error;
278 av1_dec->db_data_col.size = size;
280 size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
281 av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
282 &av1_dec->db_ctrl_col.dma,
284 if (!av1_dec->db_ctrl_col.cpu)
285 goto buffer_allocation_error;
286 av1_dec->db_ctrl_col.size = size;
288 size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
289 av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
290 &av1_dec->cdef_col.dma,
292 if (!av1_dec->cdef_col.cpu)
293 goto buffer_allocation_error;
294 av1_dec->cdef_col.size = size;
296 size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
297 av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
298 &av1_dec->sr_col.dma,
300 if (!av1_dec->sr_col.cpu)
301 goto buffer_allocation_error;
302 av1_dec->sr_col.size = size;
304 size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
305 av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
306 &av1_dec->lr_col.dma,
308 if (!av1_dec->lr_col.cpu)
309 goto buffer_allocation_error;
310 av1_dec->lr_col.size = size;
312 av1_dec->num_tile_cols_allocated = num_tile_cols;
315 buffer_allocation_error:
316 rockchip_vpu981_av1_dec_tiles_free(ctx);
320 void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
322 struct hantro_dev *vpu = ctx->dev;
323 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
325 if (av1_dec->global_model.cpu)
326 dma_free_coherent(vpu->dev, av1_dec->global_model.size,
327 av1_dec->global_model.cpu,
328 av1_dec->global_model.dma);
329 av1_dec->global_model.cpu = NULL;
331 if (av1_dec->tile_info.cpu)
332 dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
333 av1_dec->tile_info.cpu,
334 av1_dec->tile_info.dma);
335 av1_dec->tile_info.cpu = NULL;
337 if (av1_dec->film_grain.cpu)
338 dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
339 av1_dec->film_grain.cpu,
340 av1_dec->film_grain.dma);
341 av1_dec->film_grain.cpu = NULL;
343 if (av1_dec->prob_tbl.cpu)
344 dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
345 av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
346 av1_dec->prob_tbl.cpu = NULL;
348 if (av1_dec->prob_tbl_out.cpu)
349 dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
350 av1_dec->prob_tbl_out.cpu,
351 av1_dec->prob_tbl_out.dma);
352 av1_dec->prob_tbl_out.cpu = NULL;
354 if (av1_dec->tile_buf.cpu)
355 dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
356 av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
357 av1_dec->tile_buf.cpu = NULL;
359 rockchip_vpu981_av1_dec_tiles_free(ctx);
362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
364 struct hantro_dev *vpu = ctx->dev;
365 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
367 memset(av1_dec, 0, sizeof(*av1_dec));
369 av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370 &av1_dec->global_model.dma,
372 if (!av1_dec->global_model.cpu)
374 av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
376 av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377 &av1_dec->tile_info.dma,
379 if (!av1_dec->tile_info.cpu)
381 av1_dec->tile_info.size = AV1_MAX_TILES;
383 av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384 ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385 &av1_dec->film_grain.dma,
387 if (!av1_dec->film_grain.cpu)
389 av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
391 av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392 ALIGN(sizeof(struct av1cdfs), 2048),
393 &av1_dec->prob_tbl.dma,
395 if (!av1_dec->prob_tbl.cpu)
397 av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
399 av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400 ALIGN(sizeof(struct av1cdfs), 2048),
401 &av1_dec->prob_tbl_out.dma,
403 if (!av1_dec->prob_tbl_out.cpu)
405 av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406 av1_dec->cdfs = &av1_dec->default_cdfs;
407 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
409 rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
411 av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
413 &av1_dec->tile_buf.dma,
415 if (!av1_dec->tile_buf.cpu)
417 av1_dec->tile_buf.size = AV1_TILE_SIZE;
422 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
424 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
425 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
427 ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
428 if (WARN_ON(!ctrls->sequence))
431 ctrls->tile_group_entry =
432 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
433 if (WARN_ON(!ctrls->tile_group_entry))
436 ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
437 if (WARN_ON(!ctrls->frame))
441 hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
443 return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
/*
 * Zero-based index of the most significant set bit of @n; returns 0 for
 * n == 0 (matching the AV1 reference implementation's convention).
 */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	if (!n)
		return 0;

	return 31 - __builtin_clz(n);
}
453 static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
458 *shift = rockchip_vpu981_av1_dec_get_msb(d);
459 /* e is obtained from D after resetting the most significant 1 bit. */
460 e = d - ((u32)1 << *shift);
461 /* Get the most significant DIV_LUT_BITS (8) bits of e into f */
462 if (*shift > DIV_LUT_BITS)
463 f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
465 f = e << (DIV_LUT_BITS - *shift);
468 *shift += DIV_LUT_PREC_BITS;
469 /* Use f as lookup into the precomputed table of multipliers */
/*
 * Derive the warp shear parameters alpha/beta/gamma/delta from a
 * 6-entry global-motion matrix, then reduce them to
 * WARP_PARAM_REDUCE_BITS precision (AV1 spec "Setup shear process").
 * NOTE(review): this extract is missing the opening brace, the local
 * declarations (shift, y, gv, dv) and the validity check that normally
 * precede the first assignment — confirm against the full source.
 */
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
	/*
	 * Reinterpret the u32 control values as signed fixed-point.
	 * NOTE(review): const u32 * -> const int * conversion; relies on
	 * same-width two's-complement representation.
	 */
	const int *mat = params;
	/* alpha = a - 1.0 in WARPEDMODEL fixed point, clamped to s16. */
	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
	/* y ~= 1/mat[2] in fixed point, sign-corrected. */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
	dv = ((long long)mat[3] * mat[4]) * y;
	/* NOTE(review): the closing clamp bounds line is missing here. */
	*delta = clamp_val(mat[5] -
			   (int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
	/* Quantize all four shears to WARP_PARAM_REDUCE_BITS granularity. */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
/*
 * Serialize one global-motion model per active reference frame into the
 * global_model DMA buffer (6 params + 4 shear values each, see
 * GLOBAL_MODEL_TOTAL_SIZE) and point the hardware at it.
 * NOTE(review): the byte-serialization statements inside the loops are
 * missing from this extract; only the parameter-access expressions
 * remain visible.
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		/* Shears default to 0 when the model is not warpable. */
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;
		for (i = 0; i < 6; ++i) {
			gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
		/* Shear params only exist up to the AFFINE model type. */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);
	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
/*
 * Smallest k such that (1 << k) >= target, i.e. ceil(log2(target));
 * returns 0 for target <= 1.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
/*
 * Build the per-tile layout table (16 bytes per tile: size in SB units,
 * bitstream start offset and end offset) in the tile_info DMA buffer,
 * in column-major (transposed) order, and program the tile-related
 * registers.
 * NOTE(review): several lines are missing from this extract — the
 * tile0/tile1 declarations, the width/height byte writes, the first
 * "end" byte write and the "else" keyword before the final
 * av1_dec_tile_size_mag write.  Confirm against the full source.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
		ctrls->tile_group_entry;
	/* The hardware consumes the context-update tile id transposed. */
	int context_update_y =
		tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
		tile_info->context_update_tile_id % tile_info->tile_cols;
	int context_update_tile_id =
		context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	memset(dst, 0, av1_dec->tile_info.size);
	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
				tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;
			/* tile size in SB units (width,height) */
			/* tile start position */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;
			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
	/* Multicore context update only from the first tile column. */
	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* Tile-size field width only matters with more than one tile. */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);
	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
/*
 * Signed, wrapped distance between two order hints, following the AV1
 * spec get_relative_dist(): the difference is reduced modulo the order
 * hint range and sign-extended.  Returns 0 when order hints are
 * disabled in the sequence.
 * NOTE(review): the second parameter line and the "diff"/"m"
 * declarations and assignments are missing from this extract.
 */
static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	int bits = ctrls->sequence->order_hint_bits - 1;
	if (!ctrls->sequence->order_hint_bits)
	/* Sign-extend the wrapped difference using bit m as sign bit. */
	diff = (diff & (m - 1)) - (diff & m);
/*
 * Compute ref_frame_sign_bias[] for every reference: 1 when the
 * reference is in the future of the current frame (positive relative
 * order-hint distance), 0 otherwise.  All biases are 0 for intra frames
 * or when order hints are disabled.
 * NOTE(review): this extract is missing the local declarations
 * (i, rel_off), the early return after the intra/zeroing branch, and
 * the head of the rel_off assignment.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;
	// Identify the nearest forward and backward references.
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			rockchip_vpu981_av1_dec_get_dist(ctx,
							 rockchip_vpu981_get_order_hint(ctx, i),
			/* Slot 0 is INTRA_FRAME; biases start at index 1. */
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
/*
 * Program geometry, scaling factors and luma/chroma/MV buffer addresses
 * for hardware reference slot @ref (0..6) from internal frame slot
 * @idx.  Returns non-zero when the reference requires scaling (either
 * factor differs from unity, 1 << AV1_REF_SCALE_SHIFT).
 * NOTE(review): the switch/case scaffolding over @ref and the
 * scale_width/scale_height declaration heads are missing from this
 * extract; only the per-case register writes remain.
 */
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Scale = ref_dim / cur_dim in 1/(1 << 14) units, rounded. */
	((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
	/*
	 * NOTE(review): scale_width is written to the *_ver_scale
	 * register and scale_height to *_hor_scale below — verify this
	 * pairing against the register documentation.
	 */
	hantro_reg_write(vpu, &av1_ref0_height, height);
	hantro_reg_write(vpu, &av1_ref0_width, width);
	hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref1_height, height);
	hantro_reg_write(vpu, &av1_ref1_width, width);
	hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref2_height, height);
	hantro_reg_write(vpu, &av1_ref2_width, width);
	hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref3_height, height);
	hantro_reg_write(vpu, &av1_ref3_width, width);
	hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref4_height, height);
	hantro_reg_write(vpu, &av1_ref4_width, width);
	hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref5_height, height);
	hantro_reg_write(vpu, &av1_ref5_width, width);
	hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
	hantro_reg_write(vpu, &av1_ref6_height, height);
	hantro_reg_write(vpu, &av1_ref6_width, width);
	hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
	hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
	/* default: slot out of range. */
	pr_warn("AV1 invalid reference frame index\n");
	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + cr_offset;
	mv_addr = luma_addr + mv_offset;
	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);
	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
	       (scale_height != (1 << AV1_REF_SCALE_SHIFT));
/*
 * Write the sign-bias value for one hardware reference slot.
 * NOTE(review): the remaining parameter list (presumably the slot index
 * and the bias value "val") and the switch/case scaffolding are missing
 * from this extract; only the per-case register writes remain.
 */
static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
	struct hantro_dev *vpu = ctx->dev;
	hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
	hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
	/* default: slot out of range. */
	pr_warn("AV1 invalid sign bias index\n");
/*
 * Program the segmentation feature registers: resolve the temporal-MV
 * source from the primary reference, collect the per-segment feature
 * values into segval[][], then write quantizer / loop-filter deltas /
 * reference / skip / global-MV settings for all 8 segments.
 * NOTE(review): the function is truncated in this extract (seg7 writes
 * continue past the visible end) and several structural lines (braces,
 * an "if (idx >= 0)" style guard after the idx lookup, clamp bounds)
 * are missing — confirm against the full source.
 */
static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
	struct hantro_dev *vpu = ctx->dev;
	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
	/* Temporal segment prediction needs a valid primary reference. */
	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
		dma_addr_t luma_addr, mv_addr = 0;
		/* NOTE(review): inner "seg" shadows the outer seg pointer. */
		struct hantro_decoded_buffer *seg;
		size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
		seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
		luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
		/* MVs live after luma + chroma in the capture buffer. */
		mv_addr = luma_addr + mv_offset;
		hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
	hantro_reg_write(vpu, &av1_segment_upd_e,
			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
	hantro_reg_write(vpu, &av1_segment_e,
			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
	hantro_reg_write(vpu, &av1_error_resilient,
			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
	/* Temporal MVs are unusable on intra or error-resilient frames. */
	if (IS_INTRA(frame->frame_type) ||
	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
	/* Gather enabled per-segment feature values. */
	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
				/* Sign of the ALT_Q delta collected per segment. */
					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
			/* REF_FRAME feature only applies on inter frames. */
			if (frame->frame_type && seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
			if (seg->feature_enabled[s] &
			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
	/* Highest active segment and whether any pre-skip feature is on. */
	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
			if (seg->feature_enabled[i]
			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
				last_active_seg = max(i, last_active_seg);
	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
	/* Write QP, filter level, ref frame and skip for every segment */
	hantro_reg_write(vpu, &av1_quant_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg0,
			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg1,
			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg2,
			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg3,
			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg4,
			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg5,
			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
	hantro_reg_write(vpu, &av1_skip_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
	hantro_reg_write(vpu, &av1_global_mv_seg6,
			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
	hantro_reg_write(vpu, &av1_quant_seg7,
			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
	hantro_reg_write(vpu, &av1_refpic_seg7,
1031 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1032 hantro_reg_write(vpu, &av1_skip_seg7,
1033 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1034 hantro_reg_write(vpu, &av1_global_mv_seg7,
1035 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
/*
 * Report whether the current frame decodes losslessly: the effective
 * qindex (base_q_idx plus any per-segment ALT_Q delta, clamped to
 * 0..255) must be zero for every segment, and every per-plane DC/AC
 * quantizer delta must be zero.
 * NOTE(review): the return statements inside/after the loop are not
 * visible in this extraction — confirm against the original file.
 */
1038 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1040 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1041 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1042 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1043 const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1044 const struct v4l2_av1_quantization *quantization = &frame->quantization;
/* Check every segment; any non-zero quantizer means the frame is lossy. */
1047 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1048 int qindex = quantization->base_q_idx;
/* Fold in the segment's alternate-quantizer feature when enabled. */
1050 if (segmentation->feature_enabled[i] &
1051 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1052 qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1054 qindex = clamp(qindex, 0, 255);
/* Any per-plane DC/AC delta also disqualifies lossless coding. */
1057 quantization->delta_q_y_dc ||
1058 quantization->delta_q_u_dc ||
1059 quantization->delta_q_u_ac ||
1060 quantization->delta_q_v_dc ||
1061 quantization->delta_q_v_ac)
/*
 * Program the AV1 deblocking loop-filter registers from the V4L2
 * loop_filter control: base filter levels, sharpness, and the
 * reference/mode delta adjustments.  Also points the hardware at the
 * deblocking column data/control buffers.
 */
1067 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1069 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1070 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1071 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1072 const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
/* Filtering is entirely off when both luma base levels are zero. */
1073 bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1074 struct hantro_dev *vpu = ctx->dev;
1076 hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1077 hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1078 hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1080 hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1081 hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1082 hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1083 hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
/*
 * Ref/mode deltas apply only when delta mode is enabled and neither
 * lossless coding nor intra block copy is in effect; otherwise the
 * delta registers are cleared below.
 */
1085 if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1086 !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1087 !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1088 hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1089 loop_filter->ref_deltas[0]);
1090 hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1091 loop_filter->ref_deltas[1]);
1092 hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1093 loop_filter->ref_deltas[2]);
1094 hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1095 loop_filter->ref_deltas[3]);
1096 hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1097 loop_filter->ref_deltas[4]);
1098 hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1099 loop_filter->ref_deltas[5]);
1100 hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1101 loop_filter->ref_deltas[6]);
1102 hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1103 loop_filter->ref_deltas[7]);
1104 hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1105 loop_filter->mode_deltas[0]);
1106 hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1107 loop_filter->mode_deltas[1]);
/* Deltas unused: zero all adjustment registers. */
1109 hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1110 hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1111 hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1112 hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1113 hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1114 hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1115 hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1116 hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1117 hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1118 hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
/* Deblocking column working buffers used by the core. */
1121 hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1122 hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
/*
 * Propagate the CDF tables the hardware produced for this frame
 * (prob_tbl_out) into every reference-frame CDF slot selected by
 * refresh_frame_flags, unless end-of-frame CDF update is disabled.
 * For intra frames the stored inter MV CDF is preserved and the
 * output MV CDF is kept separately as the intra-bc (NDVC) copy.
 */
1125 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1127 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1128 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1129 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1130 bool frame_is_intra = IS_INTRA(frame->frame_type);
1131 struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
/* Stream asked for no CDF update at frame end: nothing to do. */
1134 if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1137 for (i = 0; i < NUM_REF_FRAMES; i++) {
1138 if (frame->refresh_frame_flags & BIT(i)) {
1139 struct mvcdfs stored_mv_cdf;
/* Select slot i, then overwrite it with the fresh CDFs. */
1141 rockchip_av1_get_cdfs(ctx, i);
1142 stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1143 *av1_dec->cdfs = *out_cdfs;
1144 if (frame_is_intra) {
/* Keep the old inter MV CDF; the new one is intra-bc only. */
1145 av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1146 *av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1148 rockchip_av1_store_cdfs(ctx,
1149 frame->refresh_frame_flags);
/*
 * Post-decode hook: fold the hardware's updated probability tables
 * back into the reference CDF storage.
 */
1155 void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
1157 rockchip_vpu981_av1_dec_update_prob(ctx);
/*
 * Select the CDF (probability) tables for this frame and hand them to
 * the hardware.  Defaults are used for error-resilient/intra frames or
 * when no primary reference exists; otherwise the CDFs stored for the
 * primary reference frame are used.  For intra frames the MV portion
 * of the table is overlaid with the intra-bc (NDVC) MV context.
 */
1160 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1162 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1163 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1164 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1165 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1166 struct hantro_dev *vpu = ctx->dev;
1167 bool error_resilient_mode =
1168 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1169 bool frame_is_intra = IS_INTRA(frame->frame_type);
/* Fall back to default CDFs when no usable primary reference exists. */
1171 if (error_resilient_mode || frame_is_intra ||
1172 frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1173 av1_dec->cdfs = &av1_dec->default_cdfs;
1174 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
/* Default coefficient probs depend on the base quantizer index. */
1175 rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1178 rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1180 rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
/* Copy the selected CDFs into the DMA-visible probability table. */
1182 memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1184 if (frame_is_intra) {
1185 int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1186 /* Overwrite MV context area with intrabc MV context */
1187 memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1188 sizeof(struct mvcdfs));
/* Input table and output (updated-by-hw) table addresses. */
1191 hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1192 hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
/*
 * Build a 256-entry film-grain scaling LUT by piecewise-linear
 * interpolation between (values[], scaling[]) control points, using
 * 16.16 fixed-point steps; entries past the last point are clamped to
 * the last scaling value.  An empty point list yields an all-zero LUT.
 */
1196 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1197 u8 num_points, u8 *scaling_lut)
1201 if (num_points == 0) {
1202 memset(scaling_lut, 0, 256);
/* Interpolate each segment between consecutive control points. */
1206 for (point = 0; point < num_points - 1; point++) {
1208 s32 delta_y = scaling[point + 1] - scaling[point];
1209 s32 delta_x = values[point + 1] - values[point];
/* Per-step increment in 16.16 fixed point, rounded; 0 if delta_x is 0. */
1211 delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1214 for (x = 0; x < delta_x; x++) {
1215 scaling_lut[values[point] + x] =
/* + 32768 rounds the 16.16 product to the nearest integer. */
1217 (s32)((x * delta + 32768) >> 16);
/* Saturate the tail of the LUT with the final scaling value. */
1221 for (i = values[num_points - 1]; i < 256; i++)
1222 scaling_lut[i] = scaling[num_points - 1];
/*
 * Configure film-grain synthesis: program the grain registers, build
 * the luma/chroma scaling LUTs, generate the grain blocks in software
 * (the hardware consumes pre-generated grain), and crop them into the
 * DMA-visible film-grain memory.  When grain is not applied all grain
 * registers are zeroed and the function returns early.
 */
1225 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1227 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1228 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1229 const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1230 struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1231 struct hantro_dev *vpu = ctx->dev;
1232 bool scaling_from_luma =
1233 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
/* Pointers to heap arrays: AR coefficients and full grain blocks. */
1234 s32 (*ar_coeffs_y)[24];
1235 s32 (*ar_coeffs_cb)[25];
1236 s32 (*ar_coeffs_cr)[25];
1237 s32 (*luma_grain_block)[73][82];
1238 s32 (*cb_grain_block)[38][44];
1239 s32 (*cr_grain_block)[38][44];
1240 s32 ar_coeff_lag, ar_coeff_shift;
1241 s32 grain_scale_shift, bitdepth;
1242 s32 grain_center, grain_min, grain_max;
1245 hantro_reg_write(vpu, &av1_apply_grain, 0);
/* Grain disabled for this frame: clear every grain register and bail. */
1247 if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1248 hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1249 hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1250 hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1251 hantro_reg_write(vpu, &av1_scaling_shift, 0);
1252 hantro_reg_write(vpu, &av1_cb_mult, 0);
1253 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1254 hantro_reg_write(vpu, &av1_cb_offset, 0);
1255 hantro_reg_write(vpu, &av1_cr_mult, 0);
1256 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1257 hantro_reg_write(vpu, &av1_cr_offset, 0);
1258 hantro_reg_write(vpu, &av1_overlap_flag, 0);
1259 hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1260 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1261 hantro_reg_write(vpu, &av1_random_seed, 0);
1262 hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
/* Temporary buffers for grain generation; freed at the end. */
1266 ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1267 ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1268 ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1269 luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1270 cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1271 cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1273 if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1274 !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1275 pr_warn("Fail allocating memory for film grain parameters\n");
1279 hantro_reg_write(vpu, &av1_apply_grain, 1);
1281 hantro_reg_write(vpu, &av1_num_y_points_b,
1282 film_grain->num_y_points > 0);
1283 hantro_reg_write(vpu, &av1_num_cb_points_b,
1284 film_grain->num_cb_points > 0);
1285 hantro_reg_write(vpu, &av1_num_cr_points_b,
1286 film_grain->num_cr_points > 0);
1287 hantro_reg_write(vpu, &av1_scaling_shift,
1288 film_grain->grain_scaling_minus_8 + 8);
/*
 * Chroma multipliers/offsets are biased in the bitstream
 * (mult - 128, offset - 256); unused when chroma scaling is
 * derived from luma.
 */
1290 if (!scaling_from_luma) {
1291 hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1292 hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1293 hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1294 hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1295 hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1296 hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1298 hantro_reg_write(vpu, &av1_cb_mult, 0);
1299 hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1300 hantro_reg_write(vpu, &av1_cb_offset, 0);
1301 hantro_reg_write(vpu, &av1_cr_mult, 0);
1302 hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1303 hantro_reg_write(vpu, &av1_cr_offset, 0);
1306 hantro_reg_write(vpu, &av1_overlap_flag,
1307 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1308 hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1309 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1310 hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1311 hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
/* Luma scaling LUT is always built from the Y control points. */
1313 rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1314 film_grain->point_y_scaling,
1315 film_grain->num_y_points,
1316 fgmem->scaling_lut_y);
/* Chroma LUTs: copy luma's, or build from the Cb/Cr points. */
1318 if (film_grain->flags &
1319 V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1320 memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1321 sizeof(*fgmem->scaling_lut_y) * 256);
1322 memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1323 sizeof(*fgmem->scaling_lut_y) * 256);
1325 rockchip_vpu981_av1_dec_init_scaling_function
1326 (film_grain->point_cb_value, film_grain->point_cb_scaling,
1327 film_grain->num_cb_points, fgmem->scaling_lut_cb);
1328 rockchip_vpu981_av1_dec_init_scaling_function
1329 (film_grain->point_cr_value, film_grain->point_cr_scaling,
1330 film_grain->num_cr_points, fgmem->scaling_lut_cr);
/* AR coefficients are stored biased by +128 in the control. */
1333 for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1335 (*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1336 (*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1337 (*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1340 ar_coeff_lag = film_grain->ar_coeff_lag;
1341 ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1342 grain_scale_shift = film_grain->grain_scale_shift;
1343 bitdepth = ctx->bit_depth;
/* Grain sample range is centered around mid-grey for this bit depth. */
1344 grain_center = 128 << (bitdepth - 8);
1345 grain_min = 0 - grain_center;
1346 grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1348 rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1349 film_grain->num_y_points, grain_scale_shift,
1350 ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1351 grain_min, grain_max, film_grain->grain_seed);
1353 rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1354 cr_grain_block, bitdepth,
1355 film_grain->num_y_points,
1356 film_grain->num_cb_points,
1357 film_grain->num_cr_points,
1358 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1359 ar_coeffs_cr, ar_coeff_shift, grain_min,
1362 film_grain->grain_seed);
/* Crop the 64x64 luma window (offset 9,9) into the DMA buffer. */
1364 for (i = 0; i < 64; i++) {
1365 for (j = 0; j < 64; j++)
1366 fgmem->cropped_luma_grain_block[i * 64 + j] =
1367 (*luma_grain_block)[i + 9][j + 9];
/* Crop 32x32 chroma windows (offset 6,6), Cb/Cr interleaved. */
1370 for (i = 0; i < 32; i++) {
1371 for (j = 0; j < 32; j++) {
1372 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1373 (*cb_grain_block)[i + 6][j + 6];
1374 fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1375 (*cr_grain_block)[i + 6][j + 6];
1379 hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
/*
 * Release the temporary generation buffers.
 * NOTE(review): kfree(ar_coeffs_y) is not visible here (lines
 * 1380-1382 are missing from this extraction) — verify it exists in
 * the original file, otherwise ar_coeffs_y would leak.
 */
1383 kfree(ar_coeffs_cb);
1384 kfree(ar_coeffs_cr);
1385 kfree(luma_grain_block);
1386 kfree(cb_grain_block);
1387 kfree(cr_grain_block);
/*
 * Program CDEF (constrained directional enhancement filter) registers:
 * pack the per-index primary strengths into 4-bit fields and secondary
 * strengths into 2-bit fields.  A bitstream secondary strength of 4 is
 * encoded as the hardware code 3 (2-bit field cannot hold 4).
 */
1390 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1392 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1393 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1394 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1395 const struct v4l2_av1_cdef *cdef = &frame->cdef;
1396 struct hantro_dev *vpu = ctx->dev;
1397 u32 luma_pri_strength = 0;
1398 u16 luma_sec_strength = 0;
1399 u32 chroma_pri_strength = 0;
1400 u16 chroma_sec_strength = 0;
1403 hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1404 hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
/* 2^bits filter strength entries; pack each into its bitfield slot. */
1406 for (i = 0; i < BIT(cdef->bits); i++) {
1407 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1408 if (cdef->y_sec_strength[i] == 4)
/* Secondary strength 4 maps to register code 3. */
1409 luma_sec_strength |= 3 << (i * 2);
1411 luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1413 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1414 if (cdef->uv_sec_strength[i] == 4)
1415 chroma_sec_strength |= 3 << (i * 2);
1417 chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1420 hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1422 hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1424 hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1425 chroma_pri_strength);
1426 hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1427 chroma_sec_strength);
/* CDEF column working buffer. */
1429 hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
/*
 * Program loop-restoration registers: per-plane restoration type and
 * restoration-unit size, each packed into 2-bit fields.  Unit sizes
 * default to code 3 and are derived from the lr_unit_shift /
 * lr_uv_shift values when loop restoration is in use.
 */
1432 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1434 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1435 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1436 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1437 const struct v4l2_av1_loop_restoration *loop_restoration =
1438 &frame->loop_restoration;
1439 struct hantro_dev *vpu = ctx->dev;
1440 u16 lr_type = 0, lr_unit_size = 0;
1441 u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1444 if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1445 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
/* Chroma planes shrink further by the UV shift. */
1446 restoration_unit_size[1] =
1447 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448 restoration_unit_size[2] =
1449 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
/* Pack per-plane type and unit size into 2-bit fields. */
1452 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1454 loop_restoration->frame_restoration_type[i] << (i * 2);
1455 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1458 hantro_reg_write(vpu, &av1_lr_type, lr_type);
1459 hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1460 hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
/*
 * Compute and program super-resolution upscaling parameters following
 * the AV1 spec's superres derivation: downscaled width from the
 * denominator, per-pixel luma/chroma steps and their inverses in
 * RS_SCALE_SUBPEL_BITS fixed point, and the initial subpel x offsets.
 * When superres is inactive (or the scaled width equals the upscaled
 * width) the defaults programmed below mean "not scaled".
 */
1463 static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
1465 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1466 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1467 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1468 struct hantro_dev *vpu = ctx->dev;
1469 u8 superres_scale_denominator = SCALE_NUMERATOR;
1470 int superres_luma_step = RS_SCALE_SUBPEL_BITS;
1471 int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1472 int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1473 int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1474 int superres_init_luma_subpel_x = 0;
1475 int superres_init_chroma_subpel_x = 0;
1476 int superres_is_scaled = 0;
/* Spec lower bound on the downscaled width. */
1477 int min_w = min_t(uint32_t, 16, frame->upscaled_width);
1478 int upscaled_luma, downscaled_luma;
1479 int downscaled_chroma, upscaled_chroma;
1480 int step_luma, step_chroma;
1481 int err_luma, err_chroma;
1482 int initial_luma, initial_chroma;
1485 if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1486 superres_scale_denominator = frame->superres_denom;
/* denominator <= numerator means no actual scaling. */
1488 if (superres_scale_denominator <= SCALE_NUMERATOR)
/* Rounded downscaled width: upscaled * 8 / denom. */
1491 width = (frame->upscaled_width * SCALE_NUMERATOR +
1492 (superres_scale_denominator / 2)) / superres_scale_denominator;
1497 if (width == frame->upscaled_width)
1500 superres_is_scaled = 1;
1501 upscaled_luma = frame->upscaled_width;
1502 downscaled_luma = width;
/* 4:2:0 chroma widths: half, rounded up. */
1503 downscaled_chroma = (downscaled_luma + 1) >> 1;
1504 upscaled_chroma = (upscaled_luma + 1) >> 1;
/* Forward steps (downscaled/upscaled) in 14-bit fixed point, rounded. */
1506 ((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
1507 (upscaled_luma / 2)) / upscaled_luma;
1509 ((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
1510 (upscaled_chroma / 2)) / upscaled_chroma;
/* Accumulated rounding error across the whole row. */
1512 (upscaled_luma * step_luma)
1513 - (downscaled_luma << RS_SCALE_SUBPEL_BITS);
1515 (upscaled_chroma * step_chroma)
1516 - (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
/* Initial subpel x, error-compensated, masked to the subpel range. */
1518 ((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
1519 + upscaled_luma / 2)
1520 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
1521 & RS_SCALE_SUBPEL_MASK;
1523 ((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
1524 + upscaled_chroma / 2)
1525 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
1526 & RS_SCALE_SUBPEL_MASK;
1527 superres_luma_step = step_luma;
1528 superres_chroma_step = step_chroma;
/* Inverse steps (upscaled/downscaled), also rounded fixed point. */
1529 superres_luma_step_invra =
1530 ((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
1532 superres_chroma_step_invra =
1533 ((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
1534 / downscaled_chroma;
1535 superres_init_luma_subpel_x = initial_luma;
1536 superres_init_chroma_subpel_x = initial_chroma;
1539 hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);
1541 if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
1542 hantro_reg_write(vpu, &av1_scale_denom_minus9,
1543 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
1545 hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);
1547 hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
1548 hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
1549 hantro_reg_write(vpu, &av1_superres_luma_step_invra,
1550 superres_luma_step_invra);
1551 hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
1552 superres_chroma_step_invra);
1553 hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
1554 superres_init_luma_subpel_x);
1555 hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
1556 superres_init_chroma_subpel_x);
1557 hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);
/* Super-resolution column working buffer. */
1559 hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
/*
 * Program frame geometry: width/height in 8-pixel coded blocks plus
 * the padding needed to reach the next 8-pixel boundary, then set up
 * the dependent super-resolution parameters.
 */
1562 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1564 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1565 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1566 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1567 struct hantro_dev *vpu = ctx->dev;
/* Coded-block counts: dimensions rounded up to 8-pixel units. */
1568 int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1569 int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
/* Pixels of padding added by the 8-pixel alignment. */
1570 int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1571 - (frame->frame_width_minus_1 + 1);
1572 int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1573 - (frame->frame_height_minus_1 + 1);
1575 hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1576 hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1577 hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1578 hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1580 rockchip_vpu981_av1_dec_set_superres_params(ctx);
/*
 * Select up to three motion-field reference frames (for temporal MV
 * prediction) and program the per-reference order-hint distance
 * registers.  Candidates are considered in the spec's priority order
 * (LAST, BWDREF, ALTREF2, ALTREF, LAST2); a candidate is accepted only
 * if its mi_cols/mi_rows match the current frame and it is not
 * intra-only.  Temporal MVs are used only when USE_REF_FRAME_MVS is
 * set and the chosen reference's distance is within
 * +/-MAX_FRAME_DISTANCE.
 */
1583 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1585 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1586 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1587 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1588 struct hantro_dev *vpu = ctx->dev;
1589 bool use_ref_frame_mvs =
1590 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1591 int cur_frame_offset = frame->order_hint;
1592 int alt_frame_offset = 0;
1593 int gld_frame_offset = 0;
1594 int bwd_frame_offset = 0;
1595 int alt2_frame_offset = 0;
/* Buffer indices of the up-to-3 accepted motion-field references. */
1596 int refs_selected[3] = { 0, 0, 0 };
1597 int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1598 int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
/* Signed order-hint distances current->ref and ref->current. */
1599 int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1600 int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
/* V4L2 reference type of each accepted motion-field reference. */
1601 int mf_types[3] = { 0, 0, 0 };
1606 alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1607 gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1608 bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1609 alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
/* Candidate 1: LAST — skipped if it is an overlay of GOLDEN. */
1611 idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1613 int alt_frame_offset_in_lst =
1614 av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1615 bool is_lst_overlay =
1616 (alt_frame_offset_in_lst == gld_frame_offset);
1618 if (!is_lst_overlay) {
1619 int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1620 int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1621 bool lst_intra_only =
1622 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1624 if (lst_mi_cols == cur_mi_cols &&
1625 lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1626 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1627 refs_selected[ref_ind++] = LST_BUF_IDX;
/* Candidate 2: BWDREF — must lie in the future (positive distance). */
1633 idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1634 if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1635 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1636 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1637 bool bwd_intra_only =
1638 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1640 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1642 mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1643 refs_selected[ref_ind++] = BWD_BUF_IDX;
/* Candidate 3: ALTREF2 — also future-only. */
1648 idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1649 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1650 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1651 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1652 bool alt2_intra_only =
1653 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1655 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1657 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1658 refs_selected[ref_ind++] = ALT2_BUF_IDX;
/* Candidate 4: ALTREF — future-only with an extra visible condition. */
1663 idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1664 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1666 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1667 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1668 bool alt_intra_only =
1669 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1671 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1673 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1674 refs_selected[ref_ind++] = ALT_BUF_IDX;
/* Candidate 5: LAST2 — gated on a ref_stamp counter. */
1679 idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1680 if (idx >= 0 && ref_stamp >= 0) {
1681 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1682 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1683 bool lst2_intra_only =
1684 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1686 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1688 mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1689 refs_selected[ref_ind++] = LST2_BUF_IDX;
/* Precompute both signed order-hint distances for every reference. */
1694 for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1695 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1697 int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1700 rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1702 rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1705 cur_roffset[rf] = 0;
/* Default: temporal MV prediction disabled for all four slots. */
1709 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1710 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1711 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1712 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1714 hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1715 hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1716 hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1717 hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1718 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1719 hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1720 hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
/* Motion-field slot 1: program distances from the first selected ref. */
1722 if (use_ref_frame_mvs && ref_ind > 0 &&
1723 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1724 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1725 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1726 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1727 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1730 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1732 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1733 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1735 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1736 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1738 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1739 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1741 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1742 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1744 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1745 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1747 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1748 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1750 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1751 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1754 hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1755 hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1756 hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1757 hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1758 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1759 hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1760 hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
/* Motion-field slot 2: same pattern for the second selected ref. */
1762 if (use_ref_frame_mvs && ref_ind > 1 &&
1763 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1764 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1765 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1766 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1767 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1770 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1772 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1773 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1775 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1776 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1778 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1779 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1781 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1782 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1784 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1785 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1787 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1788 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1790 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1791 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1794 hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1795 hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1796 hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1797 hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1798 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1799 hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1800 hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
/* Motion-field slot 3: same pattern for the third selected ref. */
1802 if (use_ref_frame_mvs && ref_ind > 2 &&
1803 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1804 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1805 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1806 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1807 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1810 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1812 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1813 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1815 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1816 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1818 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1819 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1821 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1822 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1824 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1825 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1827 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1828 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1830 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1831 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
/* Distances from the current frame to each of its 7 references. */
1834 hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1835 hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1836 hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1837 hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1838 hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1839 hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1840 hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
/* And the reverse distances (reference to current frame). */
1842 hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1843 hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1844 hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1845 hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1846 hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1847 hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1848 hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
/* Reference type of each motion-field slot, 0-based for hardware. */
1850 hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1851 hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1852 hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
/*
 * Program the reference-frame state for the frame about to be decoded:
 * the number of distinct reference buffers in use, each of the seven AV1
 * inter reference slots' buffer index/dimensions/sign bias, the reference
 * scaling enable, and the global-motion mode per slot. Finishes by
 * programming the temporal motion-field data for the "other" frames.
 */
1855 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1857 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1858 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1859 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1860 int frame_type = frame->frame_type;
1861 bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
/* One counter per decoded-picture buffer: a buffer referenced by several
 * AV1 reference slots must only be counted once below. */
1862 int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1863 struct hantro_dev *vpu = ctx->dev;
1864 int i, ref_frames = 0;
1865 bool scale_enable = false;
/* Intra frames without intra block copy use no reference frames, so
 * there is nothing to program. */
1867 if (IS_INTRA(frame_type) && !allow_intrabc)
/* Count how many distinct reference buffers this frame actually uses. */
1870 if (!allow_intrabc) {
1871 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1872 int idx = rockchip_vpu981_get_frame_index(ctx, i);
1878 for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1885 hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1887 rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
/* Walk the inter reference slots (LAST .. ALTREF) and program each
 * slot's buffer index, dimensions and sign bias. */
1889 for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
/* With intra block copy the current frame acts as its own reference. */
1894 if (allow_intrabc) {
1895 idx = av1_dec->current_frame_index;
1896 width = frame->frame_width_minus_1 + 1;
1897 height = frame->frame_height_minus_1 + 1;
1899 if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1900 idx = rockchip_vpu981_get_frame_index(ctx, ref);
1901 width = av1_dec->frame_refs[idx].width;
1902 height = av1_dec->frame_refs[idx].height;
/* NOTE(review): set_ref() presumably reports whether this reference
 * needs scaling and is accumulated into scale_enable — confirm against
 * its definition. */
1906 rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1909 rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1910 av1_dec->ref_frame_sign_bias[i]);
1912 hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
/* Global-motion model type for each reference slot, straight from the
 * V4L2 frame control. */
1914 hantro_reg_write(vpu, &av1_ref0_gm_mode,
1915 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1916 hantro_reg_write(vpu, &av1_ref1_gm_mode,
1917 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1918 hantro_reg_write(vpu, &av1_ref2_gm_mode,
1919 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1920 hantro_reg_write(vpu, &av1_ref3_gm_mode,
1921 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1922 hantro_reg_write(vpu, &av1_ref4_gm_mode,
1923 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1924 hantro_reg_write(vpu, &av1_ref5_gm_mode,
1925 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1926 hantro_reg_write(vpu, &av1_ref6_gm_mode,
1927 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
/* Temporal MV projection data for the motion-field reference frames. */
1929 rockchip_vpu981_av1_dec_set_other_frames(ctx);
/*
 * Translate the per-frame V4L2 AV1 frame and sequence controls into the
 * static decode-parameter registers: tool enables, loop-filter/quantizer
 * deltas, quantizer matrices, skip-mode references and the tile-sync
 * buffer addresses.
 */
1932 static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
1934 struct hantro_dev *vpu = ctx->dev;
1935 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1936 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
/* Frame-level flags from the V4L2 AV1 frame control. */
1938 hantro_reg_write(vpu, &av1_skip_mode,
1939 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
1940 hantro_reg_write(vpu, &av1_tempor_mvp_e,
1941 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
1942 hantro_reg_write(vpu, &av1_delta_lf_res_log,
1943 ctrls->frame->loop_filter.delta_lf_res);
1944 hantro_reg_write(vpu, &av1_delta_lf_multi,
1945 !!(ctrls->frame->loop_filter.flags
1946 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
1947 hantro_reg_write(vpu, &av1_delta_lf_present,
1948 !!(ctrls->frame->loop_filter.flags
1949 & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
1950 hantro_reg_write(vpu, &av1_disable_cdf_update,
1951 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
1952 hantro_reg_write(vpu, &av1_allow_warp,
1953 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
1954 hantro_reg_write(vpu, &av1_show_frame,
1955 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
1956 hantro_reg_write(vpu, &av1_switchable_motion_mode,
1957 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
/* Sequence-level tool enables. */
1958 hantro_reg_write(vpu, &av1_enable_cdef,
1959 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
1960 hantro_reg_write(vpu, &av1_allow_masked_compound,
1961 !!(ctrls->sequence->flags
1962 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
1963 hantro_reg_write(vpu, &av1_allow_interintra,
1964 !!(ctrls->sequence->flags
1965 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
1966 hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
1967 !!(ctrls->sequence->flags
1968 & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
1969 hantro_reg_write(vpu, &av1_allow_filter_intra,
1970 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
1971 hantro_reg_write(vpu, &av1_enable_jnt_comp,
1972 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
1973 hantro_reg_write(vpu, &av1_enable_dual_filter,
1974 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
1975 hantro_reg_write(vpu, &av1_reduced_tx_set_used,
1976 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
1977 hantro_reg_write(vpu, &av1_allow_screen_content_tools,
1978 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
1979 hantro_reg_write(vpu, &av1_allow_intrabc,
1980 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));
/* force_integer_mv is only meaningful when screen content tools are
 * allowed; force it to 0 otherwise. */
1982 if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
1983 hantro_reg_write(vpu, &av1_force_interger_mv, 0);
1985 hantro_reg_write(vpu, &av1_force_interger_mv,
1986 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));
/* Monochrome decode left disabled. */
1988 hantro_reg_write(vpu, &av1_blackwhite_e, 0);
1989 hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
1990 hantro_reg_write(vpu, &av1_delta_q_present,
1991 !!(ctrls->frame->quantization.flags
1992 & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));
/* frame_type 0 is a key frame in the V4L2 AV1 uAPI. */
1994 hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
1995 hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
1996 hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
1997 hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);
1999 hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
2000 hantro_reg_write(vpu, &av1_high_prec_mv_e,
2001 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
2002 hantro_reg_write(vpu, &av1_comp_pred_mode,
2003 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
/* NOTE(review): hardware transform-mode code: tx_mode 1 maps to 3,
 * anything else to 4 — confirm against the G2 register documentation. */
2004 hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
/* Superblock size as log2: 7 for 128x128, 6 for 64x64. */
2005 hantro_reg_write(vpu, &av1_max_cb_size,
2006 (ctrls->sequence->flags
2007 & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
2008 hantro_reg_write(vpu, &av1_min_cb_size, 3);
/* Fields with no V4L2 counterpart are simply cleared. */
2010 hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
2011 hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
2012 hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
2013 hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
2014 hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
2015 hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
2016 hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
2017 hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
2018 hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
2019 hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
2020 hantro_reg_write(vpu, &av1_filt_level_seg7, 0);
2022 hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
2023 hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
2024 hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
/* Quantizer matrices from the control when present, 0xff otherwise
 * (presumably the hardware's "no matrix" level — confirm). */
2025 if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
2026 hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
2027 hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
2028 hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
2030 hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
2031 hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
2032 hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
2035 hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
2036 hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
2037 hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);
/* NOTE(review): a zero skip_mode_frame is replaced with 1 — presumably
 * the hardware cannot take reference index 0 here; confirm. */
2039 hantro_reg_write(vpu, &av1_skip_ref0,
2040 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
2041 hantro_reg_write(vpu, &av1_skip_ref1,
2042 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);
/* Both multicore sync pointers share the driver's tile buffer. */
2044 hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
2045 hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
/*
 * Point the hardware at the compressed bitstream in the source vb2
 * buffer, using the first tile group entry's tile_offset to locate the
 * start of the tile data within the buffer.
 */
2049 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2050 struct vb2_v4l2_buffer *vb2_src)
2052 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2053 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2054 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2055 ctrls->tile_group_entry;
2056 struct hantro_dev *vpu = ctx->dev;
2058 u32 src_len, src_buf_len;
2059 int start_bit, offset;
2061 src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2062 src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2063 src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
/* Align the stream address down to 16 bytes; the dropped low bytes are
 * handed to the hardware as a bit offset into the first 16-byte word. */
2065 start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2066 offset = group_entry[0].tile_offset & ~0xf;
2068 hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2069 hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2070 hantro_reg_write(vpu, &av1_stream_len, src_len);
2071 hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2072 hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
/*
 * Program the luma, chroma and motion-vector output addresses of the
 * tile-output (reference) buffer associated with the current frame.
 * Luma lives at the start of the buffer, chroma right after the luma
 * plane, and the MV data after that.
 */
2076 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2078 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2079 struct hantro_dev *vpu = ctx->dev;
2080 struct hantro_decoded_buffer *dst;
2081 struct vb2_v4l2_buffer *vb2_dst;
2082 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2083 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2084 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2086 vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2087 dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2088 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2089 chroma_addr = luma_addr + cr_offset;
/* NOTE(review): mv_addr = luma + chroma_size() only lands past the
 * chroma plane if chroma_size() returns the cumulative luma+chroma
 * size — confirm against the helper's definition. */
2090 mv_addr = luma_addr + mv_offset;
2092 hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2093 hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2094 hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
/*
 * Per-frame decode entry point: fetch and validate the controls, update
 * the internal reference bookkeeping, program every register group for
 * this frame and kick the hardware. Returns 0 on success or the negative
 * errno from run preparation.
 */
2097 int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
2099 struct hantro_dev *vpu = ctx->dev;
2100 struct vb2_v4l2_buffer *vb2_src;
2103 hantro_start_prepare_run(ctx);
2105 ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
2109 vb2_src = hantro_get_src_buf(ctx);
/* Recycle stale reference slots, then register the new frame under the
 * source buffer's timestamp. */
2115 rockchip_vpu981_av1_dec_clean_refs(ctx);
2116 rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);
/* Program every per-frame parameter group. */
2118 rockchip_vpu981_av1_dec_set_parameters(ctx);
2119 rockchip_vpu981_av1_dec_set_global_model(ctx);
2120 rockchip_vpu981_av1_dec_set_tile_info(ctx);
2121 rockchip_vpu981_av1_dec_set_reference_frames(ctx);
2122 rockchip_vpu981_av1_dec_set_segmentation(ctx);
2123 rockchip_vpu981_av1_dec_set_loopfilter(ctx);
2124 rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
2125 rockchip_vpu981_av1_dec_set_cdef(ctx);
2126 rockchip_vpu981_av1_dec_set_lr(ctx);
2127 rockchip_vpu981_av1_dec_set_fgs(ctx);
2128 rockchip_vpu981_av1_dec_set_prob(ctx);
/* Static decode-control setup common to every frame. */
2130 hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
2131 hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
2132 hantro_reg_write(vpu, &av1_write_mvs_e, 1);
2133 hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
2134 hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);
2136 hantro_reg_write(vpu, &av1_dec_abort_e, 0);
2137 hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);
/* Bus/prefetch tuning values. */
2139 hantro_reg_write(vpu, &av1_dec_alignment, 64);
2140 hantro_reg_write(vpu, &av1_apf_disable, 0);
2141 hantro_reg_write(vpu, &av1_apf_threshold, 8);
2142 hantro_reg_write(vpu, &av1_dec_buswidth, 2);
2143 hantro_reg_write(vpu, &av1_dec_max_burst, 16);
2144 hantro_reg_write(vpu, &av1_error_conceal_e, 0);
2145 hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
2146 hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);
/* Override the hardware watchdog with generous timeout cycle counts. */
2148 hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
2149 hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
2150 hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
2151 hantro_reg_write(vpu, &av1_timeout_override_e, 1);
2153 rockchip_vpu981_av1_dec_set_output_buffer(ctx);
2154 rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);
2156 hantro_end_prepare_run(ctx);
/* Start decoding. */
2158 hantro_reg_write(vpu, &av1_dec_e, 1);
/* Failure path: finish the prepare/run cycle and complete the buffers
 * in the error state. */
2163 hantro_end_prepare_run(ctx);
2164 hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
/*
 * Enable and configure the post-processor so the decoded frame is also
 * written linearly to the capture buffer, converted to the destination
 * pixel format (NV12 or P010).
 */
2168 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2170 struct hantro_dev *vpu = ctx->dev;
2171 int width = ctx->dst_fmt.width;
2172 int height = ctx->dst_fmt.height;
2173 struct vb2_v4l2_buffer *vb2_dst;
2174 size_t chroma_offset;
2177 vb2_dst = hantro_get_dst_buf(ctx);
2179 dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
/* The chroma plane immediately follows the luma plane in the
 * destination buffer. */
2180 chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2181 ctx->dst_fmt.height;
2183 /* enable post processor */
2184 hantro_reg_write(vpu, &av1_pp_out_e, 1);
2185 hantro_reg_write(vpu, &av1_pp_in_format, 0);
2186 hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2187 hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
/* NOTE(review): input dimensions are programmed in 2-pixel units with
 * horizontal/vertical duplication enabled above — presumably this
 * core's way of expressing a 1:1 copy; confirm against the PP docs. */
2189 hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2190 hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2191 hantro_reg_write(vpu, &av1_pp_out_height, height);
2192 hantro_reg_write(vpu, &av1_pp_out_width, width);
2193 hantro_reg_write(vpu, &av1_pp_out_y_stride,
2194 ctx->dst_fmt.plane_fmt[0].bytesperline);
2195 hantro_reg_write(vpu, &av1_pp_out_c_stride,
2196 ctx->dst_fmt.plane_fmt[0].bytesperline);
/* Hardware output-format code: 1 = P010, 3 = NV12, 0 otherwise. */
2197 switch (ctx->dst_fmt.pixelformat) {
2198 case V4L2_PIX_FMT_P010:
2199 hantro_reg_write(vpu, &av1_pp_out_format, 1);
2201 case V4L2_PIX_FMT_NV12:
2202 hantro_reg_write(vpu, &av1_pp_out_format, 3);
2205 hantro_reg_write(vpu, &av1_pp_out_format, 0);
/* Unused post-processor features are explicitly disabled. */
2208 hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2209 hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2210 hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2211 hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2212 hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2213 hantro_reg_write(vpu, &av1_pp_up_level, 0);
2214 hantro_reg_write(vpu, &av1_pp_down_level, 0);
2215 hantro_reg_write(vpu, &av1_pp_exist, 0);
2217 hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2218 hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
/* Turn off the post-processed (linear) output path for this run. */
2221 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2223 struct hantro_dev *vpu = ctx->dev;
2225 /* disable post processor */
2226 hantro_reg_write(vpu, &av1_pp_out_e, 0);
/* Post-processor hooks plugged into the common hantro postproc core. */
2229 const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
2230 .enable = rockchip_vpu981_postproc_enable,
2231 .disable = rockchip_vpu981_postproc_disable,