GNU Linux-libre 5.10.153-gnu1
[releases.git] / drivers / staging / media / hantro / hantro_g1_h264_dec.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip RK3288 VPU codec driver
4  *
5  * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6  *      Hertz Wong <hertz.wong@rock-chips.com>
7  *      Herman Chen <herman.chen@rock-chips.com>
8  *
9  * Copyright (C) 2014 Google, Inc.
10  *      Tomasz Figa <tfiga@chromium.org>
11  */
12
13 #include <linux/types.h>
14 #include <linux/sort.h>
15
16 #include <media/v4l2-mem2mem.h>
17
18 #include "hantro_g1_regs.h"
19 #include "hantro_hw.h"
20 #include "hantro_v4l2.h"
21
22 static void set_params(struct hantro_ctx *ctx)
23 {
24         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
25         const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
26         const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
27         const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
28         struct vb2_v4l2_buffer *src_buf = hantro_get_src_buf(ctx);
29         struct hantro_dev *vpu = ctx->dev;
30         u32 reg;
31
32         /* Decoder control register 0. */
33         reg = G1_REG_DEC_CTRL0_DEC_AXI_WR_ID(0x0);
34         if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)
35                 reg |= G1_REG_DEC_CTRL0_SEQ_MBAFF_E;
36         if (sps->profile_idc > 66) {
37                 reg |= G1_REG_DEC_CTRL0_PICORD_COUNT_E;
38                 if (dec_param->nal_ref_idc)
39                         reg |= G1_REG_DEC_CTRL0_WRITE_MVS_E;
40         }
41
42         if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) &&
43             (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD ||
44              dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC))
45                 reg |= G1_REG_DEC_CTRL0_PIC_INTERLACE_E;
46         if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)
47                 reg |= G1_REG_DEC_CTRL0_PIC_FIELDMODE_E;
48         if (!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD))
49                 reg |= G1_REG_DEC_CTRL0_PIC_TOPFIELD_E;
50         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0);
51
52         /* Decoder control register 1. */
53         reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) |
54               G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) |
55               G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames);
56         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1);
57
58         /* Decoder control register 2. */
59         reg = G1_REG_DEC_CTRL2_CH_QP_OFFSET(pps->chroma_qp_index_offset) |
60               G1_REG_DEC_CTRL2_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset);
61
62         if (pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)
63                 reg |= G1_REG_DEC_CTRL2_TYPE1_QUANT_E;
64         if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY))
65                 reg |= G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E;
66         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL2);
67
68         /* Decoder control register 3. */
69         reg = G1_REG_DEC_CTRL3_START_CODE_E |
70               G1_REG_DEC_CTRL3_INIT_QP(pps->pic_init_qp_minus26 + 26) |
71               G1_REG_DEC_CTRL3_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
72         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL3);
73
74         /* Decoder control register 4. */
75         reg = G1_REG_DEC_CTRL4_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) |
76               G1_REG_DEC_CTRL4_FRAMENUM(dec_param->frame_num) |
77               G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc);
78         if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE)
79                 reg |= G1_REG_DEC_CTRL4_CABAC_E;
80         if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
81                 reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E;
82         if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0)
83                 reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E;
84         if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
85                 reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E;
86         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL4);
87
88         /* Decoder control register 5. */
89         reg = G1_REG_DEC_CTRL5_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) |
90               G1_REG_DEC_CTRL5_IDR_PIC_ID(dec_param->idr_pic_id);
91         if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED)
92                 reg |= G1_REG_DEC_CTRL5_CONST_INTRA_E;
93         if (pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT)
94                 reg |= G1_REG_DEC_CTRL5_FILT_CTRL_PRES;
95         if (pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT)
96                 reg |= G1_REG_DEC_CTRL5_RDPIC_CNT_PRES;
97         if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE)
98                 reg |= G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E;
99         if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC)
100                 reg |= G1_REG_DEC_CTRL5_IDR_PIC_E;
101         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL5);
102
103         /* Decoder control register 6. */
104         reg = G1_REG_DEC_CTRL6_PPS_ID(pps->pic_parameter_set_id) |
105               G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) |
106               G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) |
107               G1_REG_DEC_CTRL6_POC_LENGTH(dec_param->pic_order_cnt_bit_size);
108         vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL6);
109
110         /* Error concealment register. */
111         vdpu_write_relaxed(vpu, 0, G1_REG_ERR_CONC);
112
113         /* Prediction filter tap register. */
114         vdpu_write_relaxed(vpu,
115                            G1_REG_PRED_FLT_PRED_BC_TAP_0_0(1) |
116                            G1_REG_PRED_FLT_PRED_BC_TAP_0_1(-5 & 0x3ff) |
117                            G1_REG_PRED_FLT_PRED_BC_TAP_0_2(20),
118                            G1_REG_PRED_FLT);
119
120         /* Reference picture buffer control register. */
121         vdpu_write_relaxed(vpu, 0, G1_REG_REF_BUF_CTRL);
122
123         /* Reference picture buffer control register 2. */
124         vdpu_write_relaxed(vpu, G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(8),
125                            G1_REG_REF_BUF_CTRL2);
126 }
127
128 static void set_ref(struct hantro_ctx *ctx)
129 {
130         struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
131         const u8 *b0_reflist, *b1_reflist, *p_reflist;
132         struct hantro_dev *vpu = ctx->dev;
133         u32 dpb_longterm = 0;
134         u32 dpb_valid = 0;
135         int reg_num;
136         u32 reg;
137         int i;
138
139         /*
140          * Set up bit maps of valid and long term DPBs.
141          * NOTE: The bits are reversed, i.e. MSb is DPB 0.
142          */
143         for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
144                 if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
145                         dpb_valid |= BIT(HANTRO_H264_DPB_SIZE - 1 - i);
146
147                 if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
148                         dpb_longterm |= BIT(HANTRO_H264_DPB_SIZE - 1 - i);
149         }
150         vdpu_write_relaxed(vpu, dpb_valid << 16, G1_REG_VALID_REF);
151         vdpu_write_relaxed(vpu, dpb_longterm << 16, G1_REG_LT_REF);
152
153         /*
154          * Set up reference frame picture numbers.
155          *
156          * Each G1_REG_REF_PIC(x) register contains numbers of two
157          * subsequential reference pictures.
158          */
159         for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) {
160                 reg = 0;
161                 if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
162                         reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].pic_num);
163                 else
164                         reg |= G1_REG_REF_PIC_REFER0_NBR(dpb[i].frame_num);
165
166                 if (dpb[i + 1].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
167                         reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].pic_num);
168                 else
169                         reg |= G1_REG_REF_PIC_REFER1_NBR(dpb[i + 1].frame_num);
170
171                 vdpu_write_relaxed(vpu, reg, G1_REG_REF_PIC(i / 2));
172         }
173
174         b0_reflist = ctx->h264_dec.reflists.b0;
175         b1_reflist = ctx->h264_dec.reflists.b1;
176         p_reflist = ctx->h264_dec.reflists.p;
177
178         /*
179          * Each G1_REG_BD_REF_PIC(x) register contains three entries
180          * of each forward and backward picture list.
181          */
182         reg_num = 0;
183         for (i = 0; i < 15; i += 3) {
184                 reg = G1_REG_BD_REF_PIC_BINIT_RLIST_F0(b0_reflist[i]) |
185                       G1_REG_BD_REF_PIC_BINIT_RLIST_F1(b0_reflist[i + 1]) |
186                       G1_REG_BD_REF_PIC_BINIT_RLIST_F2(b0_reflist[i + 2]) |
187                       G1_REG_BD_REF_PIC_BINIT_RLIST_B0(b1_reflist[i]) |
188                       G1_REG_BD_REF_PIC_BINIT_RLIST_B1(b1_reflist[i + 1]) |
189                       G1_REG_BD_REF_PIC_BINIT_RLIST_B2(b1_reflist[i + 2]);
190                 vdpu_write_relaxed(vpu, reg, G1_REG_BD_REF_PIC(reg_num++));
191         }
192
193         /*
194          * G1_REG_BD_P_REF_PIC register contains last entries (index 15)
195          * of forward and backward reference picture lists and first 4 entries
196          * of P forward picture list.
197          */
198         reg = G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(b0_reflist[15]) |
199               G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(b1_reflist[15]) |
200               G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(p_reflist[0]) |
201               G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(p_reflist[1]) |
202               G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(p_reflist[2]) |
203               G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(p_reflist[3]);
204         vdpu_write_relaxed(vpu, reg, G1_REG_BD_P_REF_PIC);
205
206         /*
207          * Each G1_REG_FWD_PIC(x) register contains six consecutive
208          * entries of P forward picture list, starting from index 4.
209          */
210         reg_num = 0;
211         for (i = 4; i < HANTRO_H264_DPB_SIZE; i += 6) {
212                 reg = G1_REG_FWD_PIC_PINIT_RLIST_F0(p_reflist[i]) |
213                       G1_REG_FWD_PIC_PINIT_RLIST_F1(p_reflist[i + 1]) |
214                       G1_REG_FWD_PIC_PINIT_RLIST_F2(p_reflist[i + 2]) |
215                       G1_REG_FWD_PIC_PINIT_RLIST_F3(p_reflist[i + 3]) |
216                       G1_REG_FWD_PIC_PINIT_RLIST_F4(p_reflist[i + 4]) |
217                       G1_REG_FWD_PIC_PINIT_RLIST_F5(p_reflist[i + 5]);
218                 vdpu_write_relaxed(vpu, reg, G1_REG_FWD_PIC(reg_num++));
219         }
220
221         /* Set up addresses of DPB buffers. */
222         for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) {
223                 dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, i);
224
225                 vdpu_write_relaxed(vpu, dma_addr, G1_REG_ADDR_REF(i));
226         }
227 }
228
229 static void set_buffers(struct hantro_ctx *ctx)
230 {
231         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
232         struct vb2_v4l2_buffer *src_buf, *dst_buf;
233         struct hantro_dev *vpu = ctx->dev;
234         dma_addr_t src_dma, dst_dma;
235         size_t offset = 0;
236
237         src_buf = hantro_get_src_buf(ctx);
238         dst_buf = hantro_get_dst_buf(ctx);
239
240         /* Source (stream) buffer. */
241         src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
242         vdpu_write_relaxed(vpu, src_dma, G1_REG_ADDR_STR);
243
244         /* Destination (decoded frame) buffer. */
245         dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf);
246         /* Adjust dma addr to start at second line for bottom field */
247         if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
248                 offset = ALIGN(ctx->src_fmt.width, MB_DIM);
249         vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST);
250
251         /* Higher profiles require DMV buffer appended to reference frames. */
252         if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
253                 unsigned int bytes_per_mb = 384;
254
255                 /* DMV buffer for monochrome start directly after Y-plane */
256                 if (ctrls->sps->profile_idc >= 100 &&
257                     ctrls->sps->chroma_format_idc == 0)
258                         bytes_per_mb = 256;
259                 offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
260                          MB_HEIGHT(ctx->src_fmt.height);
261
262                 /*
263                  * DMV buffer is split in two for field encoded frames,
264                  * adjust offset for bottom field
265                  */
266                 if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)
267                         offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
268                                   MB_HEIGHT(ctx->src_fmt.height);
269                 vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV);
270         }
271
272         /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
273         vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE);
274 }
275
276 void hantro_g1_h264_dec_run(struct hantro_ctx *ctx)
277 {
278         struct hantro_dev *vpu = ctx->dev;
279
280         /* Prepare the H264 decoder context. */
281         if (hantro_h264_dec_prepare_run(ctx))
282                 return;
283
284         /* Configure hardware registers. */
285         set_params(ctx);
286         set_ref(ctx);
287         set_buffers(ctx);
288
289         hantro_end_prepare_run(ctx);
290
291         /* Start decoding! */
292         vdpu_write_relaxed(vpu,
293                            G1_REG_CONFIG_DEC_AXI_RD_ID(0xffu) |
294                            G1_REG_CONFIG_DEC_TIMEOUT_E |
295                            G1_REG_CONFIG_DEC_OUT_ENDIAN |
296                            G1_REG_CONFIG_DEC_STRENDIAN_E |
297                            G1_REG_CONFIG_DEC_MAX_BURST(16) |
298                            G1_REG_CONFIG_DEC_OUTSWAP32_E |
299                            G1_REG_CONFIG_DEC_INSWAP32_E |
300                            G1_REG_CONFIG_DEC_STRSWAP32_E |
301                            G1_REG_CONFIG_DEC_CLK_GATE_E,
302                            G1_REG_CONFIG);
303         vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
304 }