Mention branches and keyring.
[releases.git] / hantro / rockchip_vpu2_hw_jpeg_enc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Hantro VPU codec driver
4  *
5  * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
6  *
7  * JPEG encoder
8  * ------------
9  * The VPU JPEG encoder produces JPEG baseline sequential format.
10  * The quantization coefficients are 8-bit values, complying with
11  * the baseline specification. Therefore, it requires
12  * luma and chroma quantization tables. The hardware does entropy
13  * encoding using internal Huffman tables, as specified in the JPEG
14  * specification.
15  *
16  * In other words, only the luma and chroma quantization tables are
17  * required for the encoding operation.
18  *
19  * Quantization luma table values are written to registers
20  * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to
21  * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither
22  * zigzag, nor linear.
23  */
24
25 #include <asm/unaligned.h>
26 #include <media/v4l2-mem2mem.h>
27 #include "hantro_jpeg.h"
28 #include "hantro.h"
29 #include "hantro_v4l2.h"
30 #include "hantro_hw.h"
31 #include "rockchip_vpu2_regs.h"
32
/*
 * Number of 32-bit register writes issued per quantization table
 * (one set for luma, one set for chroma).
 */
#define VEPU_JPEG_QUANT_TABLE_COUNT 16
34
35 static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu,
36                                            struct hantro_ctx *ctx)
37 {
38         u32 overfill_r, overfill_b;
39         u32 reg;
40
41         /*
42          * The format width and height are already macroblock aligned
43          * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
44          * format width and height can be further modified by
45          * .vidioc_s_selection(), and the width is 4-aligned.
46          */
47         overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width;
48         overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height;
49
50         reg = VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width);
51         vepu_write_relaxed(vpu, reg, VEPU_REG_INPUT_LUMA_INFO);
52
53         reg = VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) |
54               VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b);
55         /*
56          * This register controls the input crop, as the offset
57          * from the right/bottom within the last macroblock. The offset from the
58          * right must be divided by 4 and so the crop must be aligned to 4 pixels
59          * horizontally.
60          */
61         vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET);
62
63         reg = VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
64         vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_CTRL1);
65 }
66
67 static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu,
68                                                struct hantro_ctx *ctx,
69                                                struct vb2_buffer *src_buf,
70                                                struct vb2_buffer *dst_buf)
71 {
72         struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
73         dma_addr_t src[3];
74         u32 size_left;
75
76         size_left = vb2_plane_size(dst_buf, 0) - ctx->vpu_dst_fmt->header_size;
77         if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
78                 size_left = 0;
79
80         WARN_ON(pix_fmt->num_planes > 3);
81
82         vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(dst_buf, 0) +
83                                 ctx->vpu_dst_fmt->header_size,
84                            VEPU_REG_ADDR_OUTPUT_STREAM);
85         vepu_write_relaxed(vpu, size_left, VEPU_REG_STR_BUF_LIMIT);
86
87         if (pix_fmt->num_planes == 1) {
88                 src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
89                 vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
90         } else if (pix_fmt->num_planes == 2) {
91                 src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
92                 src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
93                 vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
94                 vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1);
95         } else {
96                 src[0] = vb2_dma_contig_plane_dma_addr(src_buf, 0);
97                 src[1] = vb2_dma_contig_plane_dma_addr(src_buf, 1);
98                 src[2] = vb2_dma_contig_plane_dma_addr(src_buf, 2);
99                 vepu_write_relaxed(vpu, src[0], VEPU_REG_ADDR_IN_PLANE_0);
100                 vepu_write_relaxed(vpu, src[1], VEPU_REG_ADDR_IN_PLANE_1);
101                 vepu_write_relaxed(vpu, src[2], VEPU_REG_ADDR_IN_PLANE_2);
102         }
103 }
104
105 static void
106 rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu,
107                                   unsigned char *luma_qtable,
108                                   unsigned char *chroma_qtable)
109 {
110         u32 reg, i;
111         __be32 *luma_qtable_p;
112         __be32 *chroma_qtable_p;
113
114         luma_qtable_p = (__be32 *)luma_qtable;
115         chroma_qtable_p = (__be32 *)chroma_qtable;
116
117         /*
118          * Quantization table registers must be written in contiguous blocks.
119          * DO NOT collapse the below two "for" loops into one.
120          */
121         for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
122                 reg = get_unaligned_be32(&luma_qtable_p[i]);
123                 vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_LUMA_QUAT(i));
124         }
125
126         for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
127                 reg = get_unaligned_be32(&chroma_qtable_p[i]);
128                 vepu_write_relaxed(vpu, reg, VEPU_REG_JPEG_CHROMA_QUAT(i));
129         }
130 }
131
132 int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
133 {
134         struct hantro_dev *vpu = ctx->dev;
135         struct vb2_v4l2_buffer *src_buf, *dst_buf;
136         struct hantro_jpeg_ctx jpeg_ctx;
137         u32 reg;
138
139         src_buf = hantro_get_src_buf(ctx);
140         dst_buf = hantro_get_dst_buf(ctx);
141
142         hantro_start_prepare_run(ctx);
143
144         memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
145         jpeg_ctx.buffer = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
146         if (!jpeg_ctx.buffer)
147                 return -ENOMEM;
148
149         jpeg_ctx.width = ctx->dst_fmt.width;
150         jpeg_ctx.height = ctx->dst_fmt.height;
151         jpeg_ctx.quality = ctx->jpeg_quality;
152         hantro_jpeg_header_assemble(&jpeg_ctx);
153
154         /* Switch to JPEG encoder mode before writing registers */
155         vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG,
156                            VEPU_REG_ENCODE_START);
157
158         rockchip_vpu2_set_src_img_ctrl(vpu, ctx);
159         rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf,
160                                            &dst_buf->vb2_buf);
161         rockchip_vpu2_jpeg_enc_set_qtable(vpu, jpeg_ctx.hw_luma_qtable,
162                                           jpeg_ctx.hw_chroma_qtable);
163
164         reg = VEPU_REG_OUTPUT_SWAP32
165                 | VEPU_REG_OUTPUT_SWAP16
166                 | VEPU_REG_OUTPUT_SWAP8
167                 | VEPU_REG_INPUT_SWAP8
168                 | VEPU_REG_INPUT_SWAP16
169                 | VEPU_REG_INPUT_SWAP32;
170         /* Make sure that all registers are written at this point. */
171         vepu_write(vpu, reg, VEPU_REG_DATA_ENDIAN);
172
173         reg = VEPU_REG_AXI_CTRL_BURST_LEN(16);
174         vepu_write_relaxed(vpu, reg, VEPU_REG_AXI_CTRL);
175
176         reg = VEPU_REG_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width))
177                 | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
178                 | VEPU_REG_FRAME_TYPE_INTRA
179                 | VEPU_REG_ENCODE_FORMAT_JPEG
180                 | VEPU_REG_ENCODE_ENABLE;
181
182         /* Kick the watchdog and start encoding */
183         hantro_end_prepare_run(ctx);
184         vepu_write(vpu, reg, VEPU_REG_ENCODE_START);
185
186         return 0;
187 }
188
189 void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx)
190 {
191         struct hantro_dev *vpu = ctx->dev;
192         u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8;
193         struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
194
195         vb2_set_plane_payload(&dst_buf->vb2_buf, 0,
196                               ctx->vpu_dst_fmt->header_size + bytesused);
197 }