GNU Linux-libre 5.10.217-gnu1
[releases.git] / drivers / media / platform / coda / coda-jpeg.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Coda multi-standard codec IP - JPEG support functions
4  *
5  * Copyright (C) 2014 Philipp Zabel, Pengutronix
6  */
7
8 #include <asm/unaligned.h>
9 #include <linux/irqreturn.h>
10 #include <linux/kernel.h>
11 #include <linux/ktime.h>
12 #include <linux/slab.h>
13 #include <linux/swab.h>
14 #include <linux/videodev2.h>
15
16 #include <media/v4l2-common.h>
17 #include <media/v4l2-fh.h>
18 #include <media/v4l2-jpeg.h>
19 #include <media/v4l2-mem2mem.h>
20 #include <media/videobuf2-core.h>
21 #include <media/videobuf2-dma-contig.h>
22
23 #include "coda.h"
24 #include "trace.h"
25
/*
 * Two-byte JPEG marker codes (ITU-T T.81). They are compared against
 * big-endian 16-bit reads of the bitstream and written big-endian when
 * emitting table segments.
 */
26 #define SOI_MARKER      0xffd8 /* start of image */
27 #define APP9_MARKER     0xffe9 /* application segment 9 */
28 #define DRI_MARKER      0xffdd /* define restart interval */
29 #define DQT_MARKER      0xffdb /* define quantization table(s) */
30 #define DHT_MARKER      0xffc4 /* define Huffman table(s) */
31 #define SOF_MARKER      0xffc0 /* start of frame (baseline DCT) */
32 #define SOS_MARKER      0xffda /* start of scan */
33 #define EOI_MARKER      0xffd9 /* end of image */
34
/*
 * Chroma format identifiers. The values double as indices into the
 * per-format lookup tables in this file (width_align, height_align,
 * bus_req_num, mcu_info).
 */
35 enum {
36         CODA9_JPEG_FORMAT_420,
37         CODA9_JPEG_FORMAT_422,
38         CODA9_JPEG_FORMAT_224,
39         CODA9_JPEG_FORMAT_444,
40         CODA9_JPEG_FORMAT_400,
41 };
42
/*
 * Huffman tables used for decoding.
 *
 * Each of the four raw tables holds 16 per-code-length counts ("bits")
 * followed by the symbol values (up to 12 for DC, up to 162 for AC); they
 * are copied verbatim from the parsed JPEG header. The min/max/ptr arrays
 * hold 16 entries per table and are derived from the raw tables by
 * coda9_jpeg_gen_dec_huff_tab().
 */
43 struct coda_huff_tab {
44         u8 luma_dc[16 + 12];
45         u8 chroma_dc[16 + 12];
46         u8 luma_ac[16 + 162];
47         u8 chroma_ac[16 + 162];
48
49         /* DC Luma, DC Chroma, AC Luma, AC Chroma */
50         s16     min[4 * 16];    /* smallest code of each length, -1 if none */
51         s16     max[4 * 16];    /* largest code of each length, -1 if none */
52         s8      ptr[4 * 16];    /* index of first value of each length, -1 if none */
53 };
54
/*
 * Number of 32-bit words in the encoder Huffman lookup data: two AC tables
 * of 256 entries each followed by two DC tables of 16 entries each.
 */
55 #define CODA9_JPEG_ENC_HUFF_DATA_SIZE   (256 + 256 + 16 + 16)
56
57 /*
58  * Typical Huffman tables for 8-bit precision luminance and
59  * chrominance from JPEG ITU-T.81 (ISO/IEC 10918-1) Annex K.3
60  */
61
/* Default luminance DC table: 16 code-length counts, then 12 symbol values */
62 static const unsigned char luma_dc[16 + 12] = {
63         /* bits: number of codes of each length 1..16 */
64         0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01,
65         0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
66         /* values: symbols in order of increasing code length */
67         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
68         0x08, 0x09, 0x0a, 0x0b,
69 };
70
/* Default chrominance DC table: 16 code-length counts, then 12 symbol values */
71 static const unsigned char chroma_dc[16 + 12] = {
72         /* bits: number of codes of each length 1..16 */
73         0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
74         0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
75         /* values: symbols in order of increasing code length */
76         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
77         0x08, 0x09, 0x0a, 0x0b,
78 };
79
/*
 * Default luminance AC table: 16 code-length counts, then 162 symbol values.
 * The array is declared two bytes longer than its contents so that its size
 * is a multiple of four bytes; the extra bytes are zero.
 */
80 static const unsigned char luma_ac[16 + 162 + 2] = {
81         /* bits: number of codes of each length 1..16 */
82         0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03,
83         0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d,
84         /* values: symbols in order of increasing code length */
85         0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
86         0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
87         0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
88         0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
89         0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
90         0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
91         0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
92         0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
93         0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
94         0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
95         0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
96         0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
97         0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
98         0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
99         0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
100         0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
101         0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
102         0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
103         0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
104         0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
105         0xf9, 0xfa, /* padded to 32-bit */
106 };
107
/*
 * Default chrominance AC table: 16 code-length counts, then 162 symbol
 * values. The array is declared two bytes longer than its contents so that
 * its size is a multiple of four bytes; the extra bytes are zero.
 */
108 static const unsigned char chroma_ac[16 + 162 + 2] = {
109         /* bits: number of codes of each length 1..16 */
110         0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04,
111         0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77,
112         /* values: symbols in order of increasing code length */
113         0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
114         0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
115         0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
116         0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
117         0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
118         0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
119         0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
120         0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
121         0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
122         0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
123         0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
124         0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
125         0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
126         0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
127         0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
128         0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
129         0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
130         0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
131         0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
132         0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
133         0xf9, 0xfa, /* padded to 32-bit */
134 };
135
136 /*
137  * Quantization tables for luminance and chrominance components in
138  * zig-zag scan order from the Freescale i.MX VPU libraries
139  */
140
/*
 * Fallback luminance quantization table (64 entries), used by
 * coda9_jpeg_load_qmat_tab() when the context has no luma table of its own.
 * Not const because it is handed out through a non-const u8 pointer there.
 */
141 static unsigned char luma_q[64] = {
142         0x06, 0x04, 0x04, 0x04, 0x05, 0x04, 0x06, 0x05,
143         0x05, 0x06, 0x09, 0x06, 0x05, 0x06, 0x09, 0x0b,
144         0x08, 0x06, 0x06, 0x08, 0x0b, 0x0c, 0x0a, 0x0a,
145         0x0b, 0x0a, 0x0a, 0x0c, 0x10, 0x0c, 0x0c, 0x0c,
146         0x0c, 0x0c, 0x0c, 0x10, 0x0c, 0x0c, 0x0c, 0x0c,
147         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
148         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
149         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
150 };
151
/*
 * Fallback chrominance quantization table (64 entries), used by
 * coda9_jpeg_load_qmat_tab() when the context has no chroma table of its
 * own. Not const because it is handed out through a non-const u8 pointer
 * there.
 */
152 static unsigned char chroma_q[64] = {
153         0x07, 0x07, 0x07, 0x0d, 0x0c, 0x0d, 0x18, 0x10,
154         0x10, 0x18, 0x14, 0x0e, 0x0e, 0x0e, 0x14, 0x14,
155         0x0e, 0x0e, 0x0e, 0x0e, 0x14, 0x11, 0x0c, 0x0c,
156         0x0c, 0x0c, 0x0c, 0x11, 0x11, 0x0c, 0x0c, 0x0c,
157         0x0c, 0x0c, 0x0c, 0x11, 0x0c, 0x0c, 0x0c, 0x0c,
158         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
159         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
160         0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
161 };
162
/*
 * Per-chroma-format width value, indexed by CODA9_JPEG_FORMAT_*.
 * NOTE(review): presumably the required width alignment in pixels (one MCU
 * wide) - confirm against the users of this table outside this chunk.
 */
163 static const unsigned char width_align[] = {
164         [CODA9_JPEG_FORMAT_420] = 16,
165         [CODA9_JPEG_FORMAT_422] = 16,
166         [CODA9_JPEG_FORMAT_224] = 8,
167         [CODA9_JPEG_FORMAT_444] = 8,
168         [CODA9_JPEG_FORMAT_400] = 8,
169 };
170
/*
 * Per-chroma-format height value, indexed by CODA9_JPEG_FORMAT_*.
 * NOTE(review): presumably the required height alignment in pixels (one MCU
 * high) - confirm against the users of this table outside this chunk.
 */
171 static const unsigned char height_align[] = {
172         [CODA9_JPEG_FORMAT_420] = 16,
173         [CODA9_JPEG_FORMAT_422] = 8,
174         [CODA9_JPEG_FORMAT_224] = 16,
175         [CODA9_JPEG_FORMAT_444] = 8,
176         [CODA9_JPEG_FORMAT_400] = 8,
177 };
178
179 static int coda9_jpeg_chroma_format(u32 pixfmt)
180 {
181         switch (pixfmt) {
182         case V4L2_PIX_FMT_YUV420:
183         case V4L2_PIX_FMT_NV12:
184                 return CODA9_JPEG_FORMAT_420;
185         case V4L2_PIX_FMT_YUV422P:
186                 return CODA9_JPEG_FORMAT_422;
187         case V4L2_PIX_FMT_YUV444:
188                 return CODA9_JPEG_FORMAT_444;
189         case V4L2_PIX_FMT_GREY:
190                 return CODA9_JPEG_FORMAT_400;
191         }
192         return -EINVAL;
193 }
194
/* Describes one table to be copied into the parameter buffer */
195 struct coda_memcpy_desc {
196         int offset;             /* byte offset of the destination in the parameter buffer */
197         const void *src;        /* source table */
198         size_t len;             /* source length in bytes */
199 };
200
201 static void coda_memcpy_parabuf(void *parabuf,
202                                 const struct coda_memcpy_desc *desc)
203 {
204         u32 *dst = parabuf + desc->offset;
205         const u32 *src = desc->src;
206         int len = desc->len / 4;
207         int i;
208
209         for (i = 0; i < len; i += 2) {
210                 dst[i + 1] = swab32(src[i]);
211                 dst[i] = swab32(src[i + 1]);
212         }
213 }
214
215 int coda_jpeg_write_tables(struct coda_ctx *ctx)
216 {
217         int i;
218         static const struct coda_memcpy_desc huff[8] = {
219                 { 0,   luma_dc,    sizeof(luma_dc)    },
220                 { 32,  luma_ac,    sizeof(luma_ac)    },
221                 { 216, chroma_dc,  sizeof(chroma_dc)  },
222                 { 248, chroma_ac,  sizeof(chroma_ac)  },
223         };
224         struct coda_memcpy_desc qmat[3] = {
225                 { 512, ctx->params.jpeg_qmat_tab[0], 64 },
226                 { 576, ctx->params.jpeg_qmat_tab[1], 64 },
227                 { 640, ctx->params.jpeg_qmat_tab[1], 64 },
228         };
229
230         /* Write huffman tables to parameter memory */
231         for (i = 0; i < ARRAY_SIZE(huff); i++)
232                 coda_memcpy_parabuf(ctx->parabuf.vaddr, huff + i);
233
234         /* Write Q-matrix to parameter memory */
235         for (i = 0; i < ARRAY_SIZE(qmat); i++)
236                 coda_memcpy_parabuf(ctx->parabuf.vaddr, qmat + i);
237
238         return 0;
239 }
240
241 bool coda_jpeg_check_buffer(struct coda_ctx *ctx, struct vb2_buffer *vb)
242 {
243         void *vaddr = vb2_plane_vaddr(vb, 0);
244         u16 soi, eoi;
245         int len, i;
246
247         soi = be16_to_cpup((__be16 *)vaddr);
248         if (soi != SOI_MARKER)
249                 return false;
250
251         len = vb2_get_plane_payload(vb, 0);
252         vaddr += len - 2;
253         for (i = 0; i < 32; i++) {
254                 eoi = be16_to_cpup((__be16 *)(vaddr - i));
255                 if (eoi == EOI_MARKER) {
256                         if (i > 0)
257                                 vb2_set_plane_payload(vb, 0, len - i);
258                         return true;
259                 }
260         }
261
262         return false;
263 }
264
/* Defined further down; declared here for coda_jpeg_decode_header() */
265 static int coda9_jpeg_gen_dec_huff_tab(struct coda_ctx *ctx, int tab_num);
266
267 int coda_jpeg_decode_header(struct coda_ctx *ctx, struct vb2_buffer *vb)
268 {
269         struct coda_dev *dev = ctx->dev;
270         u8 *buf = vb2_plane_vaddr(vb, 0);
271         size_t len = vb2_get_plane_payload(vb, 0);
272         struct v4l2_jpeg_scan_header scan_header;
273         struct v4l2_jpeg_reference quantization_tables[4] = { };
274         struct v4l2_jpeg_reference huffman_tables[4] = { };
275         struct v4l2_jpeg_header header = {
276                 .scan = &scan_header,
277                 .quantization_tables = quantization_tables,
278                 .huffman_tables = huffman_tables,
279         };
280         struct coda_q_data *q_data_src;
281         struct coda_huff_tab *huff_tab;
282         int i, j, ret;
283
284         ret = v4l2_jpeg_parse_header(buf, len, &header);
285         if (ret < 0) {
286                 v4l2_err(&dev->v4l2_dev, "failed to parse header\n");
287                 return ret;
288         }
289
290         ctx->params.jpeg_restart_interval = header.restart_interval;
291
292         /* check frame header */
293         if (header.frame.height > ctx->codec->max_h ||
294             header.frame.width > ctx->codec->max_w) {
295                 v4l2_err(&dev->v4l2_dev, "invalid dimensions: %dx%d\n",
296                          header.frame.width, header.frame.height);
297                 return -EINVAL;
298         }
299
300         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
301         if (header.frame.height != q_data_src->height ||
302             header.frame.width != q_data_src->width) {
303                 v4l2_err(&dev->v4l2_dev,
304                          "dimensions don't match format: %dx%d\n",
305                          header.frame.width, header.frame.height);
306                 return -EINVAL;
307         }
308
309         if (header.frame.num_components != 3) {
310                 v4l2_err(&dev->v4l2_dev,
311                          "unsupported number of components: %d\n",
312                          header.frame.num_components);
313                 return -EINVAL;
314         }
315
316         /* install quantization tables */
317         if (quantization_tables[3].start) {
318                 v4l2_err(&dev->v4l2_dev,
319                          "only 3 quantization tables supported\n");
320                 return -EINVAL;
321         }
322         for (i = 0; i < 3; i++) {
323                 if (!quantization_tables[i].start)
324                         continue;
325                 if (quantization_tables[i].length != 64) {
326                         v4l2_err(&dev->v4l2_dev,
327                                  "only 8-bit quantization tables supported\n");
328                         continue;
329                 }
330                 if (!ctx->params.jpeg_qmat_tab[i]) {
331                         ctx->params.jpeg_qmat_tab[i] = kmalloc(64, GFP_KERNEL);
332                         if (!ctx->params.jpeg_qmat_tab[i])
333                                 return -ENOMEM;
334                 }
335                 memcpy(ctx->params.jpeg_qmat_tab[i],
336                        quantization_tables[i].start, 64);
337         }
338
339         /* install Huffman tables */
340         for (i = 0; i < 4; i++) {
341                 if (!huffman_tables[i].start) {
342                         v4l2_err(&dev->v4l2_dev, "missing Huffman table\n");
343                         return -EINVAL;
344                 }
345                 /* AC tables should be between 17 -> 178, DC between 17 -> 28 */
346                 if (huffman_tables[i].length < 17 ||
347                     huffman_tables[i].length > 178 ||
348                     ((i & 2) == 0 && huffman_tables[i].length > 28)) {
349                         v4l2_err(&dev->v4l2_dev,
350                                  "invalid Huffman table %d length: %zu\n",
351                                  i, huffman_tables[i].length);
352                         return -EINVAL;
353                 }
354         }
355         huff_tab = ctx->params.jpeg_huff_tab;
356         if (!huff_tab) {
357                 huff_tab = kzalloc(sizeof(struct coda_huff_tab), GFP_KERNEL);
358                 if (!huff_tab)
359                         return -ENOMEM;
360                 ctx->params.jpeg_huff_tab = huff_tab;
361         }
362
363         memset(huff_tab, 0, sizeof(*huff_tab));
364         memcpy(huff_tab->luma_dc, huffman_tables[0].start, huffman_tables[0].length);
365         memcpy(huff_tab->chroma_dc, huffman_tables[1].start, huffman_tables[1].length);
366         memcpy(huff_tab->luma_ac, huffman_tables[2].start, huffman_tables[2].length);
367         memcpy(huff_tab->chroma_ac, huffman_tables[3].start, huffman_tables[3].length);
368
369         /* check scan header */
370         for (i = 0; i < scan_header.num_components; i++) {
371                 struct v4l2_jpeg_scan_component_spec *scan_component;
372
373                 scan_component = &scan_header.component[i];
374                 for (j = 0; j < header.frame.num_components; j++) {
375                         if (header.frame.component[j].component_identifier ==
376                             scan_component->component_selector)
377                                 break;
378                 }
379                 if (j == header.frame.num_components)
380                         continue;
381
382                 ctx->params.jpeg_huff_dc_index[j] =
383                         scan_component->dc_entropy_coding_table_selector;
384                 ctx->params.jpeg_huff_ac_index[j] =
385                         scan_component->ac_entropy_coding_table_selector;
386         }
387
388         /* Generate Huffman table information */
389         for (i = 0; i < 4; i++)
390                 coda9_jpeg_gen_dec_huff_tab(ctx, i);
391
392         /* start of entropy coded segment */
393         ctx->jpeg_ecs_offset = header.ecs_offset;
394
395         switch (header.frame.subsampling) {
396         case V4L2_JPEG_CHROMA_SUBSAMPLING_420:
397         case V4L2_JPEG_CHROMA_SUBSAMPLING_422:
398                 ctx->params.jpeg_chroma_subsampling = header.frame.subsampling;
399                 break;
400         default:
401                 v4l2_err(&dev->v4l2_dev, "chroma subsampling not supported: %d",
402                          header.frame.subsampling);
403                 return -EINVAL;
404         }
405
406         return 0;
407 }
408
409 static inline void coda9_jpeg_write_huff_values(struct coda_dev *dev, u8 *bits,
410                                                 int num_values)
411 {
412         s8 *values = (s8 *)(bits + 16);
413         int huff_length, i;
414
415         for (huff_length = 0, i = 0; i < 16; i++)
416                 huff_length += bits[i];
417         for (i = huff_length; i < num_values; i++)
418                 values[i] = -1;
419         for (i = 0; i < num_values; i++)
420                 coda_write(dev, (s32)values[i], CODA9_REG_JPEG_HUFF_DATA);
421 }
422
423 static int coda9_jpeg_dec_huff_setup(struct coda_ctx *ctx)
424 {
425         struct coda_huff_tab *huff_tab = ctx->params.jpeg_huff_tab;
426         struct coda_dev *dev = ctx->dev;
427         s16 *huff_min = huff_tab->min;
428         s16 *huff_max = huff_tab->max;
429         s8 *huff_ptr = huff_tab->ptr;
430         int i;
431
432         /* MIN Tables */
433         coda_write(dev, 0x003, CODA9_REG_JPEG_HUFF_CTRL);
434         coda_write(dev, 0x000, CODA9_REG_JPEG_HUFF_ADDR);
435         for (i = 0; i < 4 * 16; i++)
436                 coda_write(dev, (s32)huff_min[i], CODA9_REG_JPEG_HUFF_DATA);
437
438         /* MAX Tables */
439         coda_write(dev, 0x403, CODA9_REG_JPEG_HUFF_CTRL);
440         coda_write(dev, 0x440, CODA9_REG_JPEG_HUFF_ADDR);
441         for (i = 0; i < 4 * 16; i++)
442                 coda_write(dev, (s32)huff_max[i], CODA9_REG_JPEG_HUFF_DATA);
443
444         /* PTR Tables */
445         coda_write(dev, 0x803, CODA9_REG_JPEG_HUFF_CTRL);
446         coda_write(dev, 0x880, CODA9_REG_JPEG_HUFF_ADDR);
447         for (i = 0; i < 4 * 16; i++)
448                 coda_write(dev, (s32)huff_ptr[i], CODA9_REG_JPEG_HUFF_DATA);
449
450         /* VAL Tables: DC Luma, DC Chroma, AC Luma, AC Chroma */
451         coda_write(dev, 0xc03, CODA9_REG_JPEG_HUFF_CTRL);
452         coda9_jpeg_write_huff_values(dev, huff_tab->luma_dc, 12);
453         coda9_jpeg_write_huff_values(dev, huff_tab->chroma_dc, 12);
454         coda9_jpeg_write_huff_values(dev, huff_tab->luma_ac, 162);
455         coda9_jpeg_write_huff_values(dev, huff_tab->chroma_ac, 162);
456         coda_write(dev, 0x000, CODA9_REG_JPEG_HUFF_CTRL);
457         return 0;
458 }
459
460 static inline void coda9_jpeg_write_qmat_tab(struct coda_dev *dev,
461                                              u8 *qmat, int index)
462 {
463         int i;
464
465         coda_write(dev, index | 0x3, CODA9_REG_JPEG_QMAT_CTRL);
466         for (i = 0; i < 64; i++)
467                 coda_write(dev, qmat[i], CODA9_REG_JPEG_QMAT_DATA);
468         coda_write(dev, 0, CODA9_REG_JPEG_QMAT_CTRL);
469 }
470
471 static void coda9_jpeg_qmat_setup(struct coda_ctx *ctx)
472 {
473         struct coda_dev *dev = ctx->dev;
474         int *qmat_index = ctx->params.jpeg_qmat_index;
475         u8 **qmat_tab = ctx->params.jpeg_qmat_tab;
476
477         coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[0]], 0x00);
478         coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[1]], 0x40);
479         coda9_jpeg_write_qmat_tab(dev, qmat_tab[qmat_index[2]], 0x80);
480 }
481
482 static void coda9_jpeg_dec_bbc_gbu_setup(struct coda_ctx *ctx,
483                                          struct vb2_buffer *buf, u32 ecs_offset)
484 {
485         struct coda_dev *dev = ctx->dev;
486         int page_ptr, word_ptr, bit_ptr;
487         u32 bbc_base_addr, end_addr;
488         int bbc_cur_pos;
489         int ret, val;
490
491         bbc_base_addr = vb2_dma_contig_plane_dma_addr(buf, 0);
492         end_addr = bbc_base_addr + vb2_get_plane_payload(buf, 0);
493
494         page_ptr = ecs_offset / 256;
495         word_ptr = (ecs_offset % 256) / 4;
496         if (page_ptr & 1)
497                 word_ptr += 64;
498         bit_ptr = (ecs_offset % 4) * 8;
499         if (word_ptr & 1)
500                 bit_ptr += 32;
501         word_ptr &= ~0x1;
502
503         coda_write(dev, end_addr, CODA9_REG_JPEG_BBC_WR_PTR);
504         coda_write(dev, bbc_base_addr, CODA9_REG_JPEG_BBC_BAS_ADDR);
505
506         /* Leave 3 256-byte page margin to avoid a BBC interrupt */
507         coda_write(dev, end_addr + 256 * 3 + 256, CODA9_REG_JPEG_BBC_END_ADDR);
508         val = DIV_ROUND_UP(vb2_plane_size(buf, 0), 256) + 3;
509         coda_write(dev, BIT(31) | val, CODA9_REG_JPEG_BBC_STRM_CTRL);
510
511         bbc_cur_pos = page_ptr;
512         coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
513         coda_write(dev, bbc_base_addr + (bbc_cur_pos << 8),
514                         CODA9_REG_JPEG_BBC_EXT_ADDR);
515         coda_write(dev, (bbc_cur_pos & 1) << 6, CODA9_REG_JPEG_BBC_INT_ADDR);
516         coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
517         coda_write(dev, 0, CODA9_REG_JPEG_BBC_COMMAND);
518         do {
519                 ret = coda_read(dev, CODA9_REG_JPEG_BBC_BUSY);
520         } while (ret == 1);
521
522         bbc_cur_pos++;
523         coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
524         coda_write(dev, bbc_base_addr + (bbc_cur_pos << 8),
525                         CODA9_REG_JPEG_BBC_EXT_ADDR);
526         coda_write(dev, (bbc_cur_pos & 1) << 6, CODA9_REG_JPEG_BBC_INT_ADDR);
527         coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
528         coda_write(dev, 0, CODA9_REG_JPEG_BBC_COMMAND);
529         do {
530                 ret = coda_read(dev, CODA9_REG_JPEG_BBC_BUSY);
531         } while (ret == 1);
532
533         bbc_cur_pos++;
534         coda_write(dev, bbc_cur_pos, CODA9_REG_JPEG_BBC_CUR_POS);
535         coda_write(dev, 1, CODA9_REG_JPEG_BBC_CTRL);
536
537         coda_write(dev, 0, CODA9_REG_JPEG_GBU_TT_CNT);
538         coda_write(dev, word_ptr, CODA9_REG_JPEG_GBU_WD_PTR);
539         coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR);
540         coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER);
541         if (page_ptr & 1) {
542                 coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBIR);
543                 coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBHR);
544         } else {
545                 coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBIR);
546                 coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBHR);
547         }
548         coda_write(dev, 4, CODA9_REG_JPEG_GBU_CTRL);
549         coda_write(dev, bit_ptr, CODA9_REG_JPEG_GBU_FF_RPTR);
550         coda_write(dev, 3, CODA9_REG_JPEG_GBU_CTRL);
551 }
552
/*
 * Per-chroma-format value, indexed by CODA9_JPEG_FORMAT_*.
 * NOTE(review): presumably the number of bus requests the hardware is
 * configured with for each format - confirm against the users of this table
 * outside this chunk.
 */
553 static const int bus_req_num[] = {
554         [CODA9_JPEG_FORMAT_420] = 2,
555         [CODA9_JPEG_FORMAT_422] = 3,
556         [CODA9_JPEG_FORMAT_224] = 3,
557         [CODA9_JPEG_FORMAT_444] = 4,
558         [CODA9_JPEG_FORMAT_400] = 4,
559 };
560
/*
 * Pack the block count per MCU, the component count and one info value per
 * component into a single word, each field shifted to its
 * CODA9_JPEG_*_OFFSET bit position.
 */
561 #define MCU_INFO(mcu_block_num, comp_num, comp0_info, comp1_info, comp2_info) \
562         (((mcu_block_num) << CODA9_JPEG_MCU_BLOCK_NUM_OFFSET) | \
563          ((comp_num) << CODA9_JPEG_COMP_NUM_OFFSET) | \
564          ((comp0_info) << CODA9_JPEG_COMP0_INFO_OFFSET) | \
565          ((comp1_info) << CODA9_JPEG_COMP1_INFO_OFFSET) | \
566          ((comp2_info) << CODA9_JPEG_COMP2_INFO_OFFSET))
567
/*
 * Packed MCU description for each chroma format, indexed by
 * CODA9_JPEG_FORMAT_*. Arguments are: blocks per MCU, number of components,
 * then the info values of components 0, 1 and 2.
 */
568 static const u32 mcu_info[] = {
569         [CODA9_JPEG_FORMAT_420] = MCU_INFO(6, 3, 10, 5, 5),
570         [CODA9_JPEG_FORMAT_422] = MCU_INFO(4, 3, 9, 5, 5),
571         [CODA9_JPEG_FORMAT_224] = MCU_INFO(4, 3, 6, 5, 5),
572         [CODA9_JPEG_FORMAT_444] = MCU_INFO(3, 3, 5, 5, 5),
573         [CODA9_JPEG_FORMAT_400] = MCU_INFO(1, 1, 5, 0, 0),
574 };
575
576 /*
577  * Convert Huffman table specifcations to tables of codes and code lengths.
578  * For reference, see JPEG ITU-T.81 (ISO/IEC 10918-1) [1]
579  *
580  * [1] https://www.w3.org/Graphics/JPEG/itu-t81.pdf
581  */
582 static int coda9_jpeg_gen_enc_huff_tab(struct coda_ctx *ctx, int tab_num,
583                                        int *ehufsi, int *ehufco)
584 {
585         int i, j, k, lastk, si, code, maxsymbol;
586         const u8 *bits, *huffval;
587         struct {
588                 int size[256];
589                 int code[256];
590         } *huff;
591         static const unsigned char *huff_tabs[4] = {
592                 luma_dc, luma_ac, chroma_dc, chroma_ac,
593         };
594         int ret = -EINVAL;
595
596         huff = kzalloc(sizeof(*huff), GFP_KERNEL);
597         if (!huff)
598                 return -ENOMEM;
599
600         bits = huff_tabs[tab_num];
601         huffval = huff_tabs[tab_num] + 16;
602
603         maxsymbol = tab_num & 1 ? 256 : 16;
604
605         /* Figure C.1 - Generation of table of Huffman code sizes */
606         k = 0;
607         for (i = 1; i <= 16; i++) {
608                 j = bits[i - 1];
609                 if (k + j > maxsymbol)
610                         goto out;
611                 while (j--)
612                         huff->size[k++] = i;
613         }
614         lastk = k;
615
616         /* Figure C.2 - Generation of table of Huffman codes */
617         k = 0;
618         code = 0;
619         si = huff->size[0];
620         while (k < lastk) {
621                 while (huff->size[k] == si) {
622                         huff->code[k++] = code;
623                         code++;
624                 }
625                 if (code >= (1 << si))
626                         goto out;
627                 code <<= 1;
628                 si++;
629         }
630
631         /* Figure C.3 - Ordering procedure for encoding procedure code tables */
632         for (k = 0; k < lastk; k++) {
633                 i = huffval[k];
634                 if (i >= maxsymbol || ehufsi[i])
635                         goto out;
636                 ehufco[i] = huff->code[k];
637                 ehufsi[i] = huff->size[k];
638         }
639
640         ret = 0;
641 out:
642         kfree(huff);
643         return ret;
644 }
645
/*
 * Huffman table numbers in DC/AC luma, DC/AC chroma order: index 0 tables
 * are the luma tables, index 1 tables the chroma tables.
 */
646 #define DC_TABLE_INDEX0             0
647 #define AC_TABLE_INDEX0             1
648 #define DC_TABLE_INDEX1             2
649 #define AC_TABLE_INDEX1             3
650
651 static u8 *coda9_jpeg_get_huff_bits(struct coda_ctx *ctx, int tab_num)
652 {
653         struct coda_huff_tab *huff_tab = ctx->params.jpeg_huff_tab;
654
655         if (!huff_tab)
656                 return NULL;
657
658         switch (tab_num) {
659         case DC_TABLE_INDEX0: return huff_tab->luma_dc;
660         case AC_TABLE_INDEX0: return huff_tab->luma_ac;
661         case DC_TABLE_INDEX1: return huff_tab->chroma_dc;
662         case AC_TABLE_INDEX1: return huff_tab->chroma_ac;
663         }
664
665         return NULL;
666 }
667
668 static int coda9_jpeg_gen_dec_huff_tab(struct coda_ctx *ctx, int tab_num)
669 {
670         int ptr_cnt = 0, huff_code = 0, zero_flag = 0, data_flag = 0;
671         u8 *huff_bits;
672         s16 *huff_max;
673         s16 *huff_min;
674         s8 *huff_ptr;
675         int ofs;
676         int i;
677
678         huff_bits = coda9_jpeg_get_huff_bits(ctx, tab_num);
679         if (!huff_bits)
680                 return -EINVAL;
681
682         /* DC/AC Luma, DC/AC Chroma -> DC Luma/Chroma, AC Luma/Chroma */
683         ofs = ((tab_num & 1) << 1) | ((tab_num >> 1) & 1);
684         ofs *= 16;
685
686         huff_ptr = ctx->params.jpeg_huff_tab->ptr + ofs;
687         huff_max = ctx->params.jpeg_huff_tab->max + ofs;
688         huff_min = ctx->params.jpeg_huff_tab->min + ofs;
689
690         for (i = 0; i < 16; i++) {
691                 if (huff_bits[i]) {
692                         huff_ptr[i] = ptr_cnt;
693                         ptr_cnt += huff_bits[i];
694                         huff_min[i] = huff_code;
695                         huff_max[i] = huff_code + (huff_bits[i] - 1);
696                         data_flag = 1;
697                         zero_flag = 0;
698                 } else {
699                         huff_ptr[i] = -1;
700                         huff_min[i] = -1;
701                         huff_max[i] = -1;
702                         zero_flag = 1;
703                 }
704
705                 if (data_flag == 1) {
706                         if (zero_flag == 1)
707                                 huff_code <<= 1;
708                         else
709                                 huff_code = (huff_max[i] + 1) << 1;
710                 }
711         }
712
713         return 0;
714 }
715
716 static int coda9_jpeg_load_huff_tab(struct coda_ctx *ctx)
717 {
718         struct {
719                 int size[4][256];
720                 int code[4][256];
721         } *huff;
722         u32 *huff_data;
723         int i, j;
724         int ret;
725
726         huff = kzalloc(sizeof(*huff), GFP_KERNEL);
727         if (!huff)
728                 return -ENOMEM;
729
730         /* Generate all four (luma/chroma DC/AC) code/size lookup tables */
731         for (i = 0; i < 4; i++) {
732                 ret = coda9_jpeg_gen_enc_huff_tab(ctx, i, huff->size[i],
733                                                   huff->code[i]);
734                 if (ret)
735                         goto out;
736         }
737
738         if (!ctx->params.jpeg_huff_data) {
739                 ctx->params.jpeg_huff_data =
740                         kzalloc(sizeof(u32) * CODA9_JPEG_ENC_HUFF_DATA_SIZE,
741                                 GFP_KERNEL);
742                 if (!ctx->params.jpeg_huff_data) {
743                         ret = -ENOMEM;
744                         goto out;
745                 }
746         }
747         huff_data = ctx->params.jpeg_huff_data;
748
749         for (j = 0; j < 4; j++) {
750                 /* Store Huffman lookup tables in AC0, AC1, DC0, DC1 order */
751                 int t = (j == 0) ? AC_TABLE_INDEX0 :
752                         (j == 1) ? AC_TABLE_INDEX1 :
753                         (j == 2) ? DC_TABLE_INDEX0 :
754                                    DC_TABLE_INDEX1;
755                 /* DC tables only have 16 entries */
756                 int len = (j < 2) ? 256 : 16;
757
758                 for (i = 0; i < len; i++) {
759                         if (huff->size[t][i] == 0 && huff->code[t][i] == 0)
760                                 *(huff_data++) = 0;
761                         else
762                                 *(huff_data++) =
763                                         ((huff->size[t][i] - 1) << 16) |
764                                         huff->code[t][i];
765                 }
766         }
767
768         ret = 0;
769 out:
770         kfree(huff);
771         return ret;
772 }
773
774 static void coda9_jpeg_write_huff_tab(struct coda_ctx *ctx)
775 {
776         struct coda_dev *dev = ctx->dev;
777         u32 *huff_data = ctx->params.jpeg_huff_data;
778         int i;
779
780         /* Write Huffman size/code lookup tables in AC0, AC1, DC0, DC1 order */
781         coda_write(dev, 0x3, CODA9_REG_JPEG_HUFF_CTRL);
782         for (i = 0; i < CODA9_JPEG_ENC_HUFF_DATA_SIZE; i++)
783                 coda_write(dev, *(huff_data++), CODA9_REG_JPEG_HUFF_DATA);
784         coda_write(dev, 0x0, CODA9_REG_JPEG_HUFF_CTRL);
785 }
786
787 static inline void coda9_jpeg_write_qmat_quotients(struct coda_dev *dev,
788                                                    u8 *qmat, int index)
789 {
790         int i;
791
792         coda_write(dev, index | 0x3, CODA9_REG_JPEG_QMAT_CTRL);
793         for (i = 0; i < 64; i++)
794                 coda_write(dev, 0x80000 / qmat[i], CODA9_REG_JPEG_QMAT_DATA);
795         coda_write(dev, index, CODA9_REG_JPEG_QMAT_CTRL);
796 }
797
798 static void coda9_jpeg_load_qmat_tab(struct coda_ctx *ctx)
799 {
800         struct coda_dev *dev = ctx->dev;
801         u8 *luma_tab;
802         u8 *chroma_tab;
803
804         luma_tab = ctx->params.jpeg_qmat_tab[0];
805         if (!luma_tab)
806                 luma_tab = luma_q;
807
808         chroma_tab = ctx->params.jpeg_qmat_tab[1];
809         if (!chroma_tab)
810                 chroma_tab = chroma_q;
811
812         coda9_jpeg_write_qmat_quotients(dev, luma_tab, 0x00);
813         coda9_jpeg_write_qmat_quotients(dev, chroma_tab, 0x40);
814         coda9_jpeg_write_qmat_quotients(dev, chroma_tab, 0x80);
815 }
816
817 struct coda_jpeg_stream {
818         u8 *curr;
819         u8 *end;
820 };
821
822 static inline int coda_jpeg_put_byte(u8 byte, struct coda_jpeg_stream *stream)
823 {
824         if (stream->curr >= stream->end)
825                 return -EINVAL;
826
827         *stream->curr++ = byte;
828
829         return 0;
830 }
831
832 static inline int coda_jpeg_put_word(u16 word, struct coda_jpeg_stream *stream)
833 {
834         if (stream->curr + sizeof(__be16) > stream->end)
835                 return -EINVAL;
836
837         put_unaligned_be16(word, stream->curr);
838         stream->curr += sizeof(__be16);
839
840         return 0;
841 }
842
843 static int coda_jpeg_put_table(u16 marker, u8 index, const u8 *table,
844                                size_t len, struct coda_jpeg_stream *stream)
845 {
846         int i, ret;
847
848         ret = coda_jpeg_put_word(marker, stream);
849         if (ret < 0)
850                 return ret;
851         ret = coda_jpeg_put_word(3 + len, stream);
852         if (ret < 0)
853                 return ret;
854         ret = coda_jpeg_put_byte(index, stream);
855         for (i = 0; i < len && ret == 0; i++)
856                 ret = coda_jpeg_put_byte(table[i], stream);
857
858         return ret;
859 }
860
861 static int coda_jpeg_define_quantization_table(struct coda_ctx *ctx, u8 index,
862                                                struct coda_jpeg_stream *stream)
863 {
864         return coda_jpeg_put_table(DQT_MARKER, index,
865                                    ctx->params.jpeg_qmat_tab[index], 64,
866                                    stream);
867 }
868
869 static int coda_jpeg_define_huffman_table(u8 index, const u8 *table, size_t len,
870                                           struct coda_jpeg_stream *stream)
871 {
872         return coda_jpeg_put_table(DHT_MARKER, index, table, len, stream);
873 }
874
875 static int coda9_jpeg_encode_header(struct coda_ctx *ctx, int len, u8 *buf)
876 {
877         struct coda_jpeg_stream stream = { buf, buf + len };
878         struct coda_q_data *q_data_src;
879         int chroma_format, comp_num;
880         int i, ret, pad;
881
882         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
883         chroma_format = coda9_jpeg_chroma_format(q_data_src->fourcc);
884         if (chroma_format < 0)
885                 return 0;
886
887         /* Start Of Image */
888         ret = coda_jpeg_put_word(SOI_MARKER, &stream);
889         if (ret < 0)
890                 return ret;
891
892         /* Define Restart Interval */
893         if (ctx->params.jpeg_restart_interval) {
894                 ret = coda_jpeg_put_word(DRI_MARKER, &stream);
895                 if (ret < 0)
896                         return ret;
897                 ret = coda_jpeg_put_word(4, &stream);
898                 if (ret < 0)
899                         return ret;
900                 ret = coda_jpeg_put_word(ctx->params.jpeg_restart_interval,
901                                          &stream);
902                 if (ret < 0)
903                         return ret;
904         }
905
906         /* Define Quantization Tables */
907         ret = coda_jpeg_define_quantization_table(ctx, 0x00, &stream);
908         if (ret < 0)
909                 return ret;
910         if (chroma_format != CODA9_JPEG_FORMAT_400) {
911                 ret = coda_jpeg_define_quantization_table(ctx, 0x01, &stream);
912                 if (ret < 0)
913                         return ret;
914         }
915
916         /* Define Huffman Tables */
917         ret = coda_jpeg_define_huffman_table(0x00, luma_dc, 16 + 12, &stream);
918         if (ret < 0)
919                 return ret;
920         ret = coda_jpeg_define_huffman_table(0x10, luma_ac, 16 + 162, &stream);
921         if (ret < 0)
922                 return ret;
923         if (chroma_format != CODA9_JPEG_FORMAT_400) {
924                 ret = coda_jpeg_define_huffman_table(0x01, chroma_dc, 16 + 12,
925                                                      &stream);
926                 if (ret < 0)
927                         return ret;
928                 ret = coda_jpeg_define_huffman_table(0x11, chroma_ac, 16 + 162,
929                                                      &stream);
930                 if (ret < 0)
931                         return ret;
932         }
933
934         /* Start Of Frame */
935         ret = coda_jpeg_put_word(SOF_MARKER, &stream);
936         if (ret < 0)
937                 return ret;
938         comp_num = (chroma_format == CODA9_JPEG_FORMAT_400) ? 1 : 3;
939         ret = coda_jpeg_put_word(8 + comp_num * 3, &stream);
940         if (ret < 0)
941                 return ret;
942         ret = coda_jpeg_put_byte(0x08, &stream);
943         if (ret < 0)
944                 return ret;
945         ret = coda_jpeg_put_word(q_data_src->height, &stream);
946         if (ret < 0)
947                 return ret;
948         ret = coda_jpeg_put_word(q_data_src->width, &stream);
949         if (ret < 0)
950                 return ret;
951         ret = coda_jpeg_put_byte(comp_num, &stream);
952         if (ret < 0)
953                 return ret;
954         for (i = 0; i < comp_num; i++) {
955                 static unsigned char subsampling[5][3] = {
956                         [CODA9_JPEG_FORMAT_420] = { 0x22, 0x11, 0x11 },
957                         [CODA9_JPEG_FORMAT_422] = { 0x21, 0x11, 0x11 },
958                         [CODA9_JPEG_FORMAT_224] = { 0x12, 0x11, 0x11 },
959                         [CODA9_JPEG_FORMAT_444] = { 0x11, 0x11, 0x11 },
960                         [CODA9_JPEG_FORMAT_400] = { 0x11 },
961                 };
962
963                 /* Component identifier, matches SOS */
964                 ret = coda_jpeg_put_byte(i + 1, &stream);
965                 if (ret < 0)
966                         return ret;
967                 ret = coda_jpeg_put_byte(subsampling[chroma_format][i],
968                                          &stream);
969                 if (ret < 0)
970                         return ret;
971                 /* Chroma table index */
972                 ret = coda_jpeg_put_byte((i == 0) ? 0 : 1, &stream);
973                 if (ret < 0)
974                         return ret;
975         }
976
977         /* Pad to multiple of 8 bytes */
978         pad = (stream.curr - buf) % 8;
979         if (pad) {
980                 pad = 8 - pad;
981                 while (pad--) {
982                         ret = coda_jpeg_put_byte(0x00, &stream);
983                         if (ret < 0)
984                                 return ret;
985                 }
986         }
987
988         return stream.curr - buf;
989 }
990
991 /*
992  * Scale quantization table using nonlinear scaling factor
993  * u8 qtab[64], scale [50,190]
994  */
995 static void coda_scale_quant_table(u8 *q_tab, int scale)
996 {
997         unsigned int temp;
998         int i;
999
1000         for (i = 0; i < 64; i++) {
1001                 temp = DIV_ROUND_CLOSEST((unsigned int)q_tab[i] * scale, 100);
1002                 if (temp <= 0)
1003                         temp = 1;
1004                 if (temp > 255)
1005                         temp = 255;
1006                 q_tab[i] = (unsigned char)temp;
1007         }
1008 }
1009
1010 void coda_set_jpeg_compression_quality(struct coda_ctx *ctx, int quality)
1011 {
1012         unsigned int scale;
1013
1014         ctx->params.jpeg_quality = quality;
1015
1016         /* Clip quality setting to [5,100] interval */
1017         if (quality > 100)
1018                 quality = 100;
1019         if (quality < 5)
1020                 quality = 5;
1021
1022         /*
1023          * Non-linear scaling factor:
1024          * [5,50] -> [1000..100], [51,100] -> [98..0]
1025          */
1026         if (quality < 50)
1027                 scale = 5000 / quality;
1028         else
1029                 scale = 200 - 2 * quality;
1030
1031         if (ctx->params.jpeg_qmat_tab[0]) {
1032                 memcpy(ctx->params.jpeg_qmat_tab[0], luma_q, 64);
1033                 coda_scale_quant_table(ctx->params.jpeg_qmat_tab[0], scale);
1034         }
1035         if (ctx->params.jpeg_qmat_tab[1]) {
1036                 memcpy(ctx->params.jpeg_qmat_tab[1], chroma_q, 64);
1037                 coda_scale_quant_table(ctx->params.jpeg_qmat_tab[1], scale);
1038         }
1039 }
1040
1041 /*
1042  * Encoder context operations
1043  */
1044
1045 static int coda9_jpeg_start_encoding(struct coda_ctx *ctx)
1046 {
1047         struct coda_dev *dev = ctx->dev;
1048         int ret;
1049
1050         ret = coda9_jpeg_load_huff_tab(ctx);
1051         if (ret < 0) {
1052                 v4l2_err(&dev->v4l2_dev, "error loading Huffman tables\n");
1053                 return ret;
1054         }
1055         if (!ctx->params.jpeg_qmat_tab[0]) {
1056                 ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
1057                 if (!ctx->params.jpeg_qmat_tab[0])
1058                         return -ENOMEM;
1059         }
1060         if (!ctx->params.jpeg_qmat_tab[1]) {
1061                 ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
1062                 if (!ctx->params.jpeg_qmat_tab[1])
1063                         return -ENOMEM;
1064         }
1065         coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
1066
1067         return 0;
1068 }
1069
1070 static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx)
1071 {
1072         struct coda_q_data *q_data_src;
1073         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1074         struct coda_dev *dev = ctx->dev;
1075         u32 start_addr, end_addr;
1076         u16 aligned_width, aligned_height;
1077         bool chroma_interleave;
1078         int chroma_format;
1079         int header_len;
1080         int ret;
1081         ktime_t timeout;
1082
1083         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1084         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1085         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1086
1087         if (vb2_get_plane_payload(&src_buf->vb2_buf, 0) == 0)
1088                 vb2_set_plane_payload(&src_buf->vb2_buf, 0,
1089                                       vb2_plane_size(&src_buf->vb2_buf, 0));
1090
1091         src_buf->sequence = ctx->osequence;
1092         dst_buf->sequence = ctx->osequence;
1093         ctx->osequence++;
1094
1095         src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1096         src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1097
1098         coda_set_gdi_regs(ctx);
1099
1100         start_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1101         end_addr = start_addr + vb2_plane_size(&dst_buf->vb2_buf, 0);
1102
1103         chroma_format = coda9_jpeg_chroma_format(q_data_src->fourcc);
1104         if (chroma_format < 0)
1105                 return chroma_format;
1106
1107         /* Round image dimensions to multiple of MCU size */
1108         aligned_width = round_up(q_data_src->width, width_align[chroma_format]);
1109         aligned_height = round_up(q_data_src->height,
1110                                   height_align[chroma_format]);
1111         if (aligned_width != q_data_src->bytesperline) {
1112                 v4l2_err(&dev->v4l2_dev, "wrong stride: %d instead of %d\n",
1113                          aligned_width, q_data_src->bytesperline);
1114         }
1115
1116         header_len =
1117                 coda9_jpeg_encode_header(ctx,
1118                                          vb2_plane_size(&dst_buf->vb2_buf, 0),
1119                                          vb2_plane_vaddr(&dst_buf->vb2_buf, 0));
1120         if (header_len < 0)
1121                 return header_len;
1122
1123         coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_BAS_ADDR);
1124         coda_write(dev, end_addr, CODA9_REG_JPEG_BBC_END_ADDR);
1125         coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_WR_PTR);
1126         coda_write(dev, start_addr + header_len, CODA9_REG_JPEG_BBC_RD_PTR);
1127         coda_write(dev, 0, CODA9_REG_JPEG_BBC_CUR_POS);
1128         /* 64 words per 256-byte page */
1129         coda_write(dev, 64, CODA9_REG_JPEG_BBC_DATA_CNT);
1130         coda_write(dev, start_addr, CODA9_REG_JPEG_BBC_EXT_ADDR);
1131         coda_write(dev, 0, CODA9_REG_JPEG_BBC_INT_ADDR);
1132
1133         coda_write(dev, 0, CODA9_REG_JPEG_GBU_BT_PTR);
1134         coda_write(dev, 0, CODA9_REG_JPEG_GBU_WD_PTR);
1135         coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR);
1136         coda_write(dev, BIT(31) | ((end_addr - start_addr - header_len) / 256),
1137                    CODA9_REG_JPEG_BBC_STRM_CTRL);
1138         coda_write(dev, 0, CODA9_REG_JPEG_GBU_CTRL);
1139         coda_write(dev, 0, CODA9_REG_JPEG_GBU_FF_RPTR);
1140         coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER);
1141         coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBIR);
1142         coda_write(dev, 64, CODA9_REG_JPEG_GBU_BBHR);
1143
1144         chroma_interleave = (q_data_src->fourcc == V4L2_PIX_FMT_NV12);
1145         coda_write(dev, CODA9_JPEG_PIC_CTRL_TC_DIRECTION |
1146                    CODA9_JPEG_PIC_CTRL_ENCODER_EN, CODA9_REG_JPEG_PIC_CTRL);
1147         coda_write(dev, 0, CODA9_REG_JPEG_SCL_INFO);
1148         coda_write(dev, chroma_interleave, CODA9_REG_JPEG_DPB_CONFIG);
1149         coda_write(dev, ctx->params.jpeg_restart_interval,
1150                    CODA9_REG_JPEG_RST_INTVAL);
1151         coda_write(dev, 1, CODA9_REG_JPEG_BBC_CTRL);
1152
1153         coda_write(dev, bus_req_num[chroma_format], CODA9_REG_JPEG_OP_INFO);
1154
1155         coda9_jpeg_write_huff_tab(ctx);
1156         coda9_jpeg_load_qmat_tab(ctx);
1157
1158         if (ctx->params.rot_mode & CODA_ROT_90) {
1159                 aligned_width = aligned_height;
1160                 aligned_height = q_data_src->bytesperline;
1161                 if (chroma_format == CODA9_JPEG_FORMAT_422)
1162                         chroma_format = CODA9_JPEG_FORMAT_224;
1163                 else if (chroma_format == CODA9_JPEG_FORMAT_224)
1164                         chroma_format = CODA9_JPEG_FORMAT_422;
1165         }
1166         /* These need to be multiples of MCU size */
1167         coda_write(dev, aligned_width << 16 | aligned_height,
1168                    CODA9_REG_JPEG_PIC_SIZE);
1169         coda_write(dev, ctx->params.rot_mode ?
1170                    (CODA_ROT_MIR_ENABLE | ctx->params.rot_mode) : 0,
1171                    CODA9_REG_JPEG_ROT_INFO);
1172
1173         coda_write(dev, mcu_info[chroma_format], CODA9_REG_JPEG_MCU_INFO);
1174
1175         coda_write(dev, 1, CODA9_GDI_CONTROL);
1176         timeout = ktime_add_us(ktime_get(), 100000);
1177         do {
1178                 ret = coda_read(dev, CODA9_GDI_STATUS);
1179                 if (ktime_compare(ktime_get(), timeout) > 0) {
1180                         v4l2_err(&dev->v4l2_dev, "timeout waiting for GDI\n");
1181                         return -ETIMEDOUT;
1182                 }
1183         } while (!ret);
1184
1185         coda_write(dev, (chroma_format << 17) | (chroma_interleave << 16) |
1186                    q_data_src->bytesperline, CODA9_GDI_INFO_CONTROL);
1187         /* The content of this register seems to be irrelevant: */
1188         coda_write(dev, aligned_width << 16 | aligned_height,
1189                    CODA9_GDI_INFO_PIC_SIZE);
1190
1191         coda_write_base(ctx, q_data_src, src_buf, CODA9_GDI_INFO_BASE_Y);
1192
1193         coda_write(dev, 0, CODA9_REG_JPEG_DPB_BASE00);
1194         coda_write(dev, 0, CODA9_GDI_CONTROL);
1195         coda_write(dev, 1, CODA9_GDI_PIC_INIT_HOST);
1196
1197         coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
1198         coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
1199
1200         trace_coda_jpeg_run(ctx, src_buf);
1201
1202         coda_write(dev, 1, CODA9_REG_JPEG_PIC_START);
1203
1204         return 0;
1205 }
1206
1207 static void coda9_jpeg_finish_encode(struct coda_ctx *ctx)
1208 {
1209         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1210         struct coda_dev *dev = ctx->dev;
1211         u32 wr_ptr, start_ptr;
1212         u32 err_mb;
1213
1214         if (ctx->aborting) {
1215                 coda_write(ctx->dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1216                 return;
1217         }
1218
1219         /*
1220          * Lock to make sure that an encoder stop command running in parallel
1221          * will either already have marked src_buf as last, or it will wake up
1222          * the capture queue after the buffers are returned.
1223          */
1224         mutex_lock(&ctx->wakeup_mutex);
1225         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1226         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1227
1228         trace_coda_jpeg_done(ctx, dst_buf);
1229
1230         /*
1231          * Set plane payload to the number of bytes written out
1232          * by the JPEG processing unit
1233          */
1234         start_ptr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1235         wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR);
1236         vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1237
1238         err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1239         if (err_mb)
1240                 coda_dbg(1, ctx, "ERRMB: 0x%x\n", err_mb);
1241
1242         coda_write(dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1243
1244         dst_buf->flags &= ~(V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_LAST);
1245         dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1246         dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST;
1247
1248         v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false);
1249
1250         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1251         coda_m2m_buf_done(ctx, dst_buf, err_mb ? VB2_BUF_STATE_ERROR :
1252                                                  VB2_BUF_STATE_DONE);
1253         mutex_unlock(&ctx->wakeup_mutex);
1254
1255         coda_dbg(1, ctx, "job finished: encoded frame (%u)%s\n",
1256                  dst_buf->sequence,
1257                  (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : "");
1258
1259         /*
1260          * Reset JPEG processing unit after each encode run to work
1261          * around hangups when switching context between encoder and
1262          * decoder.
1263          */
1264         coda_hw_reset(ctx);
1265 }
1266
1267 static void coda9_jpeg_encode_timeout(struct coda_ctx *ctx)
1268 {
1269         struct coda_dev *dev = ctx->dev;
1270         u32 end_addr, wr_ptr;
1271
1272         /* Handle missing BBC overflow interrupt via timeout */
1273         end_addr = coda_read(dev, CODA9_REG_JPEG_BBC_END_ADDR);
1274         wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR);
1275         if (wr_ptr >= end_addr - 256) {
1276                 v4l2_err(&dev->v4l2_dev, "JPEG too large for capture buffer\n");
1277                 coda9_jpeg_finish_encode(ctx);
1278                 return;
1279         }
1280
1281         coda_hw_reset(ctx);
1282 }
1283
1284 static void coda9_jpeg_release(struct coda_ctx *ctx)
1285 {
1286         int i;
1287
1288         if (ctx->params.jpeg_qmat_tab[0] == luma_q)
1289                 ctx->params.jpeg_qmat_tab[0] = NULL;
1290         if (ctx->params.jpeg_qmat_tab[1] == chroma_q)
1291                 ctx->params.jpeg_qmat_tab[1] = NULL;
1292         for (i = 0; i < 3; i++)
1293                 kfree(ctx->params.jpeg_qmat_tab[i]);
1294         kfree(ctx->params.jpeg_huff_data);
1295         kfree(ctx->params.jpeg_huff_tab);
1296 }
1297
1298 const struct coda_context_ops coda9_jpeg_encode_ops = {
1299         .queue_init = coda_encoder_queue_init,
1300         .start_streaming = coda9_jpeg_start_encoding,
1301         .prepare_run = coda9_jpeg_prepare_encode,
1302         .finish_run = coda9_jpeg_finish_encode,
1303         .run_timeout = coda9_jpeg_encode_timeout,
1304         .release = coda9_jpeg_release,
1305 };
1306
1307 /*
1308  * Decoder context operations
1309  */
1310
1311 static int coda9_jpeg_start_decoding(struct coda_ctx *ctx)
1312 {
1313         ctx->params.jpeg_qmat_index[0] = 0;
1314         ctx->params.jpeg_qmat_index[1] = 1;
1315         ctx->params.jpeg_qmat_index[2] = 1;
1316         ctx->params.jpeg_qmat_tab[0] = luma_q;
1317         ctx->params.jpeg_qmat_tab[1] = chroma_q;
1318         /* nothing more to do here */
1319
1320         /* TODO: we could already scan the first header to get the chroma
1321          * format.
1322          */
1323
1324         return 0;
1325 }
1326
1327 static int coda9_jpeg_prepare_decode(struct coda_ctx *ctx)
1328 {
1329         struct coda_dev *dev = ctx->dev;
1330         int aligned_width, aligned_height;
1331         int chroma_format;
1332         int ret;
1333         u32 val, dst_fourcc;
1334         struct coda_q_data *q_data_src, *q_data_dst;
1335         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1336         int chroma_interleave;
1337
1338         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1339         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1340         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1341         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1342         dst_fourcc = q_data_dst->fourcc;
1343
1344         if (vb2_get_plane_payload(&src_buf->vb2_buf, 0) == 0)
1345                 vb2_set_plane_payload(&src_buf->vb2_buf, 0,
1346                                       vb2_plane_size(&src_buf->vb2_buf, 0));
1347
1348         chroma_format = coda9_jpeg_chroma_format(q_data_dst->fourcc);
1349         if (chroma_format < 0) {
1350                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1351                 return chroma_format;
1352         }
1353
1354         ret = coda_jpeg_decode_header(ctx, &src_buf->vb2_buf);
1355         if (ret < 0) {
1356                 v4l2_err(&dev->v4l2_dev, "failed to decode JPEG header: %d\n",
1357                          ret);
1358
1359                 src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1360                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1361                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1362                 v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
1363
1364                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1365                 return ret;
1366         }
1367
1368         /* Round image dimensions to multiple of MCU size */
1369         aligned_width = round_up(q_data_src->width, width_align[chroma_format]);
1370         aligned_height = round_up(q_data_src->height, height_align[chroma_format]);
1371         if (aligned_width != q_data_dst->bytesperline) {
1372                 v4l2_err(&dev->v4l2_dev, "stride mismatch: %d != %d\n",
1373                          aligned_width, q_data_dst->bytesperline);
1374         }
1375
1376         coda_set_gdi_regs(ctx);
1377
1378         val = ctx->params.jpeg_huff_ac_index[0] << 12 |
1379               ctx->params.jpeg_huff_ac_index[1] << 11 |
1380               ctx->params.jpeg_huff_ac_index[2] << 10 |
1381               ctx->params.jpeg_huff_dc_index[0] << 9 |
1382               ctx->params.jpeg_huff_dc_index[1] << 8 |
1383               ctx->params.jpeg_huff_dc_index[2] << 7;
1384         if (ctx->params.jpeg_huff_tab)
1385                 val |= CODA9_JPEG_PIC_CTRL_USER_HUFFMAN_EN;
1386         coda_write(dev, val, CODA9_REG_JPEG_PIC_CTRL);
1387
1388         coda_write(dev, aligned_width << 16 | aligned_height,
1389                         CODA9_REG_JPEG_PIC_SIZE);
1390
1391         chroma_interleave = (dst_fourcc == V4L2_PIX_FMT_NV12);
1392         coda_write(dev, 0, CODA9_REG_JPEG_ROT_INFO);
1393         coda_write(dev, bus_req_num[chroma_format], CODA9_REG_JPEG_OP_INFO);
1394         coda_write(dev, mcu_info[chroma_format], CODA9_REG_JPEG_MCU_INFO);
1395         coda_write(dev, 0, CODA9_REG_JPEG_SCL_INFO);
1396         coda_write(dev, chroma_interleave, CODA9_REG_JPEG_DPB_CONFIG);
1397         coda_write(dev, ctx->params.jpeg_restart_interval,
1398                         CODA9_REG_JPEG_RST_INTVAL);
1399
1400         if (ctx->params.jpeg_huff_tab) {
1401                 ret = coda9_jpeg_dec_huff_setup(ctx);
1402                 if (ret < 0) {
1403                         v4l2_err(&dev->v4l2_dev,
1404                                  "failed to set up Huffman tables: %d\n", ret);
1405                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1406                         return ret;
1407                 }
1408         }
1409
1410         coda9_jpeg_qmat_setup(ctx);
1411
1412         coda9_jpeg_dec_bbc_gbu_setup(ctx, &src_buf->vb2_buf,
1413                                      ctx->jpeg_ecs_offset);
1414
1415         coda_write(dev, 0, CODA9_REG_JPEG_RST_INDEX);
1416         coda_write(dev, 0, CODA9_REG_JPEG_RST_COUNT);
1417
1418         coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_Y);
1419         coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_CB);
1420         coda_write(dev, 0, CODA9_REG_JPEG_DPCM_DIFF_CR);
1421
1422         coda_write(dev, 0, CODA9_REG_JPEG_ROT_INFO);
1423
1424         coda_write(dev, 1, CODA9_GDI_CONTROL);
1425         do {
1426                 ret = coda_read(dev, CODA9_GDI_STATUS);
1427         } while (!ret);
1428
1429         val = (chroma_format << 17) | (chroma_interleave << 16) |
1430               q_data_dst->bytesperline;
1431         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1432                 val |= 3 << 20;
1433         coda_write(dev, val, CODA9_GDI_INFO_CONTROL);
1434
1435         coda_write(dev, aligned_width << 16 | aligned_height,
1436                         CODA9_GDI_INFO_PIC_SIZE);
1437
1438         coda_write_base(ctx, q_data_dst, dst_buf, CODA9_GDI_INFO_BASE_Y);
1439
1440         coda_write(dev, 0, CODA9_REG_JPEG_DPB_BASE00);
1441         coda_write(dev, 0, CODA9_GDI_CONTROL);
1442         coda_write(dev, 1, CODA9_GDI_PIC_INIT_HOST);
1443
1444         trace_coda_jpeg_run(ctx, src_buf);
1445
1446         coda_write(dev, 1, CODA9_REG_JPEG_PIC_START);
1447
1448         return 0;
1449 }
1450
1451 static void coda9_jpeg_finish_decode(struct coda_ctx *ctx)
1452 {
1453         struct coda_dev *dev = ctx->dev;
1454         struct vb2_v4l2_buffer *dst_buf, *src_buf;
1455         struct coda_q_data *q_data_dst;
1456         u32 err_mb;
1457
1458         err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1459         if (err_mb)
1460                 v4l2_err(&dev->v4l2_dev, "ERRMB: 0x%x\n", err_mb);
1461
1462         coda_write(dev, 0, CODA9_REG_JPEG_BBC_FLUSH_CMD);
1463
1464         /*
1465          * Lock to make sure that a decoder stop command running in parallel
1466          * will either already have marked src_buf as last, or it will wake up
1467          * the capture queue after the buffers are returned.
1468          */
1469         mutex_lock(&ctx->wakeup_mutex);
1470         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1471         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1472         dst_buf->sequence = ctx->osequence++;
1473
1474         trace_coda_jpeg_done(ctx, dst_buf);
1475
1476         dst_buf->flags &= ~(V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_LAST);
1477         dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1478         dst_buf->flags |= src_buf->flags & V4L2_BUF_FLAG_LAST;
1479
1480         v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, false);
1481
1482         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1483         vb2_set_plane_payload(&dst_buf->vb2_buf, 0, q_data_dst->sizeimage);
1484
1485         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1486         coda_m2m_buf_done(ctx, dst_buf, err_mb ? VB2_BUF_STATE_ERROR :
1487                                                  VB2_BUF_STATE_DONE);
1488
1489         mutex_unlock(&ctx->wakeup_mutex);
1490
1491         coda_dbg(1, ctx, "job finished: decoded frame (%u)%s\n",
1492                  dst_buf->sequence,
1493                  (dst_buf->flags & V4L2_BUF_FLAG_LAST) ? " (last)" : "");
1494
1495         /*
1496          * Reset JPEG processing unit after each decode run to work
1497          * around hangups when switching context between encoder and
1498          * decoder.
1499          */
1500         coda_hw_reset(ctx);
1501 }
1502
1503 const struct coda_context_ops coda9_jpeg_decode_ops = {
1504         .queue_init = coda_encoder_queue_init, /* non-bitstream operation */
1505         .start_streaming = coda9_jpeg_start_decoding,
1506         .prepare_run = coda9_jpeg_prepare_decode,
1507         .finish_run = coda9_jpeg_finish_decode,
1508         .release = coda9_jpeg_release,
1509 };
1510
1511 irqreturn_t coda9_jpeg_irq_handler(int irq, void *data)
1512 {
1513         struct coda_dev *dev = data;
1514         struct coda_ctx *ctx;
1515         int status;
1516         int err_mb;
1517
1518         status = coda_read(dev, CODA9_REG_JPEG_PIC_STATUS);
1519         if (status == 0)
1520                 return IRQ_HANDLED;
1521         coda_write(dev, status, CODA9_REG_JPEG_PIC_STATUS);
1522
1523         if (status & CODA9_JPEG_STATUS_OVERFLOW)
1524                 v4l2_err(&dev->v4l2_dev, "JPEG overflow\n");
1525
1526         if (status & CODA9_JPEG_STATUS_BBC_INT)
1527                 v4l2_err(&dev->v4l2_dev, "JPEG BBC interrupt\n");
1528
1529         if (status & CODA9_JPEG_STATUS_ERROR) {
1530                 v4l2_err(&dev->v4l2_dev, "JPEG error\n");
1531
1532                 err_mb = coda_read(dev, CODA9_REG_JPEG_PIC_ERRMB);
1533                 if (err_mb) {
1534                         v4l2_err(&dev->v4l2_dev,
1535                                  "ERRMB: 0x%x: rst idx %d, mcu pos (%d,%d)\n",
1536                                  err_mb, err_mb >> 24, (err_mb >> 12) & 0xfff,
1537                                  err_mb & 0xfff);
1538                 }
1539         }
1540
1541         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1542         if (!ctx) {
1543                 v4l2_err(&dev->v4l2_dev,
1544                          "Instance released before the end of transaction\n");
1545                 mutex_unlock(&dev->coda_mutex);
1546                 return IRQ_HANDLED;
1547         }
1548
1549         complete(&ctx->completion);
1550
1551         return IRQ_HANDLED;
1552 }