GNU Linux-libre 4.19.207-gnu1
[releases.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
/* Size in bytes of the per-context parameter buffer ("parabuf") */
#define CODA_PARA_BUF_SIZE      (10 * 1024)
/* Size in bytes of the "psbuf" context buffer used on CODA_HX4 / CODA_7541 */
#define CODA7_PS_BUF_SIZE       0x28000
/* Extra bytes added to the work buffer for H.264 contexts on CODA_960 */
#define CODA9_PS_SAVE_SIZE      (512 * 1024)

/* Default gamma values; presumably rate control parameters - TODO confirm */
#define CODA_DEFAULT_GAMMA      4096
#define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
43
/* Forward declaration; the definition lives elsewhere in this file. */
static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_HX4 ||
72             dev->devtype->product == CODA_7541 ||
73             dev->devtype->product == CODA_960) {
74                 /* Restore context related registers to CODA */
75                 coda_write(dev, ctx->bit_stream_param,
76                                 CODA_REG_BIT_BIT_STREAM_PARAM);
77                 coda_write(dev, ctx->frm_dis_flg,
78                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
79                 coda_write(dev, ctx->frame_mem_ctrl,
80                                 CODA_REG_BIT_FRAME_MEM_CTRL);
81                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
82         }
83
84         if (dev->devtype->product == CODA_960) {
85                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
86                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
87         }
88
89         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
90
91         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
92         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
93         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
94
95         trace_coda_bit_run(ctx, cmd);
96
97         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
98 }
99
100 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
101 {
102         struct coda_dev *dev = ctx->dev;
103         int ret;
104
105         coda_command_async(ctx, cmd);
106         ret = coda_wait_timeout(dev);
107         trace_coda_bit_done(ctx);
108
109         return ret;
110 }
111
112 int coda_hw_reset(struct coda_ctx *ctx)
113 {
114         struct coda_dev *dev = ctx->dev;
115         unsigned long timeout;
116         unsigned int idx;
117         int ret;
118
119         if (!dev->rstc)
120                 return -ENOENT;
121
122         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
123
124         if (dev->devtype->product == CODA_960) {
125                 timeout = jiffies + msecs_to_jiffies(100);
126                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
127                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
128                         if (time_after(jiffies, timeout))
129                                 return -ETIME;
130                         cpu_relax();
131                 }
132         }
133
134         ret = reset_control_reset(dev->rstc);
135         if (ret < 0)
136                 return ret;
137
138         if (dev->devtype->product == CODA_960)
139                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
140         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
141         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
142         ret = coda_wait_timeout(dev);
143         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
144
145         return ret;
146 }
147
148 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
149 {
150         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
151         struct coda_dev *dev = ctx->dev;
152         u32 rd_ptr;
153
154         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
155         kfifo->out = (kfifo->in & ~kfifo->mask) |
156                       (rd_ptr - ctx->bitstream.paddr);
157         if (kfifo->out > kfifo->in)
158                 kfifo->out -= kfifo->mask + 1;
159 }
160
161 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
162 {
163         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
164         struct coda_dev *dev = ctx->dev;
165         u32 rd_ptr, wr_ptr;
166
167         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
168         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
169         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
170         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
171 }
172
173 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
174 {
175         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
176         struct coda_dev *dev = ctx->dev;
177         u32 wr_ptr;
178
179         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
180         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
181 }
182
183 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
184 {
185         unsigned char *buf;
186         u32 n;
187
188         if (size < 6)
189                 size = 6;
190
191         buf = kmalloc(size, GFP_KERNEL);
192         if (!buf)
193                 return -ENOMEM;
194
195         coda_h264_filler_nal(size, buf);
196         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
197         kfree(buf);
198
199         return (n < size) ? -ENOSPC : 0;
200 }
201
202 static int coda_bitstream_queue(struct coda_ctx *ctx,
203                                 struct vb2_v4l2_buffer *src_buf)
204 {
205         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
206         u32 n;
207
208         n = kfifo_in(&ctx->bitstream_fifo,
209                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
210         if (n < src_size)
211                 return -ENOSPC;
212
213         src_buf->sequence = ctx->qsequence++;
214
215         return 0;
216 }
217
218 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
219                                      struct vb2_v4l2_buffer *src_buf)
220 {
221         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
222         int ret;
223
224         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
225             ctx->bitstream.size)
226                 return false;
227
228         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
229                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
230                 return true;
231         }
232
233         /* Add zero padding before the first H.264 buffer, if it is too small */
234         if (ctx->qsequence == 0 && payload < 512 &&
235             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
236                 coda_bitstream_pad(ctx, 512 - payload);
237
238         ret = coda_bitstream_queue(ctx, src_buf);
239         if (ret < 0) {
240                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
241                 return false;
242         }
243         /* Sync read pointer to device */
244         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
245                 coda_kfifo_sync_to_device_write(ctx);
246
247         ctx->hold = false;
248
249         return true;
250 }
251
252 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
253 {
254         struct vb2_v4l2_buffer *src_buf;
255         struct coda_buffer_meta *meta;
256         unsigned long flags;
257         u32 start;
258
259         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
260                 return;
261
262         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
263                 /*
264                  * Only queue two JPEGs into the bitstream buffer to keep
265                  * latency low. We need at least one complete buffer and the
266                  * header of another buffer (for prescan) in the bitstream.
267                  */
268                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
269                     ctx->num_metas > 1)
270                         break;
271
272                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
273
274                 /* Drop frames that do not start/end with a SOI/EOI markers */
275                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
276                     !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
277                         v4l2_err(&ctx->dev->v4l2_dev,
278                                  "dropping invalid JPEG frame %d\n",
279                                  ctx->qsequence);
280                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
281                         if (buffer_list) {
282                                 struct v4l2_m2m_buffer *m2m_buf;
283
284                                 m2m_buf = container_of(src_buf,
285                                                        struct v4l2_m2m_buffer,
286                                                        vb);
287                                 list_add_tail(&m2m_buf->list, buffer_list);
288                         } else {
289                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
290                         }
291                         continue;
292                 }
293
294                 /* Dump empty buffers */
295                 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
296                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
297                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
298                         continue;
299                 }
300
301                 /* Buffer start position */
302                 start = ctx->bitstream_fifo.kfifo.in &
303                         ctx->bitstream_fifo.kfifo.mask;
304
305                 if (coda_bitstream_try_queue(ctx, src_buf)) {
306                         /*
307                          * Source buffer is queued in the bitstream ringbuffer;
308                          * queue the timestamp and mark source buffer as done
309                          */
310                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
311
312                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
313                         if (meta) {
314                                 meta->sequence = src_buf->sequence;
315                                 meta->timecode = src_buf->timecode;
316                                 meta->timestamp = src_buf->vb2_buf.timestamp;
317                                 meta->start = start;
318                                 meta->end = ctx->bitstream_fifo.kfifo.in &
319                                             ctx->bitstream_fifo.kfifo.mask;
320                                 spin_lock_irqsave(&ctx->buffer_meta_lock,
321                                                   flags);
322                                 list_add_tail(&meta->list,
323                                               &ctx->buffer_meta_list);
324                                 ctx->num_metas++;
325                                 spin_unlock_irqrestore(&ctx->buffer_meta_lock,
326                                                        flags);
327
328                                 trace_coda_bit_queue(ctx, src_buf, meta);
329                         }
330
331                         if (buffer_list) {
332                                 struct v4l2_m2m_buffer *m2m_buf;
333
334                                 m2m_buf = container_of(src_buf,
335                                                        struct v4l2_m2m_buffer,
336                                                        vb);
337                                 list_add_tail(&m2m_buf->list, buffer_list);
338                         } else {
339                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
340                         }
341                 } else {
342                         break;
343                 }
344         }
345 }
346
347 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
348 {
349         struct coda_dev *dev = ctx->dev;
350
351         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
352
353         /* If this context is currently running, update the hardware flag */
354         if ((dev->devtype->product == CODA_960) &&
355             coda_isbusy(dev) &&
356             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
357                 coda_write(dev, ctx->bit_stream_param,
358                            CODA_REG_BIT_BIT_STREAM_PARAM);
359         }
360 }
361
362 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
363 {
364         struct coda_dev *dev = ctx->dev;
365         u32 *p = ctx->parabuf.vaddr;
366
367         if (dev->devtype->product == CODA_DX6)
368                 p[index] = value;
369         else
370                 p[index ^ 1] = value;
371 }
372
373 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
374                                          struct coda_aux_buf *buf, size_t size,
375                                          const char *name)
376 {
377         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
378 }
379
380
381 static void coda_free_framebuffers(struct coda_ctx *ctx)
382 {
383         int i;
384
385         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
386                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
387 }
388
389 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
390                                    struct coda_q_data *q_data, u32 fourcc)
391 {
392         struct coda_dev *dev = ctx->dev;
393         unsigned int ysize, ycbcr_size;
394         int ret;
395         int i;
396
397         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
399             ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 ||
400             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4)
401                 ysize = round_up(q_data->rect.width, 16) *
402                         round_up(q_data->rect.height, 16);
403         else
404                 ysize = round_up(q_data->rect.width, 8) * q_data->rect.height;
405
406         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
407                 ycbcr_size = round_up(ysize, 4096) + ysize / 2;
408         else
409                 ycbcr_size = ysize + ysize / 2;
410
411         /* Allocate frame buffers */
412         for (i = 0; i < ctx->num_internal_frames; i++) {
413                 size_t size = ycbcr_size;
414                 char *name;
415
416                 /* Add space for mvcol buffers */
417                 if (dev->devtype->product != CODA_DX6 &&
418                     (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
419                      (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)))
420                         size += ysize / 4;
421                 name = kasprintf(GFP_KERNEL, "fb%d", i);
422                 if (!name) {
423                         coda_free_framebuffers(ctx);
424                         return -ENOMEM;
425                 }
426                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
427                                              size, name);
428                 kfree(name);
429                 if (ret < 0) {
430                         coda_free_framebuffers(ctx);
431                         return ret;
432                 }
433         }
434
435         /* Register frame buffers in the parameter buffer */
436         for (i = 0; i < ctx->num_internal_frames; i++) {
437                 u32 y, cb, cr, mvcol;
438
439                 /* Start addresses of Y, Cb, Cr planes */
440                 y = ctx->internal_frames[i].paddr;
441                 cb = y + ysize;
442                 cr = y + ysize + ysize/4;
443                 mvcol = y + ysize + ysize/4 + ysize/4;
444                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
445                         cb = round_up(cb, 4096);
446                         mvcol = cb + ysize/2;
447                         cr = 0;
448                         /* Packed 20-bit MSB of base addresses */
449                         /* YYYYYCCC, CCyyyyyc, cccc.... */
450                         y = (y & 0xfffff000) | cb >> 20;
451                         cb = (cb & 0x000ff000) << 12;
452                 }
453                 coda_parabuf_write(ctx, i * 3 + 0, y);
454                 coda_parabuf_write(ctx, i * 3 + 1, cb);
455                 coda_parabuf_write(ctx, i * 3 + 2, cr);
456
457                 if (dev->devtype->product == CODA_DX6)
458                         continue;
459
460                 /* mvcol buffer for h.264 and mpeg4 */
461                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
462                         coda_parabuf_write(ctx, 96 + i, mvcol);
463                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)
464                         coda_parabuf_write(ctx, 97, mvcol);
465         }
466
467         return 0;
468 }
469
470 static void coda_free_context_buffers(struct coda_ctx *ctx)
471 {
472         struct coda_dev *dev = ctx->dev;
473
474         coda_free_aux_buf(dev, &ctx->slicebuf);
475         coda_free_aux_buf(dev, &ctx->psbuf);
476         if (dev->devtype->product != CODA_DX6)
477                 coda_free_aux_buf(dev, &ctx->workbuf);
478         coda_free_aux_buf(dev, &ctx->parabuf);
479 }
480
481 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
482                                       struct coda_q_data *q_data)
483 {
484         struct coda_dev *dev = ctx->dev;
485         size_t size;
486         int ret;
487
488         if (!ctx->parabuf.vaddr) {
489                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
490                                              CODA_PARA_BUF_SIZE, "parabuf");
491                 if (ret < 0)
492                         return ret;
493         }
494
495         if (dev->devtype->product == CODA_DX6)
496                 return 0;
497
498         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
499                 /* worst case slice size */
500                 size = (DIV_ROUND_UP(q_data->rect.width, 16) *
501                         DIV_ROUND_UP(q_data->rect.height, 16)) * 3200 / 8 + 512;
502                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
503                                              "slicebuf");
504                 if (ret < 0)
505                         goto err;
506         }
507
508         if (!ctx->psbuf.vaddr && (dev->devtype->product == CODA_HX4 ||
509                                   dev->devtype->product == CODA_7541)) {
510                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
511                                              CODA7_PS_BUF_SIZE, "psbuf");
512                 if (ret < 0)
513                         goto err;
514         }
515
516         if (!ctx->workbuf.vaddr) {
517                 size = dev->devtype->workbuf_size;
518                 if (dev->devtype->product == CODA_960 &&
519                     q_data->fourcc == V4L2_PIX_FMT_H264)
520                         size += CODA9_PS_SAVE_SIZE;
521                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
522                                              "workbuf");
523                 if (ret < 0)
524                         goto err;
525         }
526
527         return 0;
528
529 err:
530         coda_free_context_buffers(ctx);
531         return ret;
532 }
533
534 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
535                               int header_code, u8 *header, int *size)
536 {
537         struct vb2_buffer *vb = &buf->vb2_buf;
538         struct coda_dev *dev = ctx->dev;
539         struct coda_q_data *q_data_src;
540         struct v4l2_rect *r;
541         size_t bufsize;
542         int ret;
543         int i;
544
545         if (dev->devtype->product == CODA_960)
546                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
547
548         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
549                    CODA_CMD_ENC_HEADER_BB_START);
550         bufsize = vb2_plane_size(vb, 0);
551         if (dev->devtype->product == CODA_960)
552                 bufsize /= 1024;
553         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
554         if (dev->devtype->product == CODA_960 &&
555             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 &&
556             header_code == CODA_HEADER_H264_SPS) {
557                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
558                 r = &q_data_src->rect;
559
560                 if (r->width % 16 || r->height % 16) {
561                         u32 crop_right = round_up(r->width, 16) -  r->width;
562                         u32 crop_bottom = round_up(r->height, 16) - r->height;
563
564                         coda_write(dev, crop_right,
565                                    CODA9_CMD_ENC_HEADER_FRAME_CROP_H);
566                         coda_write(dev, crop_bottom,
567                                    CODA9_CMD_ENC_HEADER_FRAME_CROP_V);
568                         header_code |= CODA9_HEADER_FRAME_CROP;
569                 }
570         }
571         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
572         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
573         if (ret < 0) {
574                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
575                 return ret;
576         }
577
578         if (dev->devtype->product == CODA_960) {
579                 for (i = 63; i > 0; i--)
580                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
581                                 break;
582                 *size = i + 1;
583         } else {
584                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
585                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
586         }
587         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
588
589         return 0;
590 }
591
592 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
593 {
594         phys_addr_t ret;
595
596         size = round_up(size, 1024);
597         if (size > iram->remaining)
598                 return 0;
599         iram->remaining -= size;
600
601         ret = iram->next_paddr;
602         iram->next_paddr += size;
603
604         return ret;
605 }
606
607 static void coda_setup_iram(struct coda_ctx *ctx)
608 {
609         struct coda_iram_info *iram_info = &ctx->iram_info;
610         struct coda_dev *dev = ctx->dev;
611         int w64, w128;
612         int mb_width;
613         int dbk_bits;
614         int bit_bits;
615         int ip_bits;
616         int me_bits;
617
618         memset(iram_info, 0, sizeof(*iram_info));
619         iram_info->next_paddr = dev->iram.paddr;
620         iram_info->remaining = dev->iram.size;
621
622         if (!dev->iram.vaddr)
623                 return;
624
625         switch (dev->devtype->product) {
626         case CODA_HX4:
627                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE;
628                 bit_bits = CODA7_USE_HOST_BIT_ENABLE;
629                 ip_bits = CODA7_USE_HOST_IP_ENABLE;
630                 me_bits = CODA7_USE_HOST_ME_ENABLE;
631                 break;
632         case CODA_7541:
633                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
634                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
635                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
636                 me_bits = CODA7_USE_HOST_ME_ENABLE | CODA7_USE_ME_ENABLE;
637                 break;
638         case CODA_960:
639                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
640                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
641                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
642                 me_bits = 0;
643                 break;
644         default: /* CODA_DX6 */
645                 return;
646         }
647
648         if (ctx->inst_type == CODA_INST_ENCODER) {
649                 struct coda_q_data *q_data_src;
650
651                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
652                 mb_width = DIV_ROUND_UP(q_data_src->rect.width, 16);
653                 w128 = mb_width * 128;
654                 w64 = mb_width * 64;
655
656                 /* Prioritize in case IRAM is too small for everything */
657                 if (dev->devtype->product == CODA_HX4 ||
658                     dev->devtype->product == CODA_7541) {
659                         iram_info->search_ram_size = round_up(mb_width * 16 *
660                                                               36 + 2048, 1024);
661                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
662                                                 iram_info->search_ram_size);
663                         if (!iram_info->search_ram_paddr) {
664                                 pr_err("IRAM is smaller than the search ram size\n");
665                                 goto out;
666                         }
667                         iram_info->axi_sram_use |= me_bits;
668                 }
669
670                 /* Only H.264BP and H.263P3 are considered */
671                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
672                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
673                 if (!iram_info->buf_dbk_c_use)
674                         goto out;
675                 iram_info->axi_sram_use |= dbk_bits;
676
677                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
678                 if (!iram_info->buf_bit_use)
679                         goto out;
680                 iram_info->axi_sram_use |= bit_bits;
681
682                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
683                 if (!iram_info->buf_ip_ac_dc_use)
684                         goto out;
685                 iram_info->axi_sram_use |= ip_bits;
686
687                 /* OVL and BTP disabled for encoder */
688         } else if (ctx->inst_type == CODA_INST_DECODER) {
689                 struct coda_q_data *q_data_dst;
690
691                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
692                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
693                 w128 = mb_width * 128;
694
695                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
696                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
697                 if (!iram_info->buf_dbk_c_use)
698                         goto out;
699                 iram_info->axi_sram_use |= dbk_bits;
700
701                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
702                 if (!iram_info->buf_bit_use)
703                         goto out;
704                 iram_info->axi_sram_use |= bit_bits;
705
706                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
707                 if (!iram_info->buf_ip_ac_dc_use)
708                         goto out;
709                 iram_info->axi_sram_use |= ip_bits;
710
711                 /* OVL and BTP unused as there is no VC1 support yet */
712         }
713
714 out:
715         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
716                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
717                          "IRAM smaller than needed\n");
718
719         if (dev->devtype->product == CODA_HX4 ||
720             dev->devtype->product == CODA_7541) {
721                 /* TODO - Enabling these causes picture errors on CODA7541 */
722                 if (ctx->inst_type == CODA_INST_DECODER) {
723                         /* fw 1.4.50 */
724                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
725                                                      CODA7_USE_IP_ENABLE);
726                 } else {
727                         /* fw 13.4.29 */
728                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
729                                                      CODA7_USE_HOST_DBK_ENABLE |
730                                                      CODA7_USE_IP_ENABLE |
731                                                      CODA7_USE_DBK_ENABLE);
732                 }
733         }
734 }
735
736 static u32 coda_supported_firmwares[] = {
737         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
738         CODA_FIRMWARE_VERNUM(CODA_HX4, 1, 4, 50),
739         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
740         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
741         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 9),
742         CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
743         CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
744 };
745
746 static bool coda_firmware_supported(u32 vernum)
747 {
748         int i;
749
750         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
751                 if (vernum == coda_supported_firmwares[i])
752                         return true;
753         return false;
754 }
755
756 int coda_check_firmware(struct coda_dev *dev)
757 {
758         u16 product, major, minor, release;
759         u32 data;
760         int ret;
761
762         ret = clk_prepare_enable(dev->clk_per);
763         if (ret)
764                 goto err_clk_per;
765
766         ret = clk_prepare_enable(dev->clk_ahb);
767         if (ret)
768                 goto err_clk_ahb;
769
770         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
771         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
772         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
773         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
774         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
775         if (coda_wait_timeout(dev)) {
776                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
777                 ret = -EIO;
778                 goto err_run_cmd;
779         }
780
781         if (dev->devtype->product == CODA_960) {
782                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
783                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
784                           data);
785         }
786
787         /* Check we are compatible with the loaded firmware */
788         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
789         product = CODA_FIRMWARE_PRODUCT(data);
790         major = CODA_FIRMWARE_MAJOR(data);
791         minor = CODA_FIRMWARE_MINOR(data);
792         release = CODA_FIRMWARE_RELEASE(data);
793
794         clk_disable_unprepare(dev->clk_per);
795         clk_disable_unprepare(dev->clk_ahb);
796
797         if (product != dev->devtype->product) {
798                 v4l2_err(&dev->v4l2_dev,
799                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
800                          coda_product_name(dev->devtype->product),
801                          coda_product_name(product), major, minor, release);
802                 return -EINVAL;
803         }
804
805         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
806                   coda_product_name(product));
807
808         if (coda_firmware_supported(data)) {
809                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
810                           major, minor, release);
811         } else {
812                 v4l2_warn(&dev->v4l2_dev,
813                           "Unsupported firmware version: %u.%u.%u\n",
814                           major, minor, release);
815         }
816
817         return 0;
818
819 err_run_cmd:
820         clk_disable_unprepare(dev->clk_ahb);
821 err_clk_ahb:
822         clk_disable_unprepare(dev->clk_per);
823 err_clk_per:
824         return ret;
825 }
826
827 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
828 {
829         u32 cache_size, cache_config;
830
831         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
832                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
833                 cache_size = 0x20262024;
834                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
835         } else {
836                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
837                 cache_size = 0x02440243;
838                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
839         }
840         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
841         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
842                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
843                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
844                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
845         } else {
846                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
847                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
848                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
849         }
850         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
851 }
852
853 /*
854  * Encoder context operations
855  */
856
857 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
858                                 struct v4l2_requestbuffers *rb)
859 {
860         struct coda_q_data *q_data_src;
861         int ret;
862
863         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
864                 return 0;
865
866         if (rb->count) {
867                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
868                 ret = coda_alloc_context_buffers(ctx, q_data_src);
869                 if (ret < 0)
870                         return ret;
871         } else {
872                 coda_free_context_buffers(ctx);
873         }
874
875         return 0;
876 }
877
878 static int coda_start_encoding(struct coda_ctx *ctx)
879 {
880         struct coda_dev *dev = ctx->dev;
881         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
882         struct coda_q_data *q_data_src, *q_data_dst;
883         u32 bitstream_buf, bitstream_size;
884         struct vb2_v4l2_buffer *buf;
885         int gamma, ret, value;
886         u32 dst_fourcc;
887         int num_fb;
888         u32 stride;
889
890         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
891         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
892         dst_fourcc = q_data_dst->fourcc;
893
894         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
895         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
896         bitstream_size = q_data_dst->sizeimage;
897
898         if (!coda_is_initialized(dev)) {
899                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
900                 return -EFAULT;
901         }
902
903         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
904                 if (!ctx->params.jpeg_qmat_tab[0])
905                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
906                 if (!ctx->params.jpeg_qmat_tab[1])
907                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
908                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
909         }
910
911         mutex_lock(&dev->coda_mutex);
912
913         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
914         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
915         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
916         switch (dev->devtype->product) {
917         case CODA_DX6:
918                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
919                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
920                 break;
921         case CODA_960:
922                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
923                 /* fallthrough */
924         case CODA_HX4:
925         case CODA_7541:
926                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
927                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
928                 break;
929         }
930
931         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
932                                  CODA9_FRAME_TILED2LINEAR);
933         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
934                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
935         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
936                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
937         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
938
939         if (dev->devtype->product == CODA_DX6) {
940                 /* Configure the coda */
941                 coda_write(dev, dev->iram.paddr,
942                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
943         }
944
945         /* Could set rotation here if needed */
946         value = 0;
947         switch (dev->devtype->product) {
948         case CODA_DX6:
949                 value = (q_data_src->rect.width & CODADX6_PICWIDTH_MASK)
950                         << CODADX6_PICWIDTH_OFFSET;
951                 value |= (q_data_src->rect.height & CODADX6_PICHEIGHT_MASK)
952                          << CODA_PICHEIGHT_OFFSET;
953                 break;
954         case CODA_HX4:
955         case CODA_7541:
956                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
957                         value = (round_up(q_data_src->rect.width, 16) &
958                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
959                         value |= (round_up(q_data_src->rect.height, 16) &
960                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
961                         break;
962                 }
963                 /* fallthrough */
964         case CODA_960:
965                 value = (q_data_src->rect.width & CODA7_PICWIDTH_MASK)
966                         << CODA7_PICWIDTH_OFFSET;
967                 value |= (q_data_src->rect.height & CODA7_PICHEIGHT_MASK)
968                          << CODA_PICHEIGHT_OFFSET;
969         }
970         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
971         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
972                 ctx->params.framerate = 0;
973         coda_write(dev, ctx->params.framerate,
974                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
975
976         ctx->params.codec_mode = ctx->codec->mode;
977         switch (dst_fourcc) {
978         case V4L2_PIX_FMT_MPEG4:
979                 if (dev->devtype->product == CODA_960)
980                         coda_write(dev, CODA9_STD_MPEG4,
981                                    CODA_CMD_ENC_SEQ_COD_STD);
982                 else
983                         coda_write(dev, CODA_STD_MPEG4,
984                                    CODA_CMD_ENC_SEQ_COD_STD);
985                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
986                 break;
987         case V4L2_PIX_FMT_H264:
988                 if (dev->devtype->product == CODA_960)
989                         coda_write(dev, CODA9_STD_H264,
990                                    CODA_CMD_ENC_SEQ_COD_STD);
991                 else
992                         coda_write(dev, CODA_STD_H264,
993                                    CODA_CMD_ENC_SEQ_COD_STD);
994                 value = ((ctx->params.h264_disable_deblocking_filter_idc &
995                           CODA_264PARAM_DISABLEDEBLK_MASK) <<
996                          CODA_264PARAM_DISABLEDEBLK_OFFSET) |
997                         ((ctx->params.h264_slice_alpha_c0_offset_div2 &
998                           CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
999                          CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
1000                         ((ctx->params.h264_slice_beta_offset_div2 &
1001                           CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
1002                          CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
1003                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
1004                 break;
1005         case V4L2_PIX_FMT_JPEG:
1006                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
1007                 coda_write(dev, ctx->params.jpeg_restart_interval,
1008                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
1009                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
1010                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
1011                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
1012
1013                 coda_jpeg_write_tables(ctx);
1014                 break;
1015         default:
1016                 v4l2_err(v4l2_dev,
1017                          "dst format (0x%08x) invalid.\n", dst_fourcc);
1018                 ret = -EINVAL;
1019                 goto out;
1020         }
1021
1022         /*
1023          * slice mode and GOP size registers are used for thumb size/offset
1024          * in JPEG mode
1025          */
1026         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1027                 switch (ctx->params.slice_mode) {
1028                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
1029                         value = 0;
1030                         break;
1031                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
1032                         value  = (ctx->params.slice_max_mb &
1033                                   CODA_SLICING_SIZE_MASK)
1034                                  << CODA_SLICING_SIZE_OFFSET;
1035                         value |= (1 & CODA_SLICING_UNIT_MASK)
1036                                  << CODA_SLICING_UNIT_OFFSET;
1037                         value |=  1 & CODA_SLICING_MODE_MASK;
1038                         break;
1039                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1040                         value  = (ctx->params.slice_max_bits &
1041                                   CODA_SLICING_SIZE_MASK)
1042                                  << CODA_SLICING_SIZE_OFFSET;
1043                         value |= (0 & CODA_SLICING_UNIT_MASK)
1044                                  << CODA_SLICING_UNIT_OFFSET;
1045                         value |=  1 & CODA_SLICING_MODE_MASK;
1046                         break;
1047                 }
1048                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1049                 value = ctx->params.gop_size;
1050                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1051         }
1052
1053         if (ctx->params.bitrate) {
1054                 /* Rate control enabled */
1055                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1056                         << CODA_RATECONTROL_BITRATE_OFFSET;
1057                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1058                 value |= (ctx->params.vbv_delay &
1059                           CODA_RATECONTROL_INITIALDELAY_MASK)
1060                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1061                 if (dev->devtype->product == CODA_960)
1062                         value |= BIT(31); /* disable autoskip */
1063         } else {
1064                 value = 0;
1065         }
1066         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1067
1068         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1069         coda_write(dev, ctx->params.intra_refresh,
1070                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1071
1072         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1073         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1074
1075
1076         value = 0;
1077         if (dev->devtype->product == CODA_960)
1078                 gamma = CODA9_DEFAULT_GAMMA;
1079         else
1080                 gamma = CODA_DEFAULT_GAMMA;
1081         if (gamma > 0) {
1082                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1083                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1084         }
1085
1086         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1087                 coda_write(dev,
1088                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1089                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1090                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1091         }
1092         if (dev->devtype->product == CODA_960) {
1093                 if (ctx->params.h264_max_qp)
1094                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1095                 if (CODA_DEFAULT_GAMMA > 0)
1096                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1097         } else {
1098                 if (CODA_DEFAULT_GAMMA > 0) {
1099                         if (dev->devtype->product == CODA_DX6)
1100                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1101                         else
1102                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1103                 }
1104                 if (ctx->params.h264_min_qp)
1105                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1106                 if (ctx->params.h264_max_qp)
1107                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1108         }
1109         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1110
1111         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1112
1113         coda_setup_iram(ctx);
1114
1115         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1116                 switch (dev->devtype->product) {
1117                 case CODA_DX6:
1118                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1119                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1120                         break;
1121                 case CODA_HX4:
1122                 case CODA_7541:
1123                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1124                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1125                         coda_write(dev, ctx->iram_info.search_ram_size,
1126                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1127                         break;
1128                 case CODA_960:
1129                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1130                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1131                 }
1132         }
1133
1134         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1135         if (ret < 0) {
1136                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1137                 goto out;
1138         }
1139
1140         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1141                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1142                 ret = -EFAULT;
1143                 goto out;
1144         }
1145         ctx->initialized = 1;
1146
1147         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1148                 if (dev->devtype->product == CODA_960)
1149                         ctx->num_internal_frames = 4;
1150                 else
1151                         ctx->num_internal_frames = 2;
1152                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1153                 if (ret < 0) {
1154                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1155                         goto out;
1156                 }
1157                 num_fb = 2;
1158                 stride = q_data_src->bytesperline;
1159         } else {
1160                 ctx->num_internal_frames = 0;
1161                 num_fb = 0;
1162                 stride = 0;
1163         }
1164         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1165         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1166
1167         if (dev->devtype->product == CODA_HX4 ||
1168             dev->devtype->product == CODA_7541) {
1169                 coda_write(dev, q_data_src->bytesperline,
1170                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1171         }
1172         if (dev->devtype->product != CODA_DX6) {
1173                 coda_write(dev, ctx->iram_info.buf_bit_use,
1174                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1175                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1176                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1177                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1178                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1179                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1180                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1181                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1182                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1183                 if (dev->devtype->product == CODA_960) {
1184                         coda_write(dev, ctx->iram_info.buf_btp_use,
1185                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1186
1187                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1188
1189                         /* FIXME */
1190                         coda_write(dev, ctx->internal_frames[2].paddr,
1191                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1192                         coda_write(dev, ctx->internal_frames[3].paddr,
1193                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1194                 }
1195         }
1196
1197         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1198         if (ret < 0) {
1199                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1200                 goto out;
1201         }
1202
1203         /* Save stream headers */
1204         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1205         switch (dst_fourcc) {
1206         case V4L2_PIX_FMT_H264:
1207                 /*
1208                  * Get SPS in the first frame and copy it to an
1209                  * intermediate buffer.
1210                  */
1211                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1212                                          &ctx->vpu_header[0][0],
1213                                          &ctx->vpu_header_size[0]);
1214                 if (ret < 0)
1215                         goto out;
1216
1217                 /*
1218                  * If visible width or height are not aligned to macroblock
1219                  * size, the crop_right and crop_bottom SPS fields must be set
1220                  * to the difference between visible and coded size.  This is
1221                  * only supported by CODA960 firmware. All others do not allow
1222                  * writing frame cropping parameters, so we have to manually
1223                  * fix up the SPS RBSP (Sequence Parameter Set Raw Byte
1224                  * Sequence Payload) ourselves.
1225                  */
1226                 if (ctx->dev->devtype->product != CODA_960 &&
1227                     ((q_data_src->rect.width % 16) ||
1228                      (q_data_src->rect.height % 16))) {
1229                         ret = coda_h264_sps_fixup(ctx, q_data_src->rect.width,
1230                                                   q_data_src->rect.height,
1231                                                   &ctx->vpu_header[0][0],
1232                                                   &ctx->vpu_header_size[0],
1233                                                   sizeof(ctx->vpu_header[0]));
1234                         if (ret < 0)
1235                                 goto out;
1236                 }
1237
1238                 /*
1239                  * Get PPS in the first frame and copy it to an
1240                  * intermediate buffer.
1241                  */
1242                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1243                                          &ctx->vpu_header[1][0],
1244                                          &ctx->vpu_header_size[1]);
1245                 if (ret < 0)
1246                         goto out;
1247
1248                 /*
1249                  * Length of H.264 headers is variable and thus it might not be
1250                  * aligned for the coda to append the encoded frame. In that is
1251                  * the case a filler NAL must be added to header 2.
1252                  */
1253                 ctx->vpu_header_size[2] = coda_h264_padding(
1254                                         (ctx->vpu_header_size[0] +
1255                                          ctx->vpu_header_size[1]),
1256                                          ctx->vpu_header[2]);
1257                 break;
1258         case V4L2_PIX_FMT_MPEG4:
1259                 /*
1260                  * Get VOS in the first frame and copy it to an
1261                  * intermediate buffer
1262                  */
1263                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1264                                          &ctx->vpu_header[0][0],
1265                                          &ctx->vpu_header_size[0]);
1266                 if (ret < 0)
1267                         goto out;
1268
1269                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1270                                          &ctx->vpu_header[1][0],
1271                                          &ctx->vpu_header_size[1]);
1272                 if (ret < 0)
1273                         goto out;
1274
1275                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1276                                          &ctx->vpu_header[2][0],
1277                                          &ctx->vpu_header_size[2]);
1278                 if (ret < 0)
1279                         goto out;
1280                 break;
1281         default:
1282                 /* No more formats need to save headers at the moment */
1283                 break;
1284         }
1285
1286 out:
1287         mutex_unlock(&dev->coda_mutex);
1288         return ret;
1289 }
1290
1291 static int coda_prepare_encode(struct coda_ctx *ctx)
1292 {
1293         struct coda_q_data *q_data_src, *q_data_dst;
1294         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1295         struct coda_dev *dev = ctx->dev;
1296         int force_ipicture;
1297         int quant_param = 0;
1298         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1299         u32 rot_mode = 0;
1300         u32 dst_fourcc;
1301         u32 reg;
1302
1303         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1304         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1305         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1306         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1307         dst_fourcc = q_data_dst->fourcc;
1308
1309         src_buf->sequence = ctx->osequence;
1310         dst_buf->sequence = ctx->osequence;
1311         ctx->osequence++;
1312
1313         force_ipicture = ctx->params.force_ipicture;
1314         if (force_ipicture)
1315                 ctx->params.force_ipicture = false;
1316         else if (ctx->params.gop_size != 0 &&
1317                  (src_buf->sequence % ctx->params.gop_size) == 0)
1318                 force_ipicture = 1;
1319
1320         /*
1321          * Workaround coda firmware BUG that only marks the first
1322          * frame as IDR. This is a problem for some decoders that can't
1323          * recover when a frame is lost.
1324          */
1325         if (!force_ipicture) {
1326                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1327                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1328         } else {
1329                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1330                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1331         }
1332
1333         if (dev->devtype->product == CODA_960)
1334                 coda_set_gdi_regs(ctx);
1335
1336         /*
1337          * Copy headers in front of the first frame and forced I frames for
1338          * H.264 only. In MPEG4 they are already copied by the CODA.
1339          */
1340         if (src_buf->sequence == 0 || force_ipicture) {
1341                 pic_stream_buffer_addr =
1342                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1343                         ctx->vpu_header_size[0] +
1344                         ctx->vpu_header_size[1] +
1345                         ctx->vpu_header_size[2];
1346                 pic_stream_buffer_size = q_data_dst->sizeimage -
1347                         ctx->vpu_header_size[0] -
1348                         ctx->vpu_header_size[1] -
1349                         ctx->vpu_header_size[2];
1350                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1351                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1352                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1353                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1354                         ctx->vpu_header_size[1]);
1355                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1356                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1357                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1358         } else {
1359                 pic_stream_buffer_addr =
1360                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1361                 pic_stream_buffer_size = q_data_dst->sizeimage;
1362         }
1363
1364         if (force_ipicture) {
1365                 switch (dst_fourcc) {
1366                 case V4L2_PIX_FMT_H264:
1367                         quant_param = ctx->params.h264_intra_qp;
1368                         break;
1369                 case V4L2_PIX_FMT_MPEG4:
1370                         quant_param = ctx->params.mpeg4_intra_qp;
1371                         break;
1372                 case V4L2_PIX_FMT_JPEG:
1373                         quant_param = 30;
1374                         break;
1375                 default:
1376                         v4l2_warn(&ctx->dev->v4l2_dev,
1377                                 "cannot set intra qp, fmt not supported\n");
1378                         break;
1379                 }
1380         } else {
1381                 switch (dst_fourcc) {
1382                 case V4L2_PIX_FMT_H264:
1383                         quant_param = ctx->params.h264_inter_qp;
1384                         break;
1385                 case V4L2_PIX_FMT_MPEG4:
1386                         quant_param = ctx->params.mpeg4_inter_qp;
1387                         break;
1388                 default:
1389                         v4l2_warn(&ctx->dev->v4l2_dev,
1390                                 "cannot set inter qp, fmt not supported\n");
1391                         break;
1392                 }
1393         }
1394
1395         /* submit */
1396         if (ctx->params.rot_mode)
1397                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1398         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1399         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1400
1401         if (dev->devtype->product == CODA_960) {
1402                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1403                 coda_write(dev, q_data_src->bytesperline,
1404                            CODA9_CMD_ENC_PIC_SRC_STRIDE);
1405                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1406
1407                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1408         } else {
1409                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1410         }
1411         coda_write_base(ctx, q_data_src, src_buf, reg);
1412
1413         coda_write(dev, force_ipicture << 1 & 0x2,
1414                    CODA_CMD_ENC_PIC_OPTION);
1415
1416         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1417         coda_write(dev, pic_stream_buffer_size / 1024,
1418                    CODA_CMD_ENC_PIC_BB_SIZE);
1419
1420         if (!ctx->streamon_out) {
1421                 /* After streamoff on the output side, set stream end flag */
1422                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1423                 coda_write(dev, ctx->bit_stream_param,
1424                            CODA_REG_BIT_BIT_STREAM_PARAM);
1425         }
1426
1427         if (dev->devtype->product != CODA_DX6)
1428                 coda_write(dev, ctx->iram_info.axi_sram_use,
1429                                 CODA7_REG_BIT_AXI_SRAM_USE);
1430
1431         trace_coda_enc_pic_run(ctx, src_buf);
1432
1433         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1434
1435         return 0;
1436 }
1437
1438 static void coda_finish_encode(struct coda_ctx *ctx)
1439 {
1440         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1441         struct coda_dev *dev = ctx->dev;
1442         u32 wr_ptr, start_ptr;
1443
1444         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1445         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1446
1447         trace_coda_enc_pic_done(ctx, dst_buf);
1448
1449         /* Get results from the coda */
1450         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1451         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1452
1453         /* Calculate bytesused field */
1454         if (dst_buf->sequence == 0 ||
1455             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1456                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1457                                         ctx->vpu_header_size[0] +
1458                                         ctx->vpu_header_size[1] +
1459                                         ctx->vpu_header_size[2]);
1460         } else {
1461                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1462         }
1463
1464         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1465                  wr_ptr - start_ptr);
1466
1467         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1468         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1469
1470         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1471                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1472                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1473         } else {
1474                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1475                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1476         }
1477
1478         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1479         dst_buf->field = src_buf->field;
1480         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1481         dst_buf->flags |=
1482                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1483         dst_buf->timecode = src_buf->timecode;
1484
1485         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1486
1487         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1488         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1489
1490         ctx->gopcounter--;
1491         if (ctx->gopcounter < 0)
1492                 ctx->gopcounter = ctx->params.gop_size - 1;
1493
1494         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1495                 "job finished: encoding frame (%d) (%s)\n",
1496                 dst_buf->sequence,
1497                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1498                 "KEYFRAME" : "PFRAME");
1499 }
1500
1501 static void coda_seq_end_work(struct work_struct *work)
1502 {
1503         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1504         struct coda_dev *dev = ctx->dev;
1505
1506         mutex_lock(&ctx->buffer_mutex);
1507         mutex_lock(&dev->coda_mutex);
1508
1509         if (ctx->initialized == 0)
1510                 goto out;
1511
1512         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1513                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1514                  __func__);
1515         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1516                 v4l2_err(&dev->v4l2_dev,
1517                          "CODA_COMMAND_SEQ_END failed\n");
1518         }
1519
1520         /*
1521          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1522          * from the output stream after the h.264 decoder has run. Resetting the
1523          * hardware after the decoder has finished seems to help.
1524          */
1525         if (dev->devtype->product == CODA_960)
1526                 coda_hw_reset(ctx);
1527
1528         kfifo_init(&ctx->bitstream_fifo,
1529                 ctx->bitstream.vaddr, ctx->bitstream.size);
1530
1531         coda_free_framebuffers(ctx);
1532
1533         ctx->initialized = 0;
1534
1535 out:
1536         mutex_unlock(&dev->coda_mutex);
1537         mutex_unlock(&ctx->buffer_mutex);
1538 }
1539
1540 static void coda_bit_release(struct coda_ctx *ctx)
1541 {
1542         mutex_lock(&ctx->buffer_mutex);
1543         coda_free_framebuffers(ctx);
1544         coda_free_context_buffers(ctx);
1545         coda_free_bitstream_buffer(ctx);
1546         mutex_unlock(&ctx->buffer_mutex);
1547 }
1548
/*
 * Context operations table for BIT processor encoder instances. It hooks
 * the encoder callbacks defined in this file into struct coda_context_ops.
 * The table is not static, so it can be referenced from other files.
 */
1549 const struct coda_context_ops coda_bit_encode_ops = {
1550         .queue_init = coda_encoder_queue_init,
1551         .reqbufs = coda_encoder_reqbufs,
1552         .start_streaming = coda_start_encoding,
1553         .prepare_run = coda_prepare_encode,
1554         .finish_run = coda_finish_encode,
1555         .seq_end_work = coda_seq_end_work,
1556         .release = coda_bit_release,
1557 };
1558
1559 /*
1560  * Decoder context operations
1561  */
1562
1563 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1564                                        struct coda_q_data *q_data)
1565 {
1566         if (ctx->bitstream.vaddr)
1567                 return 0;
1568
1569         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1570         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1571                                             ctx->bitstream.size,
1572                                             &ctx->bitstream.paddr, GFP_KERNEL);
1573         if (!ctx->bitstream.vaddr) {
1574                 v4l2_err(&ctx->dev->v4l2_dev,
1575                          "failed to allocate bitstream ringbuffer");
1576                 return -ENOMEM;
1577         }
1578         kfifo_init(&ctx->bitstream_fifo,
1579                    ctx->bitstream.vaddr, ctx->bitstream.size);
1580
1581         return 0;
1582 }
1583
1584 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1585 {
1586         if (ctx->bitstream.vaddr == NULL)
1587                 return;
1588
1589         dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1590                     ctx->bitstream.vaddr, ctx->bitstream.paddr);
1591         ctx->bitstream.vaddr = NULL;
1592         kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1593 }
1594
1595 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1596                                 struct v4l2_requestbuffers *rb)
1597 {
1598         struct coda_q_data *q_data_src;
1599         int ret;
1600
1601         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1602                 return 0;
1603
1604         if (rb->count) {
1605                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1606                 ret = coda_alloc_context_buffers(ctx, q_data_src);
1607                 if (ret < 0)
1608                         return ret;
1609                 ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1610                 if (ret < 0) {
1611                         coda_free_context_buffers(ctx);
1612                         return ret;
1613                 }
1614         } else {
1615                 coda_free_bitstream_buffer(ctx);
1616                 coda_free_context_buffers(ctx);
1617         }
1618
1619         return 0;
1620 }
1621
1622 static bool coda_reorder_enable(struct coda_ctx *ctx)
1623 {
1624         struct coda_dev *dev = ctx->dev;
1625         int profile;
1626
1627         if (dev->devtype->product != CODA_HX4 &&
1628             dev->devtype->product != CODA_7541 &&
1629             dev->devtype->product != CODA_960)
1630                 return false;
1631
1632         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1633                 return false;
1634
1635         if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1636                 return true;
1637
1638         profile = coda_h264_profile(ctx->params.h264_profile_idc);
1639         if (profile < 0)
1640                 v4l2_warn(&dev->v4l2_dev, "Unknown H264 Profile: %u\n",
1641                           ctx->params.h264_profile_idc);
1642
1643         /* Baseline profile does not support reordering */
1644         return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1645 }
1646
/*
 * __coda_start_decoding() - initialize a decoder sequence on the hardware
 *
 * Programs the parameter buffer, bitstream ring buffer and frame memory
 * control registers, runs CODA_COMMAND_SEQ_INIT, reads back the stream
 * dimensions, the number of required frame buffers and (for H.264) the crop
 * rectangle, allocates the internal framebuffers and registers them with
 * CODA_COMMAND_SET_FRAME_BUF.
 *
 * This function takes no locks itself. coda_start_decoding() calls it with
 * dev->coda_mutex held.
 * NOTE(review): coda_prepare_decode() calls it without taking a lock of its
 * own; presumably its caller already holds coda_mutex - confirm.
 *
 * ctx->initialized is set as soon as the SEQ_INIT command has completed, so
 * it remains set on all error returns after that point.
 *
 * Returns 0 on success, the coda_command_sync() error if SEQ_INIT did not
 * complete, -EAGAIN if the SEQ_INIT success register reads 0, -EINVAL
 * if the stream is larger than the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS frames, the coda_alloc_framebuffers() error, or
 * -ETIMEDOUT if SET_FRAME_BUF did not complete.
 */
1647 static int __coda_start_decoding(struct coda_ctx *ctx)
1648 {
1649         struct coda_q_data *q_data_src, *q_data_dst;
1650         u32 bitstream_buf, bitstream_size;
1651         struct coda_dev *dev = ctx->dev;
1652         int width, height;
1653         u32 src_fourcc, dst_fourcc;
1654         u32 val;
1655         int ret;
1656
1657         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1658                  "Video Data Order Adapter: %s\n",
1659                  ctx->use_vdoa ? "Enabled" : "Disabled");
1660
1661         /* Start decoding */
1662         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1663         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1664         bitstream_buf = ctx->bitstream.paddr;
1665         bitstream_size = ctx->bitstream.size;
1666         src_fourcc = q_data_src->fourcc;
1667         dst_fourcc = q_data_dst->fourcc;
1668
1669         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1670
1671         /* Update coda bitstream read and write pointers from kfifo */
1672         coda_kfifo_sync_to_device_full(ctx);
1673
             /*
              * Recompute the chroma interleave and tiled map bits of the
              * frame memory control register from the current formats.
              */
1674         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
1675                                  CODA9_FRAME_TILED2LINEAR);
1676         if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
1677                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1678         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1679                 ctx->frame_mem_ctrl |= (0x3 << 9) |
1680                         ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
1681         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1682
             /* No display frame yet and no frame display flags set */
1683         ctx->display_idx = -1;
1684         ctx->frm_dis_flg = 0;
1685         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1686
             /* The bitstream buffer size is written in units of 1024 bytes */
1687         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1688         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1689         val = 0;
1690         if (coda_reorder_enable(ctx))
1691                 val |= CODA_REORDER_ENABLE;
1692         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1693                 val |= CODA_NO_INT_ENABLE;
1694         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1695
1696         ctx->params.codec_mode = ctx->codec->mode;
1697         if (dev->devtype->product == CODA_960 &&
1698             src_fourcc == V4L2_PIX_FMT_MPEG4)
1699                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1700         else
1701                 ctx->params.codec_mode_aux = 0;
1702         if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
1703                 coda_write(dev, CODA_MP4_CLASS_MPEG4,
1704                            CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
1705         }
1706         if (src_fourcc == V4L2_PIX_FMT_H264) {
1707                 if (dev->devtype->product == CODA_HX4 ||
1708                     dev->devtype->product == CODA_7541) {
1709                         coda_write(dev, ctx->psbuf.paddr,
1710                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1711                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1712                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1713                 }
1714                 if (dev->devtype->product == CODA_960) {
1715                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1716                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1717                 }
1718         }
1719         if (src_fourcc == V4L2_PIX_FMT_JPEG)
1720                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_JPG_THUMB_EN);
1721         if (dev->devtype->product != CODA_960)
1722                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1723
             /*
              * The escape flag is only kept in bit_stream_param for the
              * duration of the SEQ_INIT command and cleared right after.
              */
1724         ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
1725         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1726         ctx->bit_stream_param = 0;
1727         if (ret) {
1728                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1729                 return ret;
1730         }
             /* Set before the result checks below, which may still fail */
1731         ctx->sequence_offset = ~0U;
1732         ctx->initialized = 1;
1733
1734         /* Update kfifo out pointer from coda bitstream read pointer */
1735         coda_kfifo_sync_from_device(ctx);
1736
1737         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1738                 v4l2_err(&dev->v4l2_dev,
1739                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1740                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1741                 return -EAGAIN;
1742         }
1743
             /* Width and height share one register, the layout is per product */
1744         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1745         if (dev->devtype->product == CODA_DX6) {
1746                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1747                 height = val & CODADX6_PICHEIGHT_MASK;
1748         } else {
1749                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1750                 height = val & CODA7_PICHEIGHT_MASK;
1751         }
1752
             /*
              * Refuse streams that do not fit into the capture format. Note
              * that the width is compared against bytesperline here.
              */
1753         if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1754                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1755                          width, height, q_data_dst->bytesperline,
1756                          q_data_dst->height);
1757                 return -EINVAL;
1758         }
1759
1760         width = round_up(width, 16);
1761         height = round_up(height, 16);
1762
1763         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1764                  __func__, ctx->idx, width, height);
1765
1766         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1767         /*
1768          * If the VDOA is used, the decoder needs one additional frame,
1769          * because the frames are freed when the next frame is decoded.
1770          * Otherwise there are visible errors in the decoded frames (green
1771          * regions in displayed frames) and a broken order of frames (earlier
1772          * frames are sporadically displayed after later frames).
1773          */
1774         if (ctx->use_vdoa)
1775                 ctx->num_internal_frames += 1;
1776         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1777                 v4l2_err(&dev->v4l2_dev,
1778                          "not enough framebuffers to decode (%d < %d)\n",
1779                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1780                 return -EINVAL;
1781         }
1782
             /*
              * For H.264, derive the capture crop rectangle from the crop
              * registers; each register packs two 10-bit fields.
              */
1783         if (src_fourcc == V4L2_PIX_FMT_H264) {
1784                 u32 left_right;
1785                 u32 top_bottom;
1786
1787                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1788                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1789
1790                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1791                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1792                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1793                                          (left_right & 0x3ff);
1794                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1795                                           (top_bottom & 0x3ff);
1796         }
1797
1798         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1799         if (ret < 0) {
1800                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1801                 return ret;
1802         }
1803
1804         /* Tell the decoder how many frame buffers we allocated. */
1805         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1806         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1807
1808         if (dev->devtype->product != CODA_DX6) {
1809                 /* Set secondary AXI IRAM */
1810                 coda_setup_iram(ctx);
1811
1812                 coda_write(dev, ctx->iram_info.buf_bit_use,
1813                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1814                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1815                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1816                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1817                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1818                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1819                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1820                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1821                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1822                 if (dev->devtype->product == CODA_960) {
1823                         coda_write(dev, ctx->iram_info.buf_btp_use,
1824                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1825
1826                         coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1827                         coda9_set_frame_cache(ctx, dst_fourcc);
1828                 }
1829         }
1830
1831         if (src_fourcc == V4L2_PIX_FMT_H264) {
1832                 coda_write(dev, ctx->slicebuf.paddr,
1833                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1834                 coda_write(dev, ctx->slicebuf.size / 1024,
1835                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1836         }
1837
             /*
              * The maximum decode size is programmed as 1920x1088, expressed
              * in 16x16 macroblocks. Both branches compute the same value and
              * differ only in the register that receives it.
              */
1838         if (dev->devtype->product == CODA_HX4 ||
1839             dev->devtype->product == CODA_7541) {
1840                 int max_mb_x = 1920 / 16;
1841                 int max_mb_y = 1088 / 16;
1842                 int max_mb_num = max_mb_x * max_mb_y;
1843
1844                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1845                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1846         } else if (dev->devtype->product == CODA_960) {
1847                 int max_mb_x = 1920 / 16;
1848                 int max_mb_y = 1088 / 16;
1849                 int max_mb_num = max_mb_x * max_mb_y;
1850
1851                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1852                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1853         }
1854
1855         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1856                 v4l2_err(&ctx->dev->v4l2_dev,
1857                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1858                 return -ETIMEDOUT;
1859         }
1860
1861         return 0;
1862 }
1863
1864 static int coda_start_decoding(struct coda_ctx *ctx)
1865 {
1866         struct coda_dev *dev = ctx->dev;
1867         int ret;
1868
1869         mutex_lock(&dev->coda_mutex);
1870         ret = __coda_start_decoding(ctx);
1871         mutex_unlock(&dev->coda_mutex);
1872
1873         return ret;
1874 }
1875
/*
 * coda_prepare_decode() - set up and start one decoder picture run
 *
 * Refills the bitstream ring buffer, (re)initializes the sequence if the
 * context is not initialized yet, programs the output (rotator or VDOA)
 * and picture run registers for the next capture buffer and starts
 * CODA_COMMAND_PIC_RUN asynchronously.
 *
 * Returns 0 once the command has been issued. Returns -EAGAIN, after calling
 * v4l2_m2m_job_finish(), if the ring buffer holds less than 512 bytes while
 * the stream end flag is not set, or if __coda_start_decoding() failed.
 */
1876 static int coda_prepare_decode(struct coda_ctx *ctx)
1877 {
1878         struct vb2_v4l2_buffer *dst_buf;
1879         struct coda_dev *dev = ctx->dev;
1880         struct coda_q_data *q_data_dst;
1881         struct coda_buffer_meta *meta;
1882         unsigned long flags;
1883         u32 rot_mode = 0;
1884         u32 reg_addr, reg_stride;
1885
1886         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1887         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1888
1889         /* Try to copy source buffer contents into the bitstream ringbuffer */
1890         mutex_lock(&ctx->bitstream_mutex);
1891         coda_fill_bitstream(ctx, NULL);
1892         mutex_unlock(&ctx->bitstream_mutex);
1893
             /* Skip this run if too little data is queued and more may follow */
1894         if (coda_get_bitstream_payload(ctx) < 512 &&
1895             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1896                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1897                          "bitstream payload: %d, skipping\n",
1898                          coda_get_bitstream_payload(ctx));
1899                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1900                 return -EAGAIN;
1901         }
1902
1903         /* Run coda_start_decoding (again) if not yet initialized */
1904         if (!ctx->initialized) {
1905                 int ret = __coda_start_decoding(ctx);
1906
1907                 if (ret < 0) {
1908                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1909                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1910                         return -EAGAIN;
1911                 } else {
                             /* __coda_start_decoding() has already set this on success */
1912                         ctx->initialized = 1;
1913                 }
1914         }
1915
1916         if (dev->devtype->product == CODA_960)
1917                 coda_set_gdi_regs(ctx);
1918
             /*
              * With the VDOA in use and a valid display frame, the VDOA is run
              * from that internal frame to the capture buffer and rot_mode
              * stays 0. Otherwise the rotator is pointed at the capture buffer.
              */
1919         if (ctx->use_vdoa &&
1920             ctx->display_idx >= 0 &&
1921             ctx->display_idx < ctx->num_internal_frames) {
1922                 vdoa_device_run(ctx->vdoa,
1923                                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
1924                                 ctx->internal_frames[ctx->display_idx].paddr);
1925         } else {
1926                 if (dev->devtype->product == CODA_960) {
1927                         /*
1928                          * The CODA960 seems to have an internal list of
1929                          * buffers with 64 entries that includes the
1930                          * registered frame buffers as well as the rotator
1931                          * buffer output.
1932                          *
1933                          * ROT_INDEX needs to be < 0x40, but >
1934                          * ctx->num_internal_frames.
1935                          */
1936                         coda_write(dev,
1937                                    CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1938                                    CODA9_CMD_DEC_PIC_ROT_INDEX);
1939
1940                         reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1941                         reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1942                 } else {
1943                         reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1944                         reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1945                 }
1946                 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1947                 coda_write(dev, q_data_dst->bytesperline, reg_stride);
1948
1949                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1950         }
1951
1952         coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);
1953
1954         switch (dev->devtype->product) {
1955         case CODA_DX6:
1956                 /* TBD; falls through and is handled like CODA_HX4/CODA_7541 */
1957         case CODA_HX4:
1958         case CODA_7541:
1959                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1960                 break;
1961         case CODA_960:
1962                 /* 'hardcode to use interrupt disable mode'? */
1963                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1964                 break;
1965         }
1966
1967         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1968
1969         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1970         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1971
1972         if (dev->devtype->product != CODA_DX6)
1973                 coda_write(dev, ctx->iram_info.axi_sram_use,
1974                                 CODA7_REG_BIT_AXI_SRAM_USE);
1975
             /* Peek at the oldest queued buffer meta; it stays on the list */
1976         spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1977         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1978                                         struct coda_buffer_meta, list);
1979
1980         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1981
1982                 /* If this is the last buffer in the bitstream, add padding */
1983                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1984                                   ctx->bitstream_fifo.kfifo.mask)) {
                             /* Static scratch buffer, refilled with 0xff on every use */
1985                         static unsigned char buf[512];
1986                         unsigned int pad;
1987
1988                         /* Pad to multiple of 256 and then add 256 more */
1989                         pad = ((0 - meta->end) & 0xff) + 256;
1990
1991                         memset(buf, 0xff, sizeof(buf));
1992
1993                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1994                 }
1995         }
1996         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1997
             /* Push the (possibly padded) kfifo pointers to the hardware */
1998         coda_kfifo_sync_to_device_full(ctx);
1999
2000         /* Clear decode success flag */
2001         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
2002
2003         /* Clear error return value */
2004         coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB);
2005
2006         trace_coda_dec_pic_run(ctx, meta);
2007
2008         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
2009
2010         return 0;
2011 }
2012
2013 static void coda_finish_decode(struct coda_ctx *ctx)
2014 {
2015         struct coda_dev *dev = ctx->dev;
2016         struct coda_q_data *q_data_src;
2017         struct coda_q_data *q_data_dst;
2018         struct vb2_v4l2_buffer *dst_buf;
2019         struct coda_buffer_meta *meta;
2020         unsigned long payload;
2021         unsigned long flags;
2022         int width, height;
2023         int decoded_idx;
2024         int display_idx;
2025         u32 src_fourcc;
2026         int success;
2027         u32 err_mb;
2028         int err_vdoa = 0;
2029         u32 val;
2030
2031         /* Update kfifo out pointer from coda bitstream read pointer */
2032         coda_kfifo_sync_from_device(ctx);
2033
2034         /*
2035          * in stream-end mode, the read pointer can overshoot the write pointer
2036          * by up to 512 bytes
2037          */
2038         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
2039                 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
2040                         kfifo_init(&ctx->bitstream_fifo,
2041                                 ctx->bitstream.vaddr, ctx->bitstream.size);
2042         }
2043
2044         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
2045         src_fourcc = q_data_src->fourcc;
2046
2047         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
2048         if (val != 1)
2049                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
2050
2051         success = val & 0x1;
2052         if (!success)
2053                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
2054
2055         if (src_fourcc == V4L2_PIX_FMT_H264) {
2056                 if (val & (1 << 3))
2057                         v4l2_err(&dev->v4l2_dev,
2058                                  "insufficient PS buffer space (%d bytes)\n",
2059                                  ctx->psbuf.size);
2060                 if (val & (1 << 2))
2061                         v4l2_err(&dev->v4l2_dev,
2062                                  "insufficient slice buffer space (%d bytes)\n",
2063                                  ctx->slicebuf.size);
2064         }
2065
2066         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
2067         width = (val >> 16) & 0xffff;
2068         height = val & 0xffff;
2069
2070         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2071
2072         /* frame crop information */
2073         if (src_fourcc == V4L2_PIX_FMT_H264) {
2074                 u32 left_right;
2075                 u32 top_bottom;
2076
2077                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
2078                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
2079
2080                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
2081                         /* Keep current crop information */
2082                 } else {
2083                         struct v4l2_rect *rect = &q_data_dst->rect;
2084
2085                         rect->left = left_right >> 16 & 0xffff;
2086                         rect->top = top_bottom >> 16 & 0xffff;
2087                         rect->width = width - rect->left -
2088                                       (left_right & 0xffff);
2089                         rect->height = height - rect->top -
2090                                        (top_bottom & 0xffff);
2091                 }
2092         } else {
2093                 /* no cropping */
2094         }
2095
2096         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
2097         if (err_mb > 0)
2098                 v4l2_err(&dev->v4l2_dev,
2099                          "errors in %d macroblocks\n", err_mb);
2100
2101         if (dev->devtype->product == CODA_HX4 ||
2102             dev->devtype->product == CODA_7541) {
2103                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
2104                 if (val == 0) {
2105                         /* not enough bitstream data */
2106                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2107                                  "prescan failed: %d\n", val);
2108                         ctx->hold = true;
2109                         return;
2110                 }
2111         }
2112
2113         /* Wait until the VDOA finished writing the previous display frame */
2114         if (ctx->use_vdoa &&
2115             ctx->display_idx >= 0 &&
2116             ctx->display_idx < ctx->num_internal_frames) {
2117                 err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
2118         }
2119
2120         ctx->frm_dis_flg = coda_read(dev,
2121                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2122
2123         /* The previous display frame was copied out and can be overwritten */
2124         if (ctx->display_idx >= 0 &&
2125             ctx->display_idx < ctx->num_internal_frames) {
2126                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
2127                 coda_write(dev, ctx->frm_dis_flg,
2128                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2129         }
2130
2131         /*
2132          * The index of the last decoded frame, not necessarily in
2133          * display order, and the index of the next display frame.
2134          * The latter could have been decoded in a previous run.
2135          */
2136         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
2137         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
2138
2139         if (decoded_idx == -1) {
2140                 /* no frame was decoded, but we might have a display frame */
2141                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
2142                         ctx->sequence_offset++;
2143                 else if (ctx->display_idx < 0)
2144                         ctx->hold = true;
2145         } else if (decoded_idx == -2) {
2146                 if (ctx->display_idx >= 0 &&
2147                     ctx->display_idx < ctx->num_internal_frames)
2148                         ctx->sequence_offset++;
2149                 /* no frame was decoded, we still return remaining buffers */
2150         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
2151                 v4l2_err(&dev->v4l2_dev,
2152                          "decoded frame index out of range: %d\n", decoded_idx);
2153         } else {
2154                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM);
2155                 if (ctx->sequence_offset == -1)
2156                         ctx->sequence_offset = val;
2157                 val -= ctx->sequence_offset;
2158                 spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
2159                 if (!list_empty(&ctx->buffer_meta_list)) {
2160                         meta = list_first_entry(&ctx->buffer_meta_list,
2161                                               struct coda_buffer_meta, list);
2162                         list_del(&meta->list);
2163                         ctx->num_metas--;
2164                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2165                         /*
2166                          * Clamp counters to 16 bits for comparison, as the HW
2167                          * counter rolls over at this point for h.264. This
2168                          * may be different for other formats, but using 16 bits
2169                          * should be enough to detect most errors and saves us
2170                          * from doing different things based on the format.
2171                          */
2172                         if ((val & 0xffff) != (meta->sequence & 0xffff)) {
2173                                 v4l2_err(&dev->v4l2_dev,
2174                                          "sequence number mismatch (%d(%d) != %d)\n",
2175                                          val, ctx->sequence_offset,
2176                                          meta->sequence);
2177                         }
2178                         ctx->frame_metas[decoded_idx] = *meta;
2179                         kfree(meta);
2180                 } else {
2181                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2182                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
2183                         memset(&ctx->frame_metas[decoded_idx], 0,
2184                                sizeof(struct coda_buffer_meta));
2185                         ctx->frame_metas[decoded_idx].sequence = val;
2186                         ctx->sequence_offset++;
2187                 }
2188
2189                 trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);
2190
2191                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
2192                 if (val == 0)
2193                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
2194                 else if (val == 1)
2195                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
2196                 else
2197                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
2198
2199                 ctx->frame_errors[decoded_idx] = err_mb;
2200         }
2201
2202         if (display_idx == -1) {
2203                 /*
2204                  * no more frames to be decoded, but there could still
2205                  * be rotator output to dequeue
2206                  */
2207                 ctx->hold = true;
2208         } else if (display_idx == -3) {
2209                 /* possibly prescan failure */
2210         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
2211                 v4l2_err(&dev->v4l2_dev,
2212                          "presentation frame index out of range: %d\n",
2213                          display_idx);
2214         }
2215
2216         /* If a frame was copied out, return it */
2217         if (ctx->display_idx >= 0 &&
2218             ctx->display_idx < ctx->num_internal_frames) {
2219                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2220                 dst_buf->sequence = ctx->osequence++;
2221
2222                 dst_buf->field = V4L2_FIELD_NONE;
2223                 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
2224                                              V4L2_BUF_FLAG_PFRAME |
2225                                              V4L2_BUF_FLAG_BFRAME);
2226                 dst_buf->flags |= ctx->frame_types[ctx->display_idx];
2227                 meta = &ctx->frame_metas[ctx->display_idx];
2228                 dst_buf->timecode = meta->timecode;
2229                 dst_buf->vb2_buf.timestamp = meta->timestamp;
2230
2231                 trace_coda_dec_rot_done(ctx, dst_buf, meta);
2232
2233                 switch (q_data_dst->fourcc) {
2234                 case V4L2_PIX_FMT_YUYV:
2235                         payload = width * height * 2;
2236                         break;
2237                 case V4L2_PIX_FMT_YUV420:
2238                 case V4L2_PIX_FMT_YVU420:
2239                 case V4L2_PIX_FMT_NV12:
2240                 default:
2241                         payload = width * height * 3 / 2;
2242                         break;
2243                 case V4L2_PIX_FMT_YUV422P:
2244                         payload = width * height * 2;
2245                         break;
2246                 }
2247                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
2248
2249                 if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
2250                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2251                 else
2252                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
2253
2254                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2255                         "job finished: decoding frame (%d) (%s)\n",
2256                         dst_buf->sequence,
2257                         (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
2258                         "KEYFRAME" : "PFRAME");
2259         } else {
2260                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2261                         "job finished: no frame decoded\n");
2262         }
2263
2264         /* The rotator will copy the current display frame next time */
2265         ctx->display_idx = display_idx;
2266 }
2267
2268 static void coda_decode_timeout(struct coda_ctx *ctx)
2269 {
2270         struct vb2_v4l2_buffer *dst_buf;
2271
2272         /*
2273          * For now this only handles the case where we would deadlock with
2274          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2275          * but after a failed decode run we would hold the context and wait for
2276          * userspace to queue more buffers.
2277          */
2278         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2279                 return;
2280
2281         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2282         dst_buf->sequence = ctx->qsequence - 1;
2283
2284         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2285 }
2286
2287 const struct coda_context_ops coda_bit_decode_ops = {
2288         .queue_init = coda_decoder_queue_init,
2289         .reqbufs = coda_decoder_reqbufs,
2290         .start_streaming = coda_start_decoding,
2291         .prepare_run = coda_prepare_decode,
2292         .finish_run = coda_finish_decode,
2293         .run_timeout = coda_decode_timeout,
2294         .seq_end_work = coda_seq_end_work,
2295         .release = coda_bit_release,
2296 };
2297
2298 irqreturn_t coda_irq_handler(int irq, void *data)
2299 {
2300         struct coda_dev *dev = data;
2301         struct coda_ctx *ctx;
2302
2303         /* read status register to attend the IRQ */
2304         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2305         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2306                       CODA_REG_BIT_INT_CLEAR);
2307
2308         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2309         if (ctx == NULL) {
2310                 v4l2_err(&dev->v4l2_dev,
2311                          "Instance released before the end of transaction\n");
2312                 return IRQ_HANDLED;
2313         }
2314
2315         trace_coda_bit_done(ctx);
2316
2317         if (ctx->aborting) {
2318                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2319                          "task has been aborted\n");
2320         }
2321
2322         if (coda_isbusy(ctx->dev)) {
2323                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2324                          "coda is still busy!!!!\n");
2325                 return IRQ_NONE;
2326         }
2327
2328         complete(&ctx->completion);
2329
2330         return IRQ_HANDLED;
2331 }