GNU Linux-libre 4.14.313-gnu1
[releases.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
37 #define CODA_PARA_BUF_SIZE      (10 * 1024) /* size of the per-context parameter buffer ("parabuf") */
38 #define CODA7_PS_BUF_SIZE       0x28000 /* size of the "psbuf" allocated on CODA_7541 only */
39 #define CODA9_PS_SAVE_SIZE      (512 * 1024) /* extra work buffer space added for H.264 on CODA_960 */
40
41 #define CODA_DEFAULT_GAMMA      4096
42 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
43
44 static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_960 ||
72             dev->devtype->product == CODA_7541) {
73                 /* Restore context related registers to CODA */
74                 coda_write(dev, ctx->bit_stream_param,
75                                 CODA_REG_BIT_BIT_STREAM_PARAM);
76                 coda_write(dev, ctx->frm_dis_flg,
77                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
78                 coda_write(dev, ctx->frame_mem_ctrl,
79                                 CODA_REG_BIT_FRAME_MEM_CTRL);
80                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
81         }
82
83         if (dev->devtype->product == CODA_960) {
84                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
85                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
86         }
87
88         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
89
90         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
91         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
92         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
93
94         trace_coda_bit_run(ctx, cmd);
95
96         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
97 }
98
99 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
100 {
101         struct coda_dev *dev = ctx->dev;
102         int ret;
103
104         coda_command_async(ctx, cmd);
105         ret = coda_wait_timeout(dev);
106         trace_coda_bit_done(ctx);
107
108         return ret;
109 }
110
111 int coda_hw_reset(struct coda_ctx *ctx)
112 {
113         struct coda_dev *dev = ctx->dev;
114         unsigned long timeout;
115         unsigned int idx;
116         int ret;
117
118         if (!dev->rstc)
119                 return -ENOENT;
120
121         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
122
123         if (dev->devtype->product == CODA_960) {
124                 timeout = jiffies + msecs_to_jiffies(100);
125                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
126                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
127                         if (time_after(jiffies, timeout))
128                                 return -ETIME;
129                         cpu_relax();
130                 }
131         }
132
133         ret = reset_control_reset(dev->rstc);
134         if (ret < 0)
135                 return ret;
136
137         if (dev->devtype->product == CODA_960)
138                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
139         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
140         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
141         ret = coda_wait_timeout(dev);
142         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
143
144         return ret;
145 }
146
147 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr;
152
153         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
154         kfifo->out = (kfifo->in & ~kfifo->mask) |
155                       (rd_ptr - ctx->bitstream.paddr);
156         if (kfifo->out > kfifo->in)
157                 kfifo->out -= kfifo->mask + 1;
158 }
159
160 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
161 {
162         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
163         struct coda_dev *dev = ctx->dev;
164         u32 rd_ptr, wr_ptr;
165
166         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
167         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
168         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
169         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
170 }
171
172 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
173 {
174         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
175         struct coda_dev *dev = ctx->dev;
176         u32 wr_ptr;
177
178         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
179         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
180 }
181
182 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
183 {
184         unsigned char *buf;
185         u32 n;
186
187         if (size < 6)
188                 size = 6;
189
190         buf = kmalloc(size, GFP_KERNEL);
191         if (!buf)
192                 return -ENOMEM;
193
194         coda_h264_filler_nal(size, buf);
195         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
196         kfree(buf);
197
198         return (n < size) ? -ENOSPC : 0;
199 }
200
201 static int coda_bitstream_queue(struct coda_ctx *ctx,
202                                 struct vb2_v4l2_buffer *src_buf)
203 {
204         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
205         u32 n;
206
207         n = kfifo_in(&ctx->bitstream_fifo,
208                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
209         if (n < src_size)
210                 return -ENOSPC;
211
212         src_buf->sequence = ctx->qsequence++;
213
214         return 0;
215 }
216
217 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
218                                      struct vb2_v4l2_buffer *src_buf)
219 {
220         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
221         int ret;
222
223         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
224             ctx->bitstream.size)
225                 return false;
226
227         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
228                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
229                 return true;
230         }
231
232         /* Add zero padding before the first H.264 buffer, if it is too small */
233         if (ctx->qsequence == 0 && payload < 512 &&
234             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
235                 coda_bitstream_pad(ctx, 512 - payload);
236
237         ret = coda_bitstream_queue(ctx, src_buf);
238         if (ret < 0) {
239                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
240                 return false;
241         }
242         /* Sync read pointer to device */
243         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
244                 coda_kfifo_sync_to_device_write(ctx);
245
246         ctx->hold = false;
247
248         return true;
249 }
250
251 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
252 {
253         struct vb2_v4l2_buffer *src_buf;
254         struct coda_buffer_meta *meta;
255         unsigned long flags;
256         u32 start;
257
258         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
259                 return;
260
261         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
262                 /*
263                  * Only queue a single JPEG into the bitstream buffer, except
264                  * to increase payload over 512 bytes or if in hold state.
265                  */
266                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
267                     (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
268                         break;
269
270                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
271
272                 /* Drop frames that do not start/end with a SOI/EOI markers */
273                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
274                     !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
275                         v4l2_err(&ctx->dev->v4l2_dev,
276                                  "dropping invalid JPEG frame %d\n",
277                                  ctx->qsequence);
278                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
279                         if (buffer_list) {
280                                 struct v4l2_m2m_buffer *m2m_buf;
281
282                                 m2m_buf = container_of(src_buf,
283                                                        struct v4l2_m2m_buffer,
284                                                        vb);
285                                 list_add_tail(&m2m_buf->list, buffer_list);
286                         } else {
287                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
288                         }
289                         continue;
290                 }
291
292                 /* Dump empty buffers */
293                 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
294                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
295                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
296                         continue;
297                 }
298
299                 /* Buffer start position */
300                 start = ctx->bitstream_fifo.kfifo.in &
301                         ctx->bitstream_fifo.kfifo.mask;
302
303                 if (coda_bitstream_try_queue(ctx, src_buf)) {
304                         /*
305                          * Source buffer is queued in the bitstream ringbuffer;
306                          * queue the timestamp and mark source buffer as done
307                          */
308                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
309
310                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
311                         if (meta) {
312                                 meta->sequence = src_buf->sequence;
313                                 meta->timecode = src_buf->timecode;
314                                 meta->timestamp = src_buf->vb2_buf.timestamp;
315                                 meta->start = start;
316                                 meta->end = ctx->bitstream_fifo.kfifo.in &
317                                             ctx->bitstream_fifo.kfifo.mask;
318                                 spin_lock_irqsave(&ctx->buffer_meta_lock,
319                                                   flags);
320                                 list_add_tail(&meta->list,
321                                               &ctx->buffer_meta_list);
322                                 ctx->num_metas++;
323                                 spin_unlock_irqrestore(&ctx->buffer_meta_lock,
324                                                        flags);
325
326                                 trace_coda_bit_queue(ctx, src_buf, meta);
327                         }
328
329                         if (buffer_list) {
330                                 struct v4l2_m2m_buffer *m2m_buf;
331
332                                 m2m_buf = container_of(src_buf,
333                                                        struct v4l2_m2m_buffer,
334                                                        vb);
335                                 list_add_tail(&m2m_buf->list, buffer_list);
336                         } else {
337                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
338                         }
339                 } else {
340                         break;
341                 }
342         }
343 }
344
345 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
346 {
347         struct coda_dev *dev = ctx->dev;
348
349         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
350
351         /* If this context is currently running, update the hardware flag */
352         if ((dev->devtype->product == CODA_960) &&
353             coda_isbusy(dev) &&
354             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
355                 coda_write(dev, ctx->bit_stream_param,
356                            CODA_REG_BIT_BIT_STREAM_PARAM);
357         }
358 }
359
360 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
361 {
362         struct coda_dev *dev = ctx->dev;
363         u32 *p = ctx->parabuf.vaddr;
364
365         if (dev->devtype->product == CODA_DX6)
366                 p[index] = value;
367         else
368                 p[index ^ 1] = value;
369 }
370
371 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
372                                          struct coda_aux_buf *buf, size_t size,
373                                          const char *name)
374 {
375         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
376 }
377
378
379 static void coda_free_framebuffers(struct coda_ctx *ctx)
380 {
381         int i;
382
383         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
384                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
385 }
386
387 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
388                                    struct coda_q_data *q_data, u32 fourcc)
389 {
390         struct coda_dev *dev = ctx->dev;
391         int width, height;
392         int ysize;
393         int ret;
394         int i;
395
396         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
397             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) {
399                 width = round_up(q_data->width, 16);
400                 height = round_up(q_data->height, 16);
401         } else {
402                 width = round_up(q_data->width, 8);
403                 height = q_data->height;
404         }
405         ysize = width * height;
406
407         /* Allocate frame buffers */
408         for (i = 0; i < ctx->num_internal_frames; i++) {
409                 size_t size;
410                 char *name;
411
412                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
413                         size = round_up(ysize, 4096) + ysize / 2;
414                 else
415                         size = ysize + ysize / 2;
416                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
417                     dev->devtype->product != CODA_DX6)
418                         size += ysize / 4;
419                 name = kasprintf(GFP_KERNEL, "fb%d", i);
420                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
421                                              size, name);
422                 kfree(name);
423                 if (ret < 0) {
424                         coda_free_framebuffers(ctx);
425                         return ret;
426                 }
427         }
428
429         /* Register frame buffers in the parameter buffer */
430         for (i = 0; i < ctx->num_internal_frames; i++) {
431                 u32 y, cb, cr, mvcol;
432
433                 /* Start addresses of Y, Cb, Cr planes */
434                 y = ctx->internal_frames[i].paddr;
435                 cb = y + ysize;
436                 cr = y + ysize + ysize/4;
437                 mvcol = y + ysize + ysize/4 + ysize/4;
438                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
439                         cb = round_up(cb, 4096);
440                         mvcol = cb + ysize/2;
441                         cr = 0;
442                         /* Packed 20-bit MSB of base addresses */
443                         /* YYYYYCCC, CCyyyyyc, cccc.... */
444                         y = (y & 0xfffff000) | cb >> 20;
445                         cb = (cb & 0x000ff000) << 12;
446                 }
447                 coda_parabuf_write(ctx, i * 3 + 0, y);
448                 coda_parabuf_write(ctx, i * 3 + 1, cb);
449                 coda_parabuf_write(ctx, i * 3 + 2, cr);
450
451                 /* mvcol buffer for h.264 */
452                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
453                     dev->devtype->product != CODA_DX6)
454                         coda_parabuf_write(ctx, 96 + i, mvcol);
455         }
456
457         /* mvcol buffer for mpeg4 */
458         if ((dev->devtype->product != CODA_DX6) &&
459             (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
460                 coda_parabuf_write(ctx, 97, ctx->internal_frames[0].paddr +
461                                             ysize + ysize/4 + ysize/4);
462
463         return 0;
464 }
465
466 static void coda_free_context_buffers(struct coda_ctx *ctx)
467 {
468         struct coda_dev *dev = ctx->dev;
469
470         coda_free_aux_buf(dev, &ctx->slicebuf);
471         coda_free_aux_buf(dev, &ctx->psbuf);
472         if (dev->devtype->product != CODA_DX6)
473                 coda_free_aux_buf(dev, &ctx->workbuf);
474         coda_free_aux_buf(dev, &ctx->parabuf);
475 }
476
477 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
478                                       struct coda_q_data *q_data)
479 {
480         struct coda_dev *dev = ctx->dev;
481         size_t size;
482         int ret;
483
484         if (!ctx->parabuf.vaddr) {
485                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
486                                              CODA_PARA_BUF_SIZE, "parabuf");
487                 if (ret < 0)
488                         return ret;
489         }
490
491         if (dev->devtype->product == CODA_DX6)
492                 return 0;
493
494         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
495                 /* worst case slice size */
496                 size = (DIV_ROUND_UP(q_data->width, 16) *
497                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
498                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
499                                              "slicebuf");
500                 if (ret < 0)
501                         goto err;
502         }
503
504         if (!ctx->psbuf.vaddr && dev->devtype->product == CODA_7541) {
505                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
506                                              CODA7_PS_BUF_SIZE, "psbuf");
507                 if (ret < 0)
508                         goto err;
509         }
510
511         if (!ctx->workbuf.vaddr) {
512                 size = dev->devtype->workbuf_size;
513                 if (dev->devtype->product == CODA_960 &&
514                     q_data->fourcc == V4L2_PIX_FMT_H264)
515                         size += CODA9_PS_SAVE_SIZE;
516                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
517                                              "workbuf");
518                 if (ret < 0)
519                         goto err;
520         }
521
522         return 0;
523
524 err:
525         coda_free_context_buffers(ctx);
526         return ret;
527 }
528
529 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
530                               int header_code, u8 *header, int *size)
531 {
532         struct vb2_buffer *vb = &buf->vb2_buf;
533         struct coda_dev *dev = ctx->dev;
534         size_t bufsize;
535         int ret;
536         int i;
537
538         if (dev->devtype->product == CODA_960)
539                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
540
541         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
542                    CODA_CMD_ENC_HEADER_BB_START);
543         bufsize = vb2_plane_size(vb, 0);
544         if (dev->devtype->product == CODA_960)
545                 bufsize /= 1024;
546         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
547         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
548         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
549         if (ret < 0) {
550                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
551                 return ret;
552         }
553
554         if (dev->devtype->product == CODA_960) {
555                 for (i = 63; i > 0; i--)
556                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
557                                 break;
558                 *size = i + 1;
559         } else {
560                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
561                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
562         }
563         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
564
565         return 0;
566 }
567
568 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
569 {
570         phys_addr_t ret;
571
572         size = round_up(size, 1024);
573         if (size > iram->remaining)
574                 return 0;
575         iram->remaining -= size;
576
577         ret = iram->next_paddr;
578         iram->next_paddr += size;
579
580         return ret;
581 }
582
/*
 * Distribute the device's IRAM among the context's secondary AXI buffers and
 * record in ctx->iram_info which of them are in use (axi_sram_use).
 *
 * The info structure is reset first. Nothing is assigned when the device has
 * no IRAM mapping or the product is neither CODA_7541 nor CODA_960.
 * Allocations are made in priority order; as soon as one fails the remaining
 * ones are skipped, leaving their enable bits clear.
 */
583 static void coda_setup_iram(struct coda_ctx *ctx)
584 {
585         struct coda_iram_info *iram_info = &ctx->iram_info;
586         struct coda_dev *dev = ctx->dev;
587         int w64, w128;
588         int mb_width;
589         int dbk_bits;
590         int bit_bits;
591         int ip_bits;
592
593         memset(iram_info, 0, sizeof(*iram_info));
594         iram_info->next_paddr = dev->iram.paddr;
595         iram_info->remaining = dev->iram.size;
596
597         if (!dev->iram.vaddr)
598                 return;
599
            /* Pick the product specific enable bits for each buffer type */
600         switch (dev->devtype->product) {
601         case CODA_7541:
602                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
603                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
604                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
605                 break;
606         case CODA_960:
607                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
608                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
609                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
610                 break;
611         default: /* CODA_DX6 */
612                 return;
613         }
614
615         if (ctx->inst_type == CODA_INST_ENCODER) {
616                 struct coda_q_data *q_data_src;
617
618                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
                    /* Buffer sizes scale with the frame width in 16 pixel units */
619                 mb_width = DIV_ROUND_UP(q_data_src->width, 16);
620                 w128 = mb_width * 128;
621                 w64 = mb_width * 64;
622
623                 /* Prioritize in case IRAM is too small for everything */
624                 if (dev->devtype->product == CODA_7541) {
625                         iram_info->search_ram_size = round_up(mb_width * 16 *
626                                                               36 + 2048, 1024);
627                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
628                                                 iram_info->search_ram_size);
629                         if (!iram_info->search_ram_paddr) {
630                                 pr_err("IRAM is smaller than the search ram size\n");
631                                 goto out;
632                         }
633                         iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
634                                                    CODA7_USE_ME_ENABLE;
635                 }
636
637                 /* Only H.264BP and H.263P3 are considered */
638                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
639                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
640                 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
641                         goto out;
642                 iram_info->axi_sram_use |= dbk_bits;
643
644                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
645                 if (!iram_info->buf_bit_use)
646                         goto out;
647                 iram_info->axi_sram_use |= bit_bits;
648
649                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
650                 if (!iram_info->buf_ip_ac_dc_use)
651                         goto out;
652                 iram_info->axi_sram_use |= ip_bits;
653
654                 /* OVL and BTP disabled for encoder */
655         } else if (ctx->inst_type == CODA_INST_DECODER) {
656                 struct coda_q_data *q_data_dst;
657
658                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
659                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
660                 w128 = mb_width * 128;
661
                    /* The decoder requests w128 bytes for every buffer */
662                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
663                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
664                 if (!iram_info->buf_dbk_y_use || !iram_info->buf_dbk_c_use)
665                         goto out;
666                 iram_info->axi_sram_use |= dbk_bits;
667
668                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
669                 if (!iram_info->buf_bit_use)
670                         goto out;
671                 iram_info->axi_sram_use |= bit_bits;
672
673                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
674                 if (!iram_info->buf_ip_ac_dc_use)
675                         goto out;
676                 iram_info->axi_sram_use |= ip_bits;
677
678                 /* OVL and BTP unused as there is no VC1 support yet */
679         }
680
681 out:
            /*
             * NOTE(review): this tests the CODA7 host IP bit for CODA_960 as
             * well, although ip_bits uses CODA9_USE_HOST_IP_ENABLE there -
             * confirm whether the debug message is accurate on CODA_960.
             */
682         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
683                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
684                          "IRAM smaller than needed\n");
685
686         if (dev->devtype->product == CODA_7541) {
687                 /* TODO - Enabling these causes picture errors on CODA7541 */
688                 if (ctx->inst_type == CODA_INST_DECODER) {
689                         /* fw 1.4.50 */
690                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
691                                                      CODA7_USE_IP_ENABLE);
692                 } else {
693                         /* fw 13.4.29 */
694                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
695                                                      CODA7_USE_HOST_DBK_ENABLE |
696                                                      CODA7_USE_IP_ENABLE |
697                                                      CODA7_USE_DBK_ENABLE);
698                 }
699         }
700 }
701
702 static u32 coda_supported_firmwares[] = {
703         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
704         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
705         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
706         CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
707         CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
708 };
709
710 static bool coda_firmware_supported(u32 vernum)
711 {
712         int i;
713
714         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
715                 if (vernum == coda_supported_firmwares[i])
716                         return true;
717         return false;
718 }
719
720 int coda_check_firmware(struct coda_dev *dev)
721 {
722         u16 product, major, minor, release;
723         u32 data;
724         int ret;
725
726         ret = clk_prepare_enable(dev->clk_per);
727         if (ret)
728                 goto err_clk_per;
729
730         ret = clk_prepare_enable(dev->clk_ahb);
731         if (ret)
732                 goto err_clk_ahb;
733
734         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
735         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
736         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
737         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
738         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
739         if (coda_wait_timeout(dev)) {
740                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
741                 ret = -EIO;
742                 goto err_run_cmd;
743         }
744
745         if (dev->devtype->product == CODA_960) {
746                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
747                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
748                           data);
749         }
750
751         /* Check we are compatible with the loaded firmware */
752         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
753         product = CODA_FIRMWARE_PRODUCT(data);
754         major = CODA_FIRMWARE_MAJOR(data);
755         minor = CODA_FIRMWARE_MINOR(data);
756         release = CODA_FIRMWARE_RELEASE(data);
757
758         clk_disable_unprepare(dev->clk_per);
759         clk_disable_unprepare(dev->clk_ahb);
760
761         if (product != dev->devtype->product) {
762                 v4l2_err(&dev->v4l2_dev,
763                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
764                          coda_product_name(dev->devtype->product),
765                          coda_product_name(product), major, minor, release);
766                 return -EINVAL;
767         }
768
769         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
770                   coda_product_name(product));
771
772         if (coda_firmware_supported(data)) {
773                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
774                           major, minor, release);
775         } else {
776                 v4l2_warn(&dev->v4l2_dev,
777                           "Unsupported firmware version: %u.%u.%u\n",
778                           major, minor, release);
779         }
780
781         return 0;
782
783 err_run_cmd:
784         clk_disable_unprepare(dev->clk_ahb);
785 err_clk_ahb:
786         clk_disable_unprepare(dev->clk_per);
787 err_clk_per:
788         return ret;
789 }
790
791 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
792 {
793         u32 cache_size, cache_config;
794
795         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
796                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
797                 cache_size = 0x20262024;
798                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
799         } else {
800                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
801                 cache_size = 0x02440243;
802                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
803         }
804         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
805         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
806                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
807                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
808                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
809         } else {
810                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
811                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
812                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
813         }
814         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
815 }
816
817 /*
818  * Encoder context operations
819  */
820
821 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
822                                 struct v4l2_requestbuffers *rb)
823 {
824         struct coda_q_data *q_data_src;
825         int ret;
826
827         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
828                 return 0;
829
830         if (rb->count) {
831                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
832                 ret = coda_alloc_context_buffers(ctx, q_data_src);
833                 if (ret < 0)
834                         return ret;
835         } else {
836                 coda_free_context_buffers(ctx);
837         }
838
839         return 0;
840 }
841
842 static int coda_start_encoding(struct coda_ctx *ctx)
843 {
844         struct coda_dev *dev = ctx->dev;
845         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
846         struct coda_q_data *q_data_src, *q_data_dst;
847         u32 bitstream_buf, bitstream_size;
848         struct vb2_v4l2_buffer *buf;
849         int gamma, ret, value;
850         u32 dst_fourcc;
851         int num_fb;
852         u32 stride;
853
854         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
855         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
856         dst_fourcc = q_data_dst->fourcc;
857
858         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
859         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
860         bitstream_size = q_data_dst->sizeimage;
861
862         if (!coda_is_initialized(dev)) {
863                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
864                 return -EFAULT;
865         }
866
867         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
868                 if (!ctx->params.jpeg_qmat_tab[0]) {
869                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
870                         if (!ctx->params.jpeg_qmat_tab[0])
871                                 return -ENOMEM;
872                 }
873                 if (!ctx->params.jpeg_qmat_tab[1]) {
874                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
875                         if (!ctx->params.jpeg_qmat_tab[1])
876                                 return -ENOMEM;
877                 }
878                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
879         }
880
881         mutex_lock(&dev->coda_mutex);
882
883         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
884         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
885         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
886         switch (dev->devtype->product) {
887         case CODA_DX6:
888                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
889                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
890                 break;
891         case CODA_960:
892                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
893                 /* fallthrough */
894         case CODA_7541:
895                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
896                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
897                 break;
898         }
899
900         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
901                                  CODA9_FRAME_TILED2LINEAR);
902         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
903                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
904         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
905                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
906         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
907
908         if (dev->devtype->product == CODA_DX6) {
909                 /* Configure the coda */
910                 coda_write(dev, dev->iram.paddr,
911                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
912         }
913
914         /* Could set rotation here if needed */
915         value = 0;
916         switch (dev->devtype->product) {
917         case CODA_DX6:
918                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
919                         << CODADX6_PICWIDTH_OFFSET;
920                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
921                          << CODA_PICHEIGHT_OFFSET;
922                 break;
923         case CODA_7541:
924                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
925                         value = (round_up(q_data_src->width, 16) &
926                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
927                         value |= (round_up(q_data_src->height, 16) &
928                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
929                         break;
930                 }
931                 /* fallthrough */
932         case CODA_960:
933                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
934                         << CODA7_PICWIDTH_OFFSET;
935                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
936                          << CODA_PICHEIGHT_OFFSET;
937         }
938         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
939         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
940                 ctx->params.framerate = 0;
941         coda_write(dev, ctx->params.framerate,
942                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
943
944         ctx->params.codec_mode = ctx->codec->mode;
945         switch (dst_fourcc) {
946         case V4L2_PIX_FMT_MPEG4:
947                 if (dev->devtype->product == CODA_960)
948                         coda_write(dev, CODA9_STD_MPEG4,
949                                    CODA_CMD_ENC_SEQ_COD_STD);
950                 else
951                         coda_write(dev, CODA_STD_MPEG4,
952                                    CODA_CMD_ENC_SEQ_COD_STD);
953                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
954                 break;
955         case V4L2_PIX_FMT_H264:
956                 if (dev->devtype->product == CODA_960)
957                         coda_write(dev, CODA9_STD_H264,
958                                    CODA_CMD_ENC_SEQ_COD_STD);
959                 else
960                         coda_write(dev, CODA_STD_H264,
961                                    CODA_CMD_ENC_SEQ_COD_STD);
962                 value = ((ctx->params.h264_disable_deblocking_filter_idc &
963                           CODA_264PARAM_DISABLEDEBLK_MASK) <<
964                          CODA_264PARAM_DISABLEDEBLK_OFFSET) |
965                         ((ctx->params.h264_slice_alpha_c0_offset_div2 &
966                           CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
967                          CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
968                         ((ctx->params.h264_slice_beta_offset_div2 &
969                           CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
970                          CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
971                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
972                 break;
973         case V4L2_PIX_FMT_JPEG:
974                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
975                 coda_write(dev, ctx->params.jpeg_restart_interval,
976                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
977                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
978                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
979                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
980
981                 coda_jpeg_write_tables(ctx);
982                 break;
983         default:
984                 v4l2_err(v4l2_dev,
985                          "dst format (0x%08x) invalid.\n", dst_fourcc);
986                 ret = -EINVAL;
987                 goto out;
988         }
989
990         /*
991          * slice mode and GOP size registers are used for thumb size/offset
992          * in JPEG mode
993          */
994         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
995                 switch (ctx->params.slice_mode) {
996                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
997                         value = 0;
998                         break;
999                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
1000                         value  = (ctx->params.slice_max_mb &
1001                                   CODA_SLICING_SIZE_MASK)
1002                                  << CODA_SLICING_SIZE_OFFSET;
1003                         value |= (1 & CODA_SLICING_UNIT_MASK)
1004                                  << CODA_SLICING_UNIT_OFFSET;
1005                         value |=  1 & CODA_SLICING_MODE_MASK;
1006                         break;
1007                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1008                         value  = (ctx->params.slice_max_bits &
1009                                   CODA_SLICING_SIZE_MASK)
1010                                  << CODA_SLICING_SIZE_OFFSET;
1011                         value |= (0 & CODA_SLICING_UNIT_MASK)
1012                                  << CODA_SLICING_UNIT_OFFSET;
1013                         value |=  1 & CODA_SLICING_MODE_MASK;
1014                         break;
1015                 }
1016                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1017                 value = ctx->params.gop_size;
1018                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1019         }
1020
1021         if (ctx->params.bitrate) {
1022                 /* Rate control enabled */
1023                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1024                         << CODA_RATECONTROL_BITRATE_OFFSET;
1025                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1026                 value |= (ctx->params.vbv_delay &
1027                           CODA_RATECONTROL_INITIALDELAY_MASK)
1028                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1029                 if (dev->devtype->product == CODA_960)
1030                         value |= BIT(31); /* disable autoskip */
1031         } else {
1032                 value = 0;
1033         }
1034         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1035
1036         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1037         coda_write(dev, ctx->params.intra_refresh,
1038                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1039
1040         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1041         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1042
1043
1044         value = 0;
1045         if (dev->devtype->product == CODA_960)
1046                 gamma = CODA9_DEFAULT_GAMMA;
1047         else
1048                 gamma = CODA_DEFAULT_GAMMA;
1049         if (gamma > 0) {
1050                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1051                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1052         }
1053
1054         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1055                 coda_write(dev,
1056                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1057                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1058                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1059         }
1060         if (dev->devtype->product == CODA_960) {
1061                 if (ctx->params.h264_max_qp)
1062                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1063                 if (CODA_DEFAULT_GAMMA > 0)
1064                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1065         } else {
1066                 if (CODA_DEFAULT_GAMMA > 0) {
1067                         if (dev->devtype->product == CODA_DX6)
1068                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1069                         else
1070                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1071                 }
1072                 if (ctx->params.h264_min_qp)
1073                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1074                 if (ctx->params.h264_max_qp)
1075                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1076         }
1077         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1078
1079         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1080
1081         coda_setup_iram(ctx);
1082
1083         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1084                 switch (dev->devtype->product) {
1085                 case CODA_DX6:
1086                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1087                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1088                         break;
1089                 case CODA_7541:
1090                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1091                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1092                         coda_write(dev, ctx->iram_info.search_ram_size,
1093                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1094                         break;
1095                 case CODA_960:
1096                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1097                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1098                 }
1099         }
1100
1101         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1102         if (ret < 0) {
1103                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1104                 goto out;
1105         }
1106
1107         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1108                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1109                 ret = -EFAULT;
1110                 goto out;
1111         }
1112         ctx->initialized = 1;
1113
1114         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1115                 if (dev->devtype->product == CODA_960)
1116                         ctx->num_internal_frames = 4;
1117                 else
1118                         ctx->num_internal_frames = 2;
1119                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1120                 if (ret < 0) {
1121                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1122                         goto out;
1123                 }
1124                 num_fb = 2;
1125                 stride = q_data_src->bytesperline;
1126         } else {
1127                 ctx->num_internal_frames = 0;
1128                 num_fb = 0;
1129                 stride = 0;
1130         }
1131         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1132         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1133
1134         if (dev->devtype->product == CODA_7541) {
1135                 coda_write(dev, q_data_src->bytesperline,
1136                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1137         }
1138         if (dev->devtype->product != CODA_DX6) {
1139                 coda_write(dev, ctx->iram_info.buf_bit_use,
1140                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1141                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1142                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1143                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1144                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1145                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1146                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1147                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1148                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1149                 if (dev->devtype->product == CODA_960) {
1150                         coda_write(dev, ctx->iram_info.buf_btp_use,
1151                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1152
1153                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1154
1155                         /* FIXME */
1156                         coda_write(dev, ctx->internal_frames[2].paddr,
1157                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1158                         coda_write(dev, ctx->internal_frames[3].paddr,
1159                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1160                 }
1161         }
1162
1163         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1164         if (ret < 0) {
1165                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1166                 goto out;
1167         }
1168
1169         /* Save stream headers */
1170         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1171         switch (dst_fourcc) {
1172         case V4L2_PIX_FMT_H264:
1173                 /*
1174                  * Get SPS in the first frame and copy it to an
1175                  * intermediate buffer.
1176                  */
1177                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1178                                          &ctx->vpu_header[0][0],
1179                                          &ctx->vpu_header_size[0]);
1180                 if (ret < 0)
1181                         goto out;
1182
1183                 /*
1184                  * Get PPS in the first frame and copy it to an
1185                  * intermediate buffer.
1186                  */
1187                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1188                                          &ctx->vpu_header[1][0],
1189                                          &ctx->vpu_header_size[1]);
1190                 if (ret < 0)
1191                         goto out;
1192
1193                 /*
1194                  * Length of H.264 headers is variable and thus it might not be
1195                  * aligned for the coda to append the encoded frame. In that is
1196                  * the case a filler NAL must be added to header 2.
1197                  */
1198                 ctx->vpu_header_size[2] = coda_h264_padding(
1199                                         (ctx->vpu_header_size[0] +
1200                                          ctx->vpu_header_size[1]),
1201                                          ctx->vpu_header[2]);
1202                 break;
1203         case V4L2_PIX_FMT_MPEG4:
1204                 /*
1205                  * Get VOS in the first frame and copy it to an
1206                  * intermediate buffer
1207                  */
1208                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1209                                          &ctx->vpu_header[0][0],
1210                                          &ctx->vpu_header_size[0]);
1211                 if (ret < 0)
1212                         goto out;
1213
1214                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1215                                          &ctx->vpu_header[1][0],
1216                                          &ctx->vpu_header_size[1]);
1217                 if (ret < 0)
1218                         goto out;
1219
1220                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1221                                          &ctx->vpu_header[2][0],
1222                                          &ctx->vpu_header_size[2]);
1223                 if (ret < 0)
1224                         goto out;
1225                 break;
1226         default:
1227                 /* No more formats need to save headers at the moment */
1228                 break;
1229         }
1230
1231 out:
1232         mutex_unlock(&dev->coda_mutex);
1233         return ret;
1234 }
1235
1236 static int coda_prepare_encode(struct coda_ctx *ctx)
1237 {
1238         struct coda_q_data *q_data_src, *q_data_dst;
1239         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1240         struct coda_dev *dev = ctx->dev;
1241         int force_ipicture;
1242         int quant_param = 0;
1243         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1244         u32 rot_mode = 0;
1245         u32 dst_fourcc;
1246         u32 reg;
1247
1248         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1249         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1250         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1251         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1252         dst_fourcc = q_data_dst->fourcc;
1253
1254         src_buf->sequence = ctx->osequence;
1255         dst_buf->sequence = ctx->osequence;
1256         ctx->osequence++;
1257
1258         force_ipicture = ctx->params.force_ipicture;
1259         if (force_ipicture)
1260                 ctx->params.force_ipicture = false;
1261         else if (ctx->params.gop_size != 0 &&
1262                  (src_buf->sequence % ctx->params.gop_size) == 0)
1263                 force_ipicture = 1;
1264
1265         /*
1266          * Workaround coda firmware BUG that only marks the first
1267          * frame as IDR. This is a problem for some decoders that can't
1268          * recover when a frame is lost.
1269          */
1270         if (!force_ipicture) {
1271                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1272                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1273         } else {
1274                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1275                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1276         }
1277
1278         if (dev->devtype->product == CODA_960)
1279                 coda_set_gdi_regs(ctx);
1280
1281         /*
1282          * Copy headers in front of the first frame and forced I frames for
1283          * H.264 only. In MPEG4 they are already copied by the CODA.
1284          */
1285         if (src_buf->sequence == 0 || force_ipicture) {
1286                 pic_stream_buffer_addr =
1287                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1288                         ctx->vpu_header_size[0] +
1289                         ctx->vpu_header_size[1] +
1290                         ctx->vpu_header_size[2];
1291                 pic_stream_buffer_size = q_data_dst->sizeimage -
1292                         ctx->vpu_header_size[0] -
1293                         ctx->vpu_header_size[1] -
1294                         ctx->vpu_header_size[2];
1295                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1296                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1297                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1298                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1299                         ctx->vpu_header_size[1]);
1300                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1301                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1302                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1303         } else {
1304                 pic_stream_buffer_addr =
1305                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1306                 pic_stream_buffer_size = q_data_dst->sizeimage;
1307         }
1308
1309         if (force_ipicture) {
1310                 switch (dst_fourcc) {
1311                 case V4L2_PIX_FMT_H264:
1312                         quant_param = ctx->params.h264_intra_qp;
1313                         break;
1314                 case V4L2_PIX_FMT_MPEG4:
1315                         quant_param = ctx->params.mpeg4_intra_qp;
1316                         break;
1317                 case V4L2_PIX_FMT_JPEG:
1318                         quant_param = 30;
1319                         break;
1320                 default:
1321                         v4l2_warn(&ctx->dev->v4l2_dev,
1322                                 "cannot set intra qp, fmt not supported\n");
1323                         break;
1324                 }
1325         } else {
1326                 switch (dst_fourcc) {
1327                 case V4L2_PIX_FMT_H264:
1328                         quant_param = ctx->params.h264_inter_qp;
1329                         break;
1330                 case V4L2_PIX_FMT_MPEG4:
1331                         quant_param = ctx->params.mpeg4_inter_qp;
1332                         break;
1333                 default:
1334                         v4l2_warn(&ctx->dev->v4l2_dev,
1335                                 "cannot set inter qp, fmt not supported\n");
1336                         break;
1337                 }
1338         }
1339
1340         /* submit */
1341         if (ctx->params.rot_mode)
1342                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1343         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1344         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1345
1346         if (dev->devtype->product == CODA_960) {
1347                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1348                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1349                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1350
1351                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1352         } else {
1353                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1354         }
1355         coda_write_base(ctx, q_data_src, src_buf, reg);
1356
1357         coda_write(dev, force_ipicture << 1 & 0x2,
1358                    CODA_CMD_ENC_PIC_OPTION);
1359
1360         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1361         coda_write(dev, pic_stream_buffer_size / 1024,
1362                    CODA_CMD_ENC_PIC_BB_SIZE);
1363
1364         if (!ctx->streamon_out) {
1365                 /* After streamoff on the output side, set stream end flag */
1366                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1367                 coda_write(dev, ctx->bit_stream_param,
1368                            CODA_REG_BIT_BIT_STREAM_PARAM);
1369         }
1370
1371         if (dev->devtype->product != CODA_DX6)
1372                 coda_write(dev, ctx->iram_info.axi_sram_use,
1373                                 CODA7_REG_BIT_AXI_SRAM_USE);
1374
1375         trace_coda_enc_pic_run(ctx, src_buf);
1376
1377         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1378
1379         return 0;
1380 }
1381
1382 static void coda_finish_encode(struct coda_ctx *ctx)
1383 {
1384         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1385         struct coda_dev *dev = ctx->dev;
1386         u32 wr_ptr, start_ptr;
1387
1388         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1389         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1390
1391         trace_coda_enc_pic_done(ctx, dst_buf);
1392
1393         /* Get results from the coda */
1394         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1395         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1396
1397         /* Calculate bytesused field */
1398         if (dst_buf->sequence == 0 ||
1399             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1400                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1401                                         ctx->vpu_header_size[0] +
1402                                         ctx->vpu_header_size[1] +
1403                                         ctx->vpu_header_size[2]);
1404         } else {
1405                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1406         }
1407
1408         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1409                  wr_ptr - start_ptr);
1410
1411         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1412         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1413
1414         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1415                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1416                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1417         } else {
1418                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1419                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1420         }
1421
1422         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1423         dst_buf->field = src_buf->field;
1424         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1425         dst_buf->flags |=
1426                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1427         dst_buf->timecode = src_buf->timecode;
1428
1429         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1430
1431         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1432         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1433
1434         ctx->gopcounter--;
1435         if (ctx->gopcounter < 0)
1436                 ctx->gopcounter = ctx->params.gop_size - 1;
1437
1438         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1439                 "job finished: encoding frame (%d) (%s)\n",
1440                 dst_buf->sequence,
1441                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1442                 "KEYFRAME" : "PFRAME");
1443 }
1444
1445 static void coda_seq_end_work(struct work_struct *work)
1446 {
1447         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1448         struct coda_dev *dev = ctx->dev;
1449
1450         mutex_lock(&ctx->buffer_mutex);
1451         mutex_lock(&dev->coda_mutex);
1452
1453         if (ctx->initialized == 0)
1454                 goto out;
1455
1456         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1457                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1458                  __func__);
1459         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1460                 v4l2_err(&dev->v4l2_dev,
1461                          "CODA_COMMAND_SEQ_END failed\n");
1462         }
1463
1464         /*
1465          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1466          * from the output stream after the h.264 decoder has run. Resetting the
1467          * hardware after the decoder has finished seems to help.
1468          */
1469         if (dev->devtype->product == CODA_960)
1470                 coda_hw_reset(ctx);
1471
1472         kfifo_init(&ctx->bitstream_fifo,
1473                 ctx->bitstream.vaddr, ctx->bitstream.size);
1474
1475         coda_free_framebuffers(ctx);
1476
1477         ctx->initialized = 0;
1478
1479 out:
1480         mutex_unlock(&dev->coda_mutex);
1481         mutex_unlock(&ctx->buffer_mutex);
1482 }
1483
1484 static void coda_bit_release(struct coda_ctx *ctx)
1485 {
1486         mutex_lock(&ctx->buffer_mutex);
1487         coda_free_framebuffers(ctx);
1488         coda_free_context_buffers(ctx);
1489         coda_free_bitstream_buffer(ctx);
1490         mutex_unlock(&ctx->buffer_mutex);
1491 }
1492
1493 const struct coda_context_ops coda_bit_encode_ops = {
1494         .queue_init = coda_encoder_queue_init,
1495         .reqbufs = coda_encoder_reqbufs,
1496         .start_streaming = coda_start_encoding,
1497         .prepare_run = coda_prepare_encode,
1498         .finish_run = coda_finish_encode,
1499         .seq_end_work = coda_seq_end_work,
1500         .release = coda_bit_release,
1501 };
1502
1503 /*
1504  * Decoder context operations
1505  */
1506
1507 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1508                                        struct coda_q_data *q_data)
1509 {
1510         if (ctx->bitstream.vaddr)
1511                 return 0;
1512
1513         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1514         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1515                                             ctx->bitstream.size,
1516                                             &ctx->bitstream.paddr, GFP_KERNEL);
1517         if (!ctx->bitstream.vaddr) {
1518                 v4l2_err(&ctx->dev->v4l2_dev,
1519                          "failed to allocate bitstream ringbuffer");
1520                 return -ENOMEM;
1521         }
1522         kfifo_init(&ctx->bitstream_fifo,
1523                    ctx->bitstream.vaddr, ctx->bitstream.size);
1524
1525         return 0;
1526 }
1527
1528 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1529 {
1530         if (ctx->bitstream.vaddr == NULL)
1531                 return;
1532
1533         dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1534                     ctx->bitstream.vaddr, ctx->bitstream.paddr);
1535         ctx->bitstream.vaddr = NULL;
1536         kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1537 }
1538
1539 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1540                                 struct v4l2_requestbuffers *rb)
1541 {
1542         struct coda_q_data *q_data_src;
1543         int ret;
1544
1545         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1546                 return 0;
1547
1548         if (rb->count) {
1549                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1550                 ret = coda_alloc_context_buffers(ctx, q_data_src);
1551                 if (ret < 0)
1552                         return ret;
1553                 ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1554                 if (ret < 0) {
1555                         coda_free_context_buffers(ctx);
1556                         return ret;
1557                 }
1558         } else {
1559                 coda_free_bitstream_buffer(ctx);
1560                 coda_free_context_buffers(ctx);
1561         }
1562
1563         return 0;
1564 }
1565
1566 static bool coda_reorder_enable(struct coda_ctx *ctx)
1567 {
1568         const char * const *profile_names;
1569         const char * const *level_names;
1570         struct coda_dev *dev = ctx->dev;
1571         int profile, level;
1572
1573         if (dev->devtype->product != CODA_7541 &&
1574             dev->devtype->product != CODA_960)
1575                 return false;
1576
1577         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1578                 return false;
1579
1580         if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1581                 return true;
1582
1583         profile = coda_h264_profile(ctx->params.h264_profile_idc);
1584         if (profile < 0) {
1585                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Profile: %d\n",
1586                          ctx->params.h264_profile_idc);
1587                 return false;
1588         }
1589
1590         level = coda_h264_level(ctx->params.h264_level_idc);
1591         if (level < 0) {
1592                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Level: %d\n",
1593                          ctx->params.h264_level_idc);
1594                 return false;
1595         }
1596
1597         profile_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_PROFILE);
1598         level_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_LEVEL);
1599
1600         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "H264 Profile/Level: %s L%s\n",
1601                  profile_names[profile], level_names[level]);
1602
1603         /* Baseline profile does not support reordering */
1604         return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1605 }
1606
/*
 * Initialize a decoder sequence on the BIT processor.
 *
 * Programs the parameter buffer address, bitstream pointers, frame memory
 * control and sequence options, issues SEQ_INIT, reads back the stream
 * dimensions and the number of frames the decoder needs, allocates the
 * internal framebuffers and registers them with SET_FRAME_BUF.
 *
 * This function does not take dev->coda_mutex itself; coda_start_decoding()
 * wraps it with the mutex held, and coda_prepare_decode() calls it directly
 * when the context is not yet initialized.
 *
 * Returns 0 on success. On failure it returns the coda_command_sync() error
 * for SEQ_INIT, -EAGAIN if the SEQ_INIT success register reads 0, -EINVAL if
 * the stream exceeds the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS frames, the coda_alloc_framebuffers() error, or
 * -ETIMEDOUT if SET_FRAME_BUF fails.
 */
static int __coda_start_decoding(struct coda_ctx *ctx)
{
	struct coda_q_data *q_data_src, *q_data_dst;
	u32 bitstream_buf, bitstream_size;
	struct coda_dev *dev = ctx->dev;
	int width, height;
	u32 src_fourcc, dst_fourcc;
	u32 val;
	int ret;

	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
		 "Video Data Order Adapter: %s\n",
		 ctx->use_vdoa ? "Enabled" : "Disabled");

	/* Start decoding */
	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
	bitstream_buf = ctx->bitstream.paddr;
	bitstream_size = ctx->bitstream.size;
	src_fourcc = q_data_src->fourcc;
	dst_fourcc = q_data_dst->fourcc;

	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);

	/* Update coda bitstream read and write pointers from kfifo */
	coda_kfifo_sync_to_device_full(ctx);

	/*
	 * Rebuild the chroma interleave and tiling bits from scratch:
	 * clear them first, then set them according to the capture format,
	 * the tiled map type and whether the VDOA is in use.
	 */
	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
				 CODA9_FRAME_TILED2LINEAR);
	if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ctx->frame_mem_ctrl |= (0x3 << 9) |
			((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

	/* No display frame yet and no frame marked as being displayed */
	ctx->display_idx = -1;
	ctx->frm_dis_flg = 0;
	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* The bitstream buffer size is passed to the hardware in KiB */
	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
	val = 0;
	if (coda_reorder_enable(ctx))
		val |= CODA_REORDER_ENABLE;
	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
		val |= CODA_NO_INT_ENABLE;
	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);

	ctx->params.codec_mode = ctx->codec->mode;
	if (dev->devtype->product == CODA_960 &&
	    src_fourcc == V4L2_PIX_FMT_MPEG4)
		ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
	else
		ctx->params.codec_mode_aux = 0;
	if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
		coda_write(dev, CODA_MP4_CLASS_MPEG4,
			   CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
	}
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		if (dev->devtype->product == CODA_7541) {
			coda_write(dev, ctx->psbuf.paddr,
					CODA_CMD_DEC_SEQ_PS_BB_START);
			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
		}
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
			coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
		}
	}
	if (dev->devtype->product != CODA_960)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);

	/* The escape flag is set only for the duration of SEQ_INIT */
	ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
	ctx->bit_stream_param = 0;
	if (ret) {
		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
		return ret;
	}
	/*
	 * The context is marked initialized as soon as the command has
	 * completed, i.e. before the success register is evaluated below.
	 */
	ctx->sequence_offset = ~0U;
	ctx->initialized = 1;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
		v4l2_err(&dev->v4l2_dev,
			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
		return -EAGAIN;
	}

	/* Stream dimensions; the register layout differs on CodaDx6 */
	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
	if (dev->devtype->product == CODA_DX6) {
		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
		height = val & CODADX6_PICHEIGHT_MASK;
	} else {
		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
		height = val & CODA7_PICHEIGHT_MASK;
	}

	/*
	 * NOTE(review): the stream width is compared against bytesperline
	 * of the capture format, not against a pixel width - confirm this
	 * is intended for packed formats.
	 */
	if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
			 width, height, q_data_dst->bytesperline,
			 q_data_dst->height);
		return -EINVAL;
	}

	/* From here on width and height are rounded up to multiples of 16 */
	width = round_up(width, 16);
	height = round_up(height, 16);

	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
		 __func__, ctx->idx, width, height);

	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
	/*
	 * If the VDOA is used, the decoder needs one additional frame,
	 * because the frames are freed when the next frame is decoded.
	 * Otherwise there are visible errors in the decoded frames (green
	 * regions in displayed frames) and a broken order of frames (earlier
	 * frames are sporadically displayed after later frames).
	 */
	if (ctx->use_vdoa)
		ctx->num_internal_frames += 1;
	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
		v4l2_err(&dev->v4l2_dev,
			 "not enough framebuffers to decode (%d < %d)\n",
			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
		return -EINVAL;
	}

	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);

		/*
		 * Each register holds two 10-bit fields: bits 19:10 give the
		 * left/top offset, bits 9:0 are subtracted from the rounded
		 * size together with that offset to get the crop size.
		 */
		q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
		q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
		q_data_dst->rect.width = width - q_data_dst->rect.left -
					 (left_right & 0x3ff);
		q_data_dst->rect.height = height - q_data_dst->rect.top -
					  (top_bottom & 0x3ff);
	}

	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
	if (ret < 0) {
		v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
		return ret;
	}

	/* Tell the decoder how many frame buffers we allocated. */
	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);

	if (dev->devtype->product != CODA_DX6) {
		/* Set secondary AXI IRAM */
		coda_setup_iram(ctx);

		coda_write(dev, ctx->iram_info.buf_bit_use,
				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
		coda_write(dev, ctx->iram_info.buf_ovl_use,
				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, ctx->iram_info.buf_btp_use,
					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);

			coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
			coda9_set_frame_cache(ctx, dst_fourcc);
		}
	}

	/* The slice buffer size is passed to the hardware in KiB */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		coda_write(dev, ctx->slicebuf.paddr,
				CODA_CMD_SET_FRAME_SLICE_BB_START);
		coda_write(dev, ctx->slicebuf.size / 1024,
				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
	}

	/*
	 * Maximum decode size in macroblocks for 1920x1088. Both products
	 * get the same value, written to their respective register.
	 */
	if (dev->devtype->product == CODA_7541) {
		int max_mb_x = 1920 / 16;
		int max_mb_y = 1088 / 16;
		int max_mb_num = max_mb_x * max_mb_y;

		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
	} else if (dev->devtype->product == CODA_960) {
		int max_mb_x = 1920 / 16;
		int max_mb_y = 1088 / 16;
		int max_mb_num = max_mb_x * max_mb_y;

		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
				CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
	}

	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
		v4l2_err(&ctx->dev->v4l2_dev,
			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
		return -ETIMEDOUT;
	}

	return 0;
}
1819
1820 static int coda_start_decoding(struct coda_ctx *ctx)
1821 {
1822         struct coda_dev *dev = ctx->dev;
1823         int ret;
1824
1825         mutex_lock(&dev->coda_mutex);
1826         ret = __coda_start_decoding(ctx);
1827         mutex_unlock(&dev->coda_mutex);
1828
1829         return ret;
1830 }
1831
1832 static int coda_prepare_decode(struct coda_ctx *ctx)
1833 {
1834         struct vb2_v4l2_buffer *dst_buf;
1835         struct coda_dev *dev = ctx->dev;
1836         struct coda_q_data *q_data_dst;
1837         struct coda_buffer_meta *meta;
1838         unsigned long flags;
1839         u32 rot_mode = 0;
1840         u32 reg_addr, reg_stride;
1841
1842         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1843         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1844
1845         /* Try to copy source buffer contents into the bitstream ringbuffer */
1846         mutex_lock(&ctx->bitstream_mutex);
1847         coda_fill_bitstream(ctx, NULL);
1848         mutex_unlock(&ctx->bitstream_mutex);
1849
1850         if (coda_get_bitstream_payload(ctx) < 512 &&
1851             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1852                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1853                          "bitstream payload: %d, skipping\n",
1854                          coda_get_bitstream_payload(ctx));
1855                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1856                 return -EAGAIN;
1857         }
1858
1859         /* Run coda_start_decoding (again) if not yet initialized */
1860         if (!ctx->initialized) {
1861                 int ret = __coda_start_decoding(ctx);
1862
1863                 if (ret < 0) {
1864                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1865                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1866                         return -EAGAIN;
1867                 } else {
1868                         ctx->initialized = 1;
1869                 }
1870         }
1871
1872         if (dev->devtype->product == CODA_960)
1873                 coda_set_gdi_regs(ctx);
1874
1875         if (ctx->use_vdoa &&
1876             ctx->display_idx >= 0 &&
1877             ctx->display_idx < ctx->num_internal_frames) {
1878                 vdoa_device_run(ctx->vdoa,
1879                                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
1880                                 ctx->internal_frames[ctx->display_idx].paddr);
1881         } else {
1882                 if (dev->devtype->product == CODA_960) {
1883                         /*
1884                          * The CODA960 seems to have an internal list of
1885                          * buffers with 64 entries that includes the
1886                          * registered frame buffers as well as the rotator
1887                          * buffer output.
1888                          *
1889                          * ROT_INDEX needs to be < 0x40, but >
1890                          * ctx->num_internal_frames.
1891                          */
1892                         coda_write(dev,
1893                                    CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1894                                    CODA9_CMD_DEC_PIC_ROT_INDEX);
1895
1896                         reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1897                         reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1898                 } else {
1899                         reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1900                         reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1901                 }
1902                 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1903                 coda_write(dev, q_data_dst->bytesperline, reg_stride);
1904
1905                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1906         }
1907
1908         coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);
1909
1910         switch (dev->devtype->product) {
1911         case CODA_DX6:
1912                 /* TBD */
1913         case CODA_7541:
1914                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1915                 break;
1916         case CODA_960:
1917                 /* 'hardcode to use interrupt disable mode'? */
1918                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1919                 break;
1920         }
1921
1922         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1923
1924         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1925         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1926
1927         if (dev->devtype->product != CODA_DX6)
1928                 coda_write(dev, ctx->iram_info.axi_sram_use,
1929                                 CODA7_REG_BIT_AXI_SRAM_USE);
1930
1931         spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1932         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1933                                         struct coda_buffer_meta, list);
1934
1935         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1936
1937                 /* If this is the last buffer in the bitstream, add padding */
1938                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1939                                   ctx->bitstream_fifo.kfifo.mask)) {
1940                         static unsigned char buf[512];
1941                         unsigned int pad;
1942
1943                         /* Pad to multiple of 256 and then add 256 more */
1944                         pad = ((0 - meta->end) & 0xff) + 256;
1945
1946                         memset(buf, 0xff, sizeof(buf));
1947
1948                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1949                 }
1950         }
1951         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1952
1953         coda_kfifo_sync_to_device_full(ctx);
1954
1955         /* Clear decode success flag */
1956         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
1957
1958         /* Clear error return value */
1959         coda_write(dev, 0, CODA_RET_DEC_PIC_ERR_MB);
1960
1961         trace_coda_dec_pic_run(ctx, meta);
1962
1963         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1964
1965         return 0;
1966 }
1967
/*
 * Evaluate the result registers after a decoder picture run.
 *
 * Syncs the bitstream kfifo with the hardware read pointer, reports decode
 * errors, updates the H.264 crop rectangle, records metadata, frame type
 * and error count for the frame that was just decoded, and returns the
 * capture buffer for the previously selected display frame (if any) to
 * userspace. Finally it remembers the display index reported by this run
 * for the next one.
 *
 * On CODA7541 the function returns early with ctx->hold set if the
 * picture option register reads 0 (prescan failure).
 */
static void coda_finish_decode(struct coda_ctx *ctx)
{
	struct coda_dev *dev = ctx->dev;
	struct coda_q_data *q_data_src;
	struct coda_q_data *q_data_dst;
	struct vb2_v4l2_buffer *dst_buf;
	struct coda_buffer_meta *meta;
	unsigned long payload;
	unsigned long flags;
	int width, height;
	int decoded_idx;
	int display_idx;
	u32 src_fourcc;
	int success;
	u32 err_mb;
	int err_vdoa = 0;
	u32 val;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	/*
	 * in stream-end mode, the read pointer can overshoot the write pointer
	 * by up to 512 bytes
	 */
	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
		if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
			kfifo_init(&ctx->bitstream_fifo,
				ctx->bitstream.vaddr, ctx->bitstream.size);
	}

	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	src_fourcc = q_data_src->fourcc;

	/* Any value other than exactly 1 is logged; bit 0 means success */
	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
	if (val != 1)
		pr_err("DEC_PIC_SUCCESS = %d\n", val);

	success = val & 0x1;
	if (!success)
		v4l2_err(&dev->v4l2_dev, "decode failed\n");

	/* For H.264, bits 3 and 2 of the same value flag buffer shortage */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		if (val & (1 << 3))
			v4l2_err(&dev->v4l2_dev,
				 "insufficient PS buffer space (%d bytes)\n",
				 ctx->psbuf.size);
		if (val & (1 << 2))
			v4l2_err(&dev->v4l2_dev,
				 "insufficient slice buffer space (%d bytes)\n",
				 ctx->slicebuf.size);
	}

	/* Picture size: width in the upper, height in the lower 16 bits */
	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
	width = (val >> 16) & 0xffff;
	height = val & 0xffff;

	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);

	/* frame crop information */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);

		if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
			/* Keep current crop information */
		} else {
			struct v4l2_rect *rect = &q_data_dst->rect;

			/*
			 * Upper 16 bits: left/top offset. Lower 16 bits are
			 * subtracted from the picture size together with
			 * that offset to get the crop size.
			 */
			rect->left = left_right >> 16 & 0xffff;
			rect->top = top_bottom >> 16 & 0xffff;
			rect->width = width - rect->left -
				      (left_right & 0xffff);
			rect->height = height - rect->top -
				       (top_bottom & 0xffff);
		}
	} else {
		/* no cropping */
	}

	err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
	if (err_mb > 0)
		v4l2_err(&dev->v4l2_dev,
			 "errors in %d macroblocks\n", err_mb);

	/*
	 * Early exit: nothing below (display index bookkeeping, buffer
	 * return) runs in this case.
	 */
	if (dev->devtype->product == CODA_7541) {
		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
		if (val == 0) {
			/* not enough bitstream data */
			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
				 "prescan failed: %d\n", val);
			ctx->hold = true;
			return;
		}
	}

	/* Wait until the VDOA finished writing the previous display frame */
	if (ctx->use_vdoa &&
	    ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
	}

	ctx->frm_dis_flg = coda_read(dev,
				     CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* The previous display frame was copied out and can be overwritten */
	if (ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
		coda_write(dev, ctx->frm_dis_flg,
				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
	}

	/*
	 * The index of the last decoded frame, not necessarily in
	 * display order, and the index of the next display frame.
	 * The latter could have been decoded in a previous run.
	 */
	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);

	if (decoded_idx == -1) {
		/* no frame was decoded, but we might have a display frame */
		if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
			ctx->sequence_offset++;
		else if (ctx->display_idx < 0)
			ctx->hold = true;
	} else if (decoded_idx == -2) {
		if (ctx->display_idx >= 0 &&
		    ctx->display_idx < ctx->num_internal_frames)
			ctx->sequence_offset++;
		/* no frame was decoded, we still return remaining buffers */
	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
		v4l2_err(&dev->v4l2_dev,
			 "decoded frame index out of range: %d\n", decoded_idx);
	} else {
		/*
		 * A frame was decoded into a valid slot. The first frame
		 * number seen after sequence init is latched as offset, so
		 * val becomes the frame number relative to that offset.
		 */
		val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM);
		if (ctx->sequence_offset == -1)
			ctx->sequence_offset = val;
		val -= ctx->sequence_offset;
		spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
		if (!list_empty(&ctx->buffer_meta_list)) {
			/* Consume the oldest queued metadata entry */
			meta = list_first_entry(&ctx->buffer_meta_list,
					      struct coda_buffer_meta, list);
			list_del(&meta->list);
			ctx->num_metas--;
			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
			/*
			 * Clamp counters to 16 bits for comparison, as the HW
			 * counter rolls over at this point for h.264. This
			 * may be different for other formats, but using 16 bits
			 * should be enough to detect most errors and saves us
			 * from doing different things based on the format.
			 */
			if ((val & 0xffff) != (meta->sequence & 0xffff)) {
				v4l2_err(&dev->v4l2_dev,
					 "sequence number mismatch (%d(%d) != %d)\n",
					 val, ctx->sequence_offset,
					 meta->sequence);
			}
			/* Keep a copy per frame slot, then free the entry */
			ctx->frame_metas[decoded_idx] = *meta;
			kfree(meta);
		} else {
			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
			v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
			/* No metadata available: store zeroed metadata */
			memset(&ctx->frame_metas[decoded_idx], 0,
			       sizeof(struct coda_buffer_meta));
			ctx->frame_metas[decoded_idx].sequence = val;
			ctx->sequence_offset++;
		}

		trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);

		/* Picture type 0 is stored as keyframe, 1 as P, others as B */
		val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
		if (val == 0)
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
		else if (val == 1)
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
		else
			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;

		ctx->frame_errors[decoded_idx] = err_mb;
	}

	if (display_idx == -1) {
		/*
		 * no more frames to be decoded, but there could still
		 * be rotator output to dequeue
		 */
		ctx->hold = true;
	} else if (display_idx == -3) {
		/* possibly prescan failure */
	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
		v4l2_err(&dev->v4l2_dev,
			 "presentation frame index out of range: %d\n",
			 display_idx);
	}

	/* If a frame was copied out, return it */
	if (ctx->display_idx >= 0 &&
	    ctx->display_idx < ctx->num_internal_frames) {
		/*
		 * NOTE(review): the result of v4l2_m2m_dst_buf_remove() is
		 * dereferenced without a NULL check - confirm a capture
		 * buffer is always queued at this point.
		 */
		dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
		dst_buf->sequence = ctx->osequence++;

		/* Replace any stale frame type flags with the stored type */
		dst_buf->field = V4L2_FIELD_NONE;
		dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
					     V4L2_BUF_FLAG_PFRAME |
					     V4L2_BUF_FLAG_BFRAME);
		dst_buf->flags |= ctx->frame_types[ctx->display_idx];
		meta = &ctx->frame_metas[ctx->display_idx];
		dst_buf->timecode = meta->timecode;
		dst_buf->vb2_buf.timestamp = meta->timestamp;

		trace_coda_dec_rot_done(ctx, dst_buf, meta);

		/*
		 * Payload from the reported picture size: 2 bytes per pixel
		 * for YUYV and YUV422P, 1.5 bytes per pixel for the 4:2:0
		 * formats and for any other format (default).
		 */
		switch (q_data_dst->fourcc) {
		case V4L2_PIX_FMT_YUYV:
			payload = width * height * 2;
			break;
		case V4L2_PIX_FMT_YUV420:
		case V4L2_PIX_FMT_YVU420:
		case V4L2_PIX_FMT_NV12:
		default:
			payload = width * height * 3 / 2;
			break;
		case V4L2_PIX_FMT_YUV422P:
			payload = width * height * 2;
			break;
		}
		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);

		/* Macroblock errors or a VDOA error mark the buffer as bad */
		if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
			coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
		else
			coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);

		/* The log labels every non-keyframe (incl. B) as PFRAME */
		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
			"job finished: decoding frame (%d) (%s)\n",
			dst_buf->sequence,
			(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
			"KEYFRAME" : "PFRAME");
	} else {
		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
			"job finished: no frame decoded\n");
	}

	/* The rotator will copy the current display frame next time */
	ctx->display_idx = display_idx;
}
2221
2222 static void coda_decode_timeout(struct coda_ctx *ctx)
2223 {
2224         struct vb2_v4l2_buffer *dst_buf;
2225
2226         /*
2227          * For now this only handles the case where we would deadlock with
2228          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2229          * but after a failed decode run we would hold the context and wait for
2230          * userspace to queue more buffers.
2231          */
2232         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2233                 return;
2234
2235         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2236         dst_buf->sequence = ctx->qsequence - 1;
2237
2238         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2239 }
2240
2241 const struct coda_context_ops coda_bit_decode_ops = {
2242         .queue_init = coda_decoder_queue_init,
2243         .reqbufs = coda_decoder_reqbufs,
2244         .start_streaming = coda_start_decoding,
2245         .prepare_run = coda_prepare_decode,
2246         .finish_run = coda_finish_decode,
2247         .run_timeout = coda_decode_timeout,
2248         .seq_end_work = coda_seq_end_work,
2249         .release = coda_bit_release,
2250 };
2251
2252 irqreturn_t coda_irq_handler(int irq, void *data)
2253 {
2254         struct coda_dev *dev = data;
2255         struct coda_ctx *ctx;
2256
2257         /* read status register to attend the IRQ */
2258         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2259         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2260                       CODA_REG_BIT_INT_CLEAR);
2261
2262         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2263         if (ctx == NULL) {
2264                 v4l2_err(&dev->v4l2_dev,
2265                          "Instance released before the end of transaction\n");
2266                 return IRQ_HANDLED;
2267         }
2268
2269         trace_coda_bit_done(ctx);
2270
2271         if (ctx->aborting) {
2272                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2273                          "task has been aborted\n");
2274         }
2275
2276         if (coda_isbusy(ctx->dev)) {
2277                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2278                          "coda is still busy!!!!\n");
2279                 return IRQ_NONE;
2280         }
2281
2282         complete(&ctx->completion);
2283
2284         return IRQ_HANDLED;
2285 }