2 * Copyright (C) STMicroelectronics SA 2014
3 * Authors: Fabien Dessenne <fabien.dessenne@st.com> for STMicroelectronics.
4 * License terms: GNU General Public License (GPL), version 2
7 #include <linux/delay.h>
10 #include "bdisp-filter.h"
11 #include "bdisp-reg.h"
/*
 * bdisp_hw_build_node() clamps the source width handled by one node to
 * MAX_SRC_WIDTH, and bdisp_hw_build_all_nodes() advances the source x offset
 * by the same amount for each additional stride.
 */
13 /* Max width of the source frame in a single node */
14 #define MAX_SRC_WIDTH 2048
/*
 * bdisp_hw_reset() polls the idle flag at most POLL_RST_MAX times and delays
 * POLL_RST_DELAY_MS milliseconds between polls, so the worst-case wait is
 * about 500 * 2 ms = 1 s.
 */
16 /* Reset & boot poll config */
17 #define POLL_RST_MAX 500
18 #define POLL_RST_DELAY_MS 2
/*
 * Selects which destination plane a node writes. Callers in this file pass
 * BDISP_RGB, BDISP_Y or BDISP_CBCR as values of this type
 * (see bdisp_hw_build_all_nodes()).
 */
20 enum bdisp_target_plan {
/*
 * Per-operation configuration. The fields are populated by
 * bdisp_hw_get_op_cfg() through its struct bdisp_op_cfg pointer and consumed
 * by bdisp_hw_build_node() when a node is programmed.
 * In 6.10 fixed point the value 1 << 10 represents an increment of 1.0,
 * which bdisp_hw_get_op_cfg() compares h_inc and v_inc against.
 */
27 bool cconv; /* RGB - YUV conversion */
28 bool hflip; /* Horizontal flip */
29 bool vflip; /* Vertical flip */
30 bool wide; /* Wide (>MAX_SRC_WIDTH) */
31 bool scale; /* Scale */
32 u16 h_inc; /* Horizontal increment in 6.10 format */
33 u16 v_inc; /* Vertical increment in 6.10 format */
34 bool src_interlaced; /* is the src an interlaced buffer */
35 u8 src_nbp; /* nb of planes of the src */
36 bool src_yuv; /* is the src a YUV color format */
37 bool src_420; /* is the src 4:2:0 chroma subsampled */
38 u8 dst_nbp; /* nb of planes of the dst */
39 bool dst_yuv; /* is the dst a YUV color format */
40 bool dst_420; /* is the dst 4:2:0 chroma subsampled */
/*
 * One entry per filter table: the range of scale factors it serves and the
 * location of its coefficients. The lookup helpers in this file select an
 * entry when min < inc <= max and hand back its paddr.
 */
43 struct bdisp_filter_addr {
44 u16 min; /* Filter min scale factor (6.10 fixed point) */
45 u16 max; /* Filter max scale factor (6.10 fixed point) */
46 void *virt; /* Virtual address for filter table */
47 dma_addr_t paddr; /* Physical address for filter table */
/*
 * File-scope tables of horizontal and vertical filters.
 * bdisp_hw_alloc_filters() fills every entry, bdisp_hw_get_hf_addr() and
 * bdisp_hw_get_vf_addr() search them by increment, and
 * bdisp_hw_free_filters() releases the backing buffer through
 * bdisp_h_filter[0].
 */
50 static struct bdisp_filter_addr bdisp_h_filter[NB_H_FILTER];
51 static struct bdisp_filter_addr bdisp_v_filter[NB_V_FILTER];
55 * @bdisp: bdisp entity
/*
 * Resets the blitter and waits for it to report idle.
 *
 * Sequence: write 0 to BLT_ITM0, set BLT_CTL_RESET in BLT_CTL with a
 * read-modify-write, write 0 to BLT_CTL, then poll BLT_STA1 for
 * BLT_STA1_IDLE up to POLL_RST_MAX times.
 * Returns -EAGAIN when the poll counter reaches POLL_RST_MAX, 0 otherwise.
 */
62 int bdisp_hw_reset(struct bdisp_dev *bdisp)
66 dev_dbg(bdisp->dev, "%s\n", __func__);
/* No interrupt source stays enabled in BLT_ITM0 while the reset is done */
69 writel(0, bdisp->regs + BLT_ITM0);
/* Raise the reset bit on top of the current BLT_CTL value, then write 0 */
72 writel(readl(bdisp->regs + BLT_CTL) | BLT_CTL_RESET,
73 bdisp->regs + BLT_CTL);
74 writel(0, bdisp->regs + BLT_CTL);
76 /* Wait for reset done */
77 for (i = 0; i < POLL_RST_MAX; i++) {
78 if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE)
/*
 * udelay() is a busy-wait; the argument is 2000 us per iteration.
 * NOTE(review): this is a long busy-wait for udelay(); mdelay() or a
 * sleeping delay may be more appropriate depending on the calling context -
 * confirm whether this function can be called from atomic context.
 */
80 udelay(POLL_RST_DELAY_MS * 1000);
/* i equals POLL_RST_MAX only when the loop ran through every poll */
82 if (i == POLL_RST_MAX)
83 dev_err(bdisp->dev, "Reset timeout\n");
85 return (i == POLL_RST_MAX) ? -EAGAIN : 0;
89 * bdisp_hw_get_and_clear_irq
90 * @bdisp: bdisp entity
92 * Read then reset interrupt status
95 * 0 if expected interrupt was raised.
/*
 * Reads BLT_ITS and writes the value read back to BLT_ITS in both branches.
 * BLT_ITS_AQ1_LNA is the only status bit treated as expected: when it is
 * absent the status is logged at debug level; when it is present BLT_ITM0 is
 * additionally written with 0.
 */
97 int bdisp_hw_get_and_clear_irq(struct bdisp_dev *bdisp)
101 its = readl(bdisp->regs + BLT_ITS);
103 /* Check for the only expected IT: LastNode of AQ1 */
104 if (!(its & BLT_ITS_AQ1_LNA)) {
105 dev_dbg(bdisp->dev, "Unexpected IT status: 0x%08X\n", its);
/* The status is written back even though it is not the expected one */
106 writel(its, bdisp->regs + BLT_ITS);
/*
 * Expected interrupt: write the status back, then write 0 to BLT_ITM0
 * (presumably masking all interrupt sources until the next request, since
 * bdisp_hw_update() writes BLT_ITS_AQ1_LNA to the same register).
 */
111 writel(its, bdisp->regs + BLT_ITS);
112 writel(0, bdisp->regs + BLT_ITM0);
118 * bdisp_hw_free_nodes
119 * @ctx: bdisp context
/*
 * Releases the node memory of a context with a single dma_free_attrs() call
 * sized for MAX_NB_NODE nodes, using node[0] and node_paddr[0] as the base of
 * the buffer. This mirrors the single allocation made in
 * bdisp_hw_alloc_nodes(). Nothing is done when ctx or ctx->node[0] is NULL.
 */
126 void bdisp_hw_free_nodes(struct bdisp_ctx *ctx)
128 if (ctx && ctx->node[0]) {
129 DEFINE_DMA_ATTRS(attrs);
/* Use the same write-combine attribute as the allocation */
131 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
132 dma_free_attrs(ctx->bdisp_dev->dev,
133 sizeof(struct bdisp_node) * MAX_NB_NODE,
134 ctx->node[0], ctx->node_paddr[0], &attrs);
139 * bdisp_hw_alloc_nodes
140 * @ctx: bdisp context
142 * Allocate dma memory for nodes
/*
 * Allocates one write-combine DMA buffer large enough for MAX_NB_NODE nodes,
 * zeroes it, and records a virtual and a physical address per node in
 * ctx->node[] and ctx->node_paddr[].
 * NOTE(review): the per-iteration advance of the base and paddr cursors is
 * expected inside the loop - confirm against the full function.
 */
147 int bdisp_hw_alloc_nodes(struct bdisp_ctx *ctx)
149 struct device *dev = ctx->bdisp_dev->dev;
150 unsigned int i, node_size = sizeof(struct bdisp_node);
153 DEFINE_DMA_ATTRS(attrs);
155 /* Allocate all the nodes within a single memory page */
156 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
157 base = dma_alloc_attrs(dev, node_size * MAX_NB_NODE, &paddr,
158 GFP_KERNEL | GFP_DMA, &attrs);
/* Reached when the allocation failed */
160 dev_err(dev, "%s no mem\n", __func__);
/* Clear every node so that unused fields start at 0 */
164 memset(base, 0, node_size * MAX_NB_NODE);
166 for (i = 0; i < MAX_NB_NODE; i++) {
168 ctx->node_paddr[i] = paddr;
169 dev_dbg(dev, "node[%d]=0x%p (paddr=%pad)\n", i, ctx->node[i],
179 * bdisp_hw_free_filters
182 * Free filters memory
/*
 * Releases the single buffer that holds every filter table. The size is
 * computed with the same expression as in bdisp_hw_alloc_filters(), and the
 * buffer base is taken from bdisp_h_filter[0]. Nothing is done when
 * bdisp_h_filter[0].virt is NULL.
 */
187 void bdisp_hw_free_filters(struct device *dev)
189 int size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER);
191 if (bdisp_h_filter[0].virt) {
192 DEFINE_DMA_ATTRS(attrs);
/* Use the same write-combine attribute as the allocation */
194 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
195 dma_free_attrs(dev, size, bdisp_h_filter[0].virt,
196 bdisp_h_filter[0].paddr, &attrs);
201 * bdisp_hw_alloc_filters
204 * Allocate dma memory for filters
/*
 * Allocates one write-combine DMA buffer for all filter tables and fills the
 * bdisp_h_filter[] and bdisp_v_filter[] entries: min and max are copied from
 * bdisp_h_spec[] / bdisp_v_spec[], the coefficients are copied into the
 * buffer, and the virtual and physical address of each table are recorded.
 * Horizontal tables take BDISP_HF_NB bytes each and are laid out first,
 * vertical tables take BDISP_VF_NB bytes each.
 * NOTE(review): the virtual cursor (base) is expected to advance together
 * with paddr after each table - confirm against the full function.
 */
209 int bdisp_hw_alloc_filters(struct device *dev)
211 unsigned int i, size;
214 DEFINE_DMA_ATTRS(attrs);
216 /* Allocate all the filters within a single memory page */
217 size = (BDISP_HF_NB * NB_H_FILTER) + (BDISP_VF_NB * NB_V_FILTER);
218 dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
219 base = dma_alloc_attrs(dev, size, &paddr, GFP_KERNEL | GFP_DMA, &attrs);
223 /* Setup filter addresses */
224 for (i = 0; i < NB_H_FILTER; i++) {
225 bdisp_h_filter[i].min = bdisp_h_spec[i].min;
226 bdisp_h_filter[i].max = bdisp_h_spec[i].max;
/* Copy the coefficients of horizontal table i into the DMA buffer */
227 memcpy(base, bdisp_h_spec[i].coef, BDISP_HF_NB);
228 bdisp_h_filter[i].virt = base;
229 bdisp_h_filter[i].paddr = paddr;
/* Move the physical cursor to the next horizontal table */
231 paddr += BDISP_HF_NB;
234 for (i = 0; i < NB_V_FILTER; i++) {
235 bdisp_v_filter[i].min = bdisp_v_spec[i].min;
236 bdisp_v_filter[i].max = bdisp_v_spec[i].max;
/* Copy the coefficients of vertical table i into the DMA buffer */
237 memcpy(base, bdisp_v_spec[i].coef, BDISP_VF_NB);
238 bdisp_v_filter[i].virt = base;
239 bdisp_v_filter[i].paddr = paddr;
/* Move the physical cursor to the next vertical table */
241 paddr += BDISP_VF_NB;
248 * bdisp_hw_get_hf_addr
249 * @inc: resize increment
251 * Find the horizontal filter table that fits the resize increment
254 * table physical address
/*
 * Scans bdisp_h_filter[] from the last entry down to index 1 for an entry
 * with min < inc <= max, and returns the paddr of the entry at the final
 * index. The loop condition is i > 0, so index 0 is never range-checked and
 * acts as the fallback when no other entry matches.
 * NOTE(review): the search presumably stops at the first matching entry -
 * confirm against the full function.
 */
256 static dma_addr_t bdisp_hw_get_hf_addr(u16 inc)
260 for (i = NB_H_FILTER - 1; i > 0; i--)
261 if ((bdisp_h_filter[i].min < inc) &&
262 (inc <= bdisp_h_filter[i].max))
265 return bdisp_h_filter[i].paddr;
269 * bdisp_hw_get_vf_addr
270 * @inc: resize increment
272 * Find the vertical filter table that fits the resize increment
275 * table physical address
/*
 * Vertical counterpart of bdisp_hw_get_hf_addr(): scans bdisp_v_filter[]
 * from the last entry down to index 1 for an entry with min < inc <= max and
 * returns the paddr of the entry at the final index. Index 0 is never
 * range-checked and acts as the fallback.
 * NOTE(review): the search presumably stops at the first matching entry -
 * confirm against the full function.
 */
277 static dma_addr_t bdisp_hw_get_vf_addr(u16 inc)
281 for (i = NB_V_FILTER - 1; i > 0; i--)
282 if ((bdisp_v_filter[i].min < inc) &&
283 (inc <= bdisp_v_filter[i].max))
286 return bdisp_v_filter[i].paddr;
293 * @inc: resize increment in 6.10 format
295 * Computes the increment (inverse of scale) in 6.10 format
/*
 * Computes (from << 10) / to, i.e. the from/to ratio in 6.10 fixed point.
 * A result of 0 or a result above 0xFFFF (not representable in the u16
 * output) is treated as out of range.
 * NOTE(review): from << 10 is evaluated on a u32 and wraps for
 * from >= 1 << 22, and the division requires to != 0 - confirm both are
 * guaranteed by earlier checks in this function or by the callers.
 */
300 static int bdisp_hw_get_inc(u32 from, u32 to, u16 *inc)
312 tmp = (from << 10) / to;
313 if ((tmp > 0xFFFF) || (!tmp))
314 /* overflow (downscale x 63) or too small (upscale x 1024) */
323 * bdisp_hw_get_hv_inc
324 * @ctx: device context
325 * @h_inc: horizontal increment
326 * @v_inc: vertical increment
328 * Computes the horizontal & vertical increments (inverse of scale)
/*
 * Derives both increments from the crop rectangles of the context:
 * h_inc from source crop width to destination crop width, v_inc from source
 * crop height to destination crop height, each through bdisp_hw_get_inc().
 * An error is logged with the four dimensions when either computation
 * reports a failure (non-zero return).
 */
333 static int bdisp_hw_get_hv_inc(struct bdisp_ctx *ctx, u16 *h_inc, u16 *v_inc)
335 u32 src_w, src_h, dst_w, dst_h;
337 src_w = ctx->src.crop.width;
338 src_h = ctx->src.crop.height;
339 dst_w = ctx->dst.crop.width;
340 dst_h = ctx->dst.crop.height;
/* The vertical increment is only computed when the horizontal one succeeded */
342 if (bdisp_hw_get_inc(src_w, dst_w, h_inc) ||
343 bdisp_hw_get_inc(src_h, dst_h, v_inc)) {
344 dev_err(ctx->bdisp_dev->dev,
345 "scale factors failed (%dx%d)->(%dx%d)\n",
346 src_w, src_h, dst_w, dst_h);
354 * bdisp_hw_get_op_cfg
355 * @ctx: device context
356 * @c: operation configuration
358 * Check which blitter operations are expected and sets the scaling increments
/*
 * Fills the operation configuration from the source and destination frames
 * of the context: wide flag, flips, interlacing, plane counts, YUV and 4:2:0
 * flags, color conversion flag and the scaling increments.
 * A source wider than MAX_SRC_WIDTH * MAX_VERTICAL_STRIDES, or increments
 * rejected by bdisp_hw_get_hv_inc(), are reported with dev_err().
 */
363 static int bdisp_hw_get_op_cfg(struct bdisp_ctx *ctx, struct bdisp_op_cfg *c)
365 struct device *dev = ctx->bdisp_dev->dev;
366 struct bdisp_frame *src = &ctx->src;
367 struct bdisp_frame *dst = &ctx->dst;
/* The width limit is the per-node maximum times the number of strides */
369 if (src->width > MAX_SRC_WIDTH * MAX_VERTICAL_STRIDES) {
370 dev_err(dev, "Image width out of HW caps\n");
374 c->wide = src->width > MAX_SRC_WIDTH;
376 c->hflip = ctx->hflip;
377 c->vflip = ctx->vflip;
379 c->src_interlaced = (src->field == V4L2_FIELD_INTERLACED);
381 c->src_nbp = src->fmt->nb_planes;
/* NV12 and YUV420 are the only formats classified as YUV here */
382 c->src_yuv = (src->fmt->pixelformat == V4L2_PIX_FMT_NV12) ||
383 (src->fmt->pixelformat == V4L2_PIX_FMT_YUV420);
/* Both YUV formats recognised above are 4:2:0, so the flags coincide */
384 c->src_420 = c->src_yuv;
386 c->dst_nbp = dst->fmt->nb_planes;
387 c->dst_yuv = (dst->fmt->pixelformat == V4L2_PIX_FMT_NV12) ||
388 (dst->fmt->pixelformat == V4L2_PIX_FMT_YUV420);
389 c->dst_420 = c->dst_yuv;
/* Conversion is needed exactly when one side is YUV and the other is not */
391 c->cconv = (c->src_yuv != c->dst_yuv);
393 if (bdisp_hw_get_hv_inc(ctx, &c->h_inc, &c->v_inc)) {
394 dev_err(dev, "Scale factor out of HW caps\n");
398 /* Deinterlacing adjustment : stretch a field to a frame */
399 if (c->src_interlaced)
/* 1 << 10 is 1.0 in 6.10 fixed point: any other value means resizing */
402 if ((c->h_inc != (1 << 10)) || (c->v_inc != (1 << 10)))
411 * bdisp_hw_color_format
412 * @pixelformat: v4l2 pixel format
414 * v4l2 to bdisp pixel format convert
/*
 * Maps a V4L2 pixel format to the bits used in the node type registers:
 * the BDISP color code shifted by BLT_TTY_COL_SHIFT, combined with
 * BLT_TTY_BIG_END for NV12 and RGB24, and with BLT_TTY_ALPHA_R for the
 * ARGB8888 code. The result is OR-ed into tty, s2ty and s3ty by
 * bdisp_hw_build_node().
 */
419 static u32 bdisp_hw_color_format(u32 pixelformat)
423 switch (pixelformat) {
424 case V4L2_PIX_FMT_YUV420:
425 ret = (BDISP_YUV_3B << BLT_TTY_COL_SHIFT);
427 case V4L2_PIX_FMT_NV12:
428 ret = (BDISP_NV12 << BLT_TTY_COL_SHIFT) | BLT_TTY_BIG_END;
430 case V4L2_PIX_FMT_RGB565:
431 ret = (BDISP_RGB565 << BLT_TTY_COL_SHIFT);
433 case V4L2_PIX_FMT_XBGR32: /* This V4L format actually refers to xRGB */
434 ret = (BDISP_XRGB8888 << BLT_TTY_COL_SHIFT);
436 case V4L2_PIX_FMT_RGB24: /* RGB888 format */
437 ret = (BDISP_RGB888 << BLT_TTY_COL_SHIFT) | BLT_TTY_BIG_END;
439 case V4L2_PIX_FMT_ABGR32: /* This V4L format actually refers to ARGB */
442 ret = (BDISP_ARGB8888 << BLT_TTY_COL_SHIFT) | BLT_TTY_ALPHA_R;
450 * bdisp_hw_build_node
451 * @ctx: device context
452 * @cfg: operation configuration
453 * @node: node to be set
454 * @t_plan: whether the node refers to a RGB/Y or a CbCr plane
455 * @src_x_offset: x offset in the source image
/*
 * Programs one hardware node for one destination plane and one vertical
 * stride of the source. The node is cleared first, then filled in this
 * order: general configuration (cic, ack, ins), target (tba, tty, txy, tsz),
 * sources (s2*, s1*, s3*), resize registers when BLT_INS_SCALE is set, and
 * the color conversion matrix (ivmx0..ivmx3).
 * Working copies of the source and destination crop rectangles are adjusted
 * locally; the rectangles stored in ctx are not modified by the visible code.
 */
462 static void bdisp_hw_build_node(struct bdisp_ctx *ctx,
463 struct bdisp_op_cfg *cfg,
464 struct bdisp_node *node,
465 enum bdisp_target_plan t_plan, int src_x_offset)
467 struct bdisp_frame *src = &ctx->src;
468 struct bdisp_frame *dst = &ctx->dst;
469 u16 h_inc, v_inc, yh_inc, yv_inc;
470 struct v4l2_rect src_rect = src->crop;
471 struct v4l2_rect dst_rect = dst->crop;
473 s32 dst_width = dst->crop.width;
474 u32 src_fmt, dst_fmt;
477 dev_dbg(ctx->bdisp_dev->dev, "%s\n", __func__);
/* Every field not set below stays at 0 */
479 memset(node, 0, sizeof(*node));
481 /* Adjust src and dst areas wrt src_x_offset */
482 src_rect.left += src_x_offset;
483 src_rect.width -= src_x_offset;
484 src_rect.width = min_t(__s32, MAX_SRC_WIDTH, src_rect.width);
/*
 * The destination offset and width are the source values rescaled by
 * dst_width / source crop width, using integer arithmetic.
 */
486 dst_x_offset = (src_x_offset * dst_width) / ctx->src.crop.width;
487 dst_rect.left += dst_x_offset;
488 dst_rect.width = (src_rect.width * dst_width) / ctx->src.crop.width;
491 src_fmt = src->fmt->pixelformat;
492 dst_fmt = dst->fmt->pixelformat;
495 node->cic = BLT_CIC_ALL_GRP;
496 node->ack = BLT_ACK_BYPASS_S2S3;
/* Source routing depends on the number of planes of the source */
498 switch (cfg->src_nbp) {
500 /* Src2 = RGB / Src1 = Src3 = off */
501 node->ins = BLT_INS_S1_OFF | BLT_INS_S2_MEM | BLT_INS_S3_OFF;
505 * Src2 = CbCr or ColorFill if writing the Y plane
507 node->ins = BLT_INS_S1_OFF | BLT_INS_S3_MEM;
508 if (t_plan == BDISP_Y)
509 node->ins |= BLT_INS_S2_CF;
511 node->ins |= BLT_INS_S2_MEM;
516 * Src2 = Cb or ColorFill if writing the Y plane
517 * Src1 = Cr or ColorFill if writing the Y plane */
518 node->ins = BLT_INS_S3_MEM;
519 if (t_plan == BDISP_Y)
520 node->ins |= BLT_INS_S2_CF | BLT_INS_S1_CF;
522 node->ins |= BLT_INS_S2_MEM | BLT_INS_S1_MEM;
/* Enable the input versatile matrix only when a color conversion is needed */
527 node->ins |= cfg->cconv ? BLT_INS_IVMX : 0;
528 /* Scale needed if scaling OR 4:2:0 up/downsampling */
529 node->ins |= (cfg->scale || cfg->src_420 || cfg->dst_420) ?
/* Target base: second destination plane for a chroma node, first otherwise */
533 node->tba = (t_plan == BDISP_CBCR) ? dst->paddr[1] : dst->paddr[0];
/* Target type: pitch, color format, dithering, chroma and flip flags */
535 node->tty = dst->bytesperline;
536 node->tty |= bdisp_hw_color_format(dst_fmt);
537 node->tty |= BLT_TTY_DITHER;
538 node->tty |= (t_plan == BDISP_CBCR) ? BLT_TTY_CHROMA : 0;
539 node->tty |= cfg->hflip ? BLT_TTY_HSO : 0;
540 node->tty |= cfg->vflip ? BLT_TTY_VSO : 0;
542 if (cfg->dst_420 && (t_plan == BDISP_CBCR)) {
543 /* 420 chroma downsampling */
544 dst_rect.height /= 2;
/*
 * When flipping, the target origin is the last row or column instead of the
 * first.
 * NOTE(review): the vflip value is dst_rect.height - 1 without dst_rect.top,
 * and the hflip value below is built from dst_width and dst_x_offset without
 * dst_rect.left - confirm this is intended when the destination crop does
 * not start at (0, 0).
 */
552 node->txy = cfg->vflip ? (dst_rect.height - 1) : dst_rect.top;
554 node->txy |= cfg->hflip ? (dst_width - dst_x_offset - 1) :
/* Size registers pack height in the upper 16 bits and width in the lower */
557 node->tsz = dst_rect.height << 16 | dst_rect.width;
559 if (cfg->src_interlaced) {
560 /* handle only the top field which is half height of a frame */
562 src_rect.height /= 2;
/* Single-plane source: everything is read through Src2 */
565 if (cfg->src_nbp == 1) {
567 node->s2ba = src->paddr[0];
569 node->s2ty = src->bytesperline;
570 if (cfg->src_interlaced)
573 node->s2ty |= bdisp_hw_color_format(src_fmt);
575 node->s2xy = src_rect.top << 16 | src_rect.left;
576 node->s2sz = src_rect.height << 16 | src_rect.width;
578 /* Src 2 : Cb or CbCr */
580 /* 420 chroma upsampling */
584 src_rect.height /= 2;
/* Chroma is read from the second source plane */
587 node->s2ba = src->paddr[1];
589 node->s2ty = src->bytesperline;
590 if (cfg->src_nbp == 3)
592 if (cfg->src_interlaced)
595 node->s2ty |= bdisp_hw_color_format(src_fmt);
597 node->s2xy = src_rect.top << 16 | src_rect.left;
598 node->s2sz = src_rect.height << 16 | src_rect.width;
/* Three-plane source: Src1 reads the third plane with the Src2 geometry */
600 if (cfg->src_nbp == 3) {
602 node->s1ba = src->paddr[2];
604 node->s1ty = node->s2ty;
605 node->s1xy = node->s2xy;
/* Src3 reads the first source plane */
609 node->s3ba = src->paddr[0];
611 node->s3ty = src->bytesperline;
612 if (cfg->src_interlaced)
614 node->s3ty |= bdisp_hw_color_format(src_fmt);
616 if ((t_plan != BDISP_CBCR) && cfg->src_420) {
617 /* No chroma upsampling for output RGB / Y plane */
/*
 * s2xy and s2sz hold two 16-bit fields packed as (high << 16 | low), so
 * multiplying the packed word by 2 doubles both fields, provided the low
 * field does not carry into the high one.
 */
618 node->s3xy = node->s2xy * 2;
619 node->s3sz = node->s2sz * 2;
621 /* No need to read Y (Src3) when writing Chroma */
622 node->s3ty |= BLT_S3TY_BLANK_ACC;
623 node->s3xy = node->s2xy;
624 node->s3sz = node->s2sz;
628 /* Resize (scale OR 4:2:0: chroma up/downsampling) */
629 if (node->ins & BLT_INS_SCALE) {
630 /* no need to compute Y when writing CbCr from RGB input */
631 bool skip_y = (t_plan == BDISP_CBCR) && !cfg->src_yuv;
/*
 * Filter control: the SCALE variants and the SAMPLE variants are set in
 * separate branches, each with an optional Y counterpart.
 */
635 node->fctl = BLT_FCTL_HV_SCALE;
637 node->fctl |= BLT_FCTL_Y_HV_SCALE;
639 node->fctl = BLT_FCTL_HV_SAMPLE;
641 node->fctl |= BLT_FCTL_Y_HV_SAMPLE;
644 /* RSF - Chroma may need to be up/downsampled */
647 if (!cfg->src_420 && cfg->dst_420 && (t_plan == BDISP_CBCR)) {
648 /* RGB to 4:2:0 for Chroma: downsample */
651 } else if (cfg->src_420 && !cfg->dst_420) {
652 /* 4:2:0: to RGB: upsample*/
/* Resize factors pack the vertical increment above the horizontal one */
656 node->rsf = v_inc << 16 | h_inc;
659 node->rzi = BLT_RZI_DEFAULT;
661 /* Filter table physical addr */
662 node->hfp = bdisp_hw_get_hf_addr(h_inc);
663 node->vfp = bdisp_hw_get_vf_addr(v_inc);
/* Same programming for the Y path, using the yh_inc and yv_inc increments */
670 node->y_rsf = yv_inc << 16 | yh_inc;
671 node->y_rzi = BLT_RZI_DEFAULT;
672 node->y_hfp = bdisp_hw_get_hf_addr(yh_inc);
673 node->y_vfp = bdisp_hw_get_vf_addr(yv_inc);
677 /* Versatile matrix for RGB / YUV conversion */
/* The direction of the conversion follows the source color space */
679 ivmx = cfg->src_yuv ? bdisp_yuv_to_rgb : bdisp_rgb_to_yuv;
681 node->ivmx0 = ivmx[0];
682 node->ivmx1 = ivmx[1];
683 node->ivmx2 = ivmx[2];
684 node->ivmx3 = ivmx[3];
689 * bdisp_hw_build_all_nodes
690 * @ctx: device context
692 * Build all the nodes for the blitter operation
/*
 * Builds the chain of nodes for one request. After validating the node
 * pointers and computing the operation configuration, the source is
 * processed in vertical strides of MAX_SRC_WIDTH pixels. Each stride gets a
 * RGB/Y node, plus a chroma node when the destination has more than one
 * plane. Nodes are chained by storing the physical address of the next node
 * in the nip field of the previous one; the last node gets nip = 0.
 */
697 static int bdisp_hw_build_all_nodes(struct bdisp_ctx *ctx)
699 struct bdisp_op_cfg cfg;
700 unsigned int i, nid = 0;
701 int src_x_offset = 0;
/* Sanity pass over every node slot; a NULL slot is reported with its index */
703 for (i = 0; i < MAX_NB_NODE; i++)
705 dev_err(ctx->bdisp_dev->dev, "node %d is null\n", i);
709 /* Get configuration (scale, flip, ...) */
710 if (bdisp_hw_get_op_cfg(ctx, &cfg))
713 /* Split source in vertical strides (HW constraint) */
714 for (i = 0; i < MAX_VERTICAL_STRIDES; i++) {
715 /* Build RGB/Y node and link it to the previous node */
716 bdisp_hw_build_node(ctx, &cfg, ctx->node[nid],
717 cfg.dst_nbp == 1 ? BDISP_RGB : BDISP_Y,
/*
 * NOTE(review): this link indexes node[nid - 1], so it presumably runs only
 * when nid is non-zero - confirm the guard in the full function.
 */
720 ctx->node[nid - 1]->nip = ctx->node_paddr[nid];
723 /* Build additional Cb(Cr) node, link it to the previous one */
724 if (cfg.dst_nbp > 1) {
725 bdisp_hw_build_node(ctx, &cfg, ctx->node[nid],
726 BDISP_CBCR, src_x_offset);
727 ctx->node[nid - 1]->nip = ctx->node_paddr[nid];
731 /* Next stride until full width covered */
732 src_x_offset += MAX_SRC_WIDTH;
733 if (src_x_offset >= ctx->src.crop.width)
737 /* Mark last node as the last */
738 ctx->node[nid - 1]->nip = 0;
744 * bdisp_hw_save_request
745 * @ctx: device context
747 * Save a copy of the request and of the built nodes
/*
 * Copies the current request (source frame, destination frame and flip
 * flags) into bdisp_dev->dbg.copy_request, and copies every node by value
 * into bdisp_dev->dbg.copy_node[]. The storage for a node copy is obtained
 * with devm_kzalloc() on the device.
 */
752 static void bdisp_hw_save_request(struct bdisp_ctx *ctx)
754 struct bdisp_node **copy_node = ctx->bdisp_dev->dbg.copy_node;
755 struct bdisp_request *request = &ctx->bdisp_dev->dbg.copy_request;
756 struct bdisp_node **node = ctx->node;
/* Frames are copied by value (struct assignment) */
760 request->src = ctx->src;
761 request->dst = ctx->dst;
762 request->hflip = ctx->hflip;
763 request->vflip = ctx->vflip;
767 for (i = 0; i < MAX_NB_NODE; i++) {
768 /* Allocate memory if not done yet */
770 copy_node[i] = devm_kzalloc(ctx->bdisp_dev->dev,
771 sizeof(*copy_node[i]),
/* Copy the whole node content, not the pointer */
776 *copy_node[i] = *node[i];
782 * @ctx: device context
784 * Send the request to the HW
/*
 * Builds the nodes for the context, saves a copy of the request, then
 * programs the hardware: BLT_AQ1_CTL and BLT_ITM0 are configured, the
 * physical address of the first node is written to BLT_AQ1_IP, and the
 * physical address of the last node is written to BLT_AQ1_LNA.
 * A failure to build the nodes is reported with dev_err().
 */
789 int bdisp_hw_update(struct bdisp_ctx *ctx)
792 struct bdisp_dev *bdisp = ctx->bdisp_dev;
793 struct device *dev = bdisp->dev;
794 unsigned int node_id;
796 dev_dbg(dev, "%s\n", __func__);
799 ret = bdisp_hw_build_all_nodes(ctx);
801 dev_err(dev, "cannot build nodes (%d)\n", ret);
805 /* Save a copy of the request */
806 bdisp_hw_save_request(ctx);
808 /* Configure interrupt to 'Last Node Reached for AQ1' */
809 writel(BLT_AQ1_CTL_CFG, bdisp->regs + BLT_AQ1_CTL);
810 writel(BLT_ITS_AQ1_LNA, bdisp->regs + BLT_ITM0);
812 /* Write first node addr */
813 writel(ctx->node_paddr[0], bdisp->regs + BLT_AQ1_IP);
815 /* Find and write last node addr : this starts the HW processing */
/*
 * The last node is the first one whose nip is 0 (set by
 * bdisp_hw_build_all_nodes()). The loop bound leaves node_id at
 * MAX_NB_NODE - 1 when no earlier node terminates the chain.
 */
816 for (node_id = 0; node_id < MAX_NB_NODE - 1; node_id++) {
817 if (!ctx->node[node_id]->nip)
820 writel(ctx->node_paddr[node_id], bdisp->regs + BLT_AQ1_LNA);